o
    [A¸hER  ã                   @   s„   d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ ejZG dd„ dƒZd	d
„ ZdS )é    N)Úasync_playwright)Údatetime)Úquote)Útimezone)Úsettingsc                   @   sH   e Zd Zdd„ Zdd„ Zdd„ Zdedefd	d
„Zddd„Z	dd„ Z
dS )ÚMadeInChinaScraperc                 C   s"   g | _ d| _d| _d| _d| _d S )NiÈ¯  é   é   é   )ÚproductsÚtimeoutÚretry_countÚ	min_delayÚ	max_delay)Úself© r   ú2/var/www/Befach/backend/bot/china/made_in_china.pyÚ__init__   s
   
zMadeInChinaScraper.__init__c                 Ã   sô  zUt d|› ƒ z|j|| jddI d H  |jdddI d H  W n% tyG } zt d|› dt|ƒd d	… › d
ƒ W Y d }~W d S d }~ww i dg dddi i dœ}zLg }| d¡I d H }|r„| d¡I d H }|D ]}z| ¡ I d H  	¡ }	|	r}| 
|	¡ W qj   Y qj|rž|r—|d  ¡  	¡ dkr—| d¡ d |¡|d< W n ty¸ } zt d|› ƒ W Y d }~nd }~ww zÊi |d< | d¡I d H }
|
rt dƒ |
D ]K}| d¡I d H }| d¡I d H }|rì| ¡ I d H  	¡ nd }|rù| ¡ I d H  	¡ nd }|r|rt dd|¡ 	¡ }||d |< t d|› d|› ƒ qÏnf| d¡I d H }|r‚t dt|ƒ› dƒ |D ]N}| d¡I d H }| d ¡I d H }|rQ| ¡ I d H  	¡ nd }|r_| ¡ I d H  	¡ nd }|r€|r€t dd|¡ 	¡ }||d |< t d!|› d|› ƒ q3W n ty } zt d"|› ƒ W Y d }~nd }~ww z*| d#¡I d H }|r¿| ¡ I d H  	¡ }||d$< t d%|› ƒ nd&|d$< t d'ƒ W n tyâ } zt d(|› ƒ W Y d }~nd }~ww z| d)¡I d H }|rý| ¡ I d H  	¡ }||d*< nd |d*< W n ty } zt d+|› ƒ W Y d }~nd }~ww zv| d,¡I d H }|D ]i}zL| d-¡I d H }| d.¡I d H }|r^|r^| ¡ I d H  	¡  d/d¡}| ¡ I d H  	¡ }||d0 |< n|rt| ¡ I d H  	¡  d/d¡}d |d0 |< W q( ty‘ } zt d1|› ƒ W Y d }~q(d }~ww W n ty­ } zt d2|› ƒ W Y d }~nd }~ww t |ƒ z| d3¡I d H }|rÉ| ¡ I d H  	¡ |d4< W n   t d5ƒ Y zhg |d6< | d7¡I d H }|D ]I}| d8¡I d H }|rø| d9¡I d H }n| d:¡I d H }|r*| d;¡rd<| }n| d=¡sd>| d?¡ }| ¡  d@¡s*|d6  
|¡ qât dAt|d6 ƒ› dBƒ W |W S  tyW } zt dC|› ƒ W Y d }~|W S d }~ww  tyy } zt dDt|ƒd d	… › d
ƒ W Y d }~d S d }~ww )ENz%[INFO] Loading product details page: Údomcontentloaded©r   Ú
wait_untilz".detail-tab-item, .basic-info-listé˜:  ©r   z[WARN] Page load failed for ú: éd   ú...Ú )Ú
basic_infoÚdescriptionÚimagesÚ
categoriesÚprice_rangesÚRatingÚFeaturesúSample Infoz.sr-QPWords-itemÚar   Úhomez > r    z1[WARN] Couldn't scrape category from Breadcrumb: r!   z.only-one-priceNum-trz[INFO] Found single price rangez.only-one-priceNum-td-leftz.sa-only-property-priceú\(.*?\)z[INFO] Single range: z -> z.swiper-slide-divú[INFO] Found z multiple price rangesz.swiper-money-containerz.swiper-unit-containerz[INFO] Multiple range: z%[WARN] Couldn't scrape price ranges: z.review-scorer"   z[INFO] Rating found: úN/Az[INFO] No rating element foundz[WARN] Couldn't scrape rating: z.J-sample-order .sample-pricer$   z-[WARN] Couldn't scrape sample product price: z	.bsc-itemz.bac-item-labelz.bac-item-valueú:r   z[WARN] Failed parsing item: z#[WARN] Couldn't scrape basic info: z.detail-desc .rich-textr   z"[WARN] Couldn't scrape descriptionr   z.sr-proMainInfo-slide-picItemzimg.J-picImg-zoom-inÚsrcÚfsrcú//úhttps:©zhttp:r.   zhttps://ú/z	.mp4.webpz[INFO] Collected z imagesz[WARN] Couldn't scrape images: z([ERROR] Error scraping product details: )ÚprintÚgotor   Úwait_for_selectorÚ	ExceptionÚstrÚquery_selectorÚquery_selector_allÚtext_contentÚstripÚappendÚlowerÚpopÚjoinÚ
inner_textÚreÚsubÚlenÚreplaceÚget_attributeÚ
startswithÚlstripÚendswith)r   ÚpageÚurlÚeÚdetailsÚcatÚcategory_breadcrumÚcategory_a_tagsÚitemÚcategory_textÚone_price_rowsÚrowÚprice_elÚmoq_elÚpriceÚmoqÚmultiple_price_rowsÚ	rating_elÚrating_textÚsample_orderÚ
price_textÚbasic_info_itemsÚlabel_elementÚvalue_elementÚlabelÚvalueÚdesc_elementÚslide_itemsÚslideÚimg_tagr+   r   r   r   Úscrape_product_details   s>  €"€þø
€
€€ÿ€õ€€ÿ€€ÿ
€€ÿ€€þò€ÿ€

€ý€ý€þz)MadeInChinaScraper.scrape_product_detailsc                 Ã   s&  t | jƒD ]Š}z8td|d › d|› ƒ |j|| jddI d H  |jdddI d H  | d	¡I d H }|r=td
ƒ W  dS W  dS  ty } zDtd|d › dt|ƒd d… › dƒ || jd k rzt	 
dd¡}td|d›dƒ t |¡I d H  ntdƒ W Y d }~ dS W Y d }~qd }~ww d S )Nz[INFO] Loading page (attempt é   z): r   r   z!.prod-content, .no-result-contentr   r   z.no-result-contentz$[INFO] No results found on this pageFTz[WARN] Attempt z	 failed: r   r   r   é   z[INFO] Retrying in z.1fz seconds...z)[ERROR] Max retries reached for this page)Úranger   r1   r2   r   r3   r6   r4   r5   ÚrandomÚuniformÚasyncioÚsleep)r   rG   rH   ÚattemptÚ
no_resultsrI   Údelayr   r   r   Úscrape_pageÏ   s4   €þ&ý€ûízMadeInChinaScraper.scrape_pageÚfiltersÚreturnc                 C   s  g }ddddddddd	d
ddœ}dddddœ}|  d¡r!| d¡ |  d¡r+| d¡ |  d¡r:| d|d › ¡ |  d¡rL|d }| d|› d¡ |  d¡rV| d¡ |  d¡rl|  |d ¡}|rl| d|› ¡ |  d¡r‚|  |d ¡}|r‚| d|› ¡ |r‹dd |¡ S d S )!Nre   r	   r   é   r
   é   é   rf   é	   é
   é   )zManufacturer/ factoryzTrading CompanyzService ProviderzGroup CorporationÚRetailerzTrade AgentzBuying OfficeÚOtherÚ
WholesalerzGovernment InstitutionzIndividuals/SOHOéc   )ÚOEMÚODMz	Own BrandÚOthersÚsecured_tradingÚDP_1Ú
buy_sampleÚODS_1Ú	min_orderÚMin_Úsearch_withinÚPV_9999_Ú
_999999999Úaudited_supplierszSGS_AS--CL_DGMÚbusiness_typeÚBT_Úrd_typeÚRD_zF1--z--ÚF1)Úgetr:   r=   )r   rp   Úfilter_partsÚbusiness_type_mapÚrd_type_mapÚtermÚbt_valueÚrd_valuer   r   r   Úbuild_filter_stringí   sP   õü









z&MadeInChinaScraper.build_filter_stringre   Nr   c                 Ã   sD  t ƒ 4 I d H š}|jjdddgdtdI d H }|jdddd	œd
dI d H }| ¡ I d H }	t| dd¡ƒ}
|  |p;i ¡}d}|}d}	 d|
› d|› d|› d}|  	|	|¡I d H }|sct
d|› ƒ n–|	 d¡I d H }t
dt|ƒ› d|› ƒ |D ]j}||k r„|d7 }qy|r¯||kr¯t
d|› dƒ | ¡ I d H  | ¡ I d H  | j  W  d   ƒI d H  S z|  |	||¡I d H }|rÀ|d7 }W qy tyã } zt
d|› dt|ƒd d… › ƒ W Y d }~qyd }~ww |d7 }t | j| j¡}t |¡I d H  qD| ¡ I d H  | ¡ I d H  | jW  d   ƒI d H  S 1 I d H sw   Y  d S )NTz---disable-blink-features=AutomationControlledz--start-maximizedr   )ÚheadlessÚargsÚslow_moÚexecutable_pathzoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36iV  i   )ÚwidthÚheightzen-US)Ú
user_agentÚviewportÚlocaleÚ ú+r   z+https://www.made-in-china.com/multi-search/r0   z.htmlz$[INFO] Stopping, no results on page z.prod-contentr(   z products on page re   z"[INFO] Collected enough products (z), stopping...z)[ERROR] Failed to scrape product on page r   )r   ÚchromiumÚlaunchÚEXECUTABLE_PATHÚnew_contextÚnew_pager   rB   r•   ro   r1   r7   rA   Úcloser   Úscrape_single_productr4   r5   rh   ri   r   r   rj   rk   )r   Úsearch_termÚ
start_pageÚnum_productsÚskip_in_pagerp   ÚpÚbrowserÚcontextrG   Úencoded_termÚfilter_stringÚ	collectedÚcurrent_pageÚ
seen_countrH   ÚsuccessÚproduct_nodesÚproductrI   rn   r   r   r   Úscrape_made_in_china&  sp   €þù
ûË7€,€ÿÛ'2»z'MadeInChinaScraper.scrape_made_in_chinac                 Ã   sx  |  d¡I d H }dddddi t ¡  d¡|ddddddœ}| d¡I d H }|rl| ¡ I d H  ¡ |d< | d¡I d H p<d|d	< |d	  d
¡rM|d	 |d	< n|d	  d¡r]d|d	  |d	< n|d	  d¡sld|d	  |d	< |d	 dksd|d	 vs|d	  	d¡rˆt
d|› ƒ dS z| d¡I d H }|rž| ¡ I d H  ¡ |d< W n   t
dƒ Y z$| d¡I d H }|rË| ¡ I d H  ¡ }	t dd|	¡ dd¡ ¡ |d< W n   t
dƒ Y z#| d¡I d H }
|
sè| d¡I d H }
|
ró|
 ¡ I d H  ¡ nd|d< W n   t
dƒ Y zF| d¡I d H }|D ]9}z1| ¡ I d H  ¡ }	|	 d ¡d!  ¡ }| d"¡I d H }|r5| ¡ I d H  ¡ nd}||d# |< W q   Y qW n   t
d$ƒ Y |d	 dkr¬|j}| ¡ I d H }|  ||d	 ¡I d H }t d%|d	 ¡}|r~| d&¡|d'< |r¥| | d(i ¡| d)d¡| d*g ¡| d+d¡| d,i ¡| d-d¡d.œ¡ | ¡ I d H  d/|d0< d|d1< | j |¡ d2S )3Nzelement => element.outerHTMLr)   z%Y-%m-%d %H:%M:%Sr   )ÚnameÚlinkrT   rU   ÚcompanyÚ
propertiesÚ
scraped_atÚpage_numÚtypeÚskuÚvariantr$   r"   zOh2.product-name a:not(.activity-flag-img), .prod-name a:not(.activity-flag-img)r¸   Úhrefr¹   Úhttpsr-   r.   r/   zhttps://www.made-in-china.comzmade-in-china.comz.en.made-in-china.comz*[INFO] Skipping non-product entry on page Fz.price-info .price, .price-valrT   z[WARN] Couldn't get pricez&.product-property .info:nth-of-type(2)r'   ÚMOQrU   z[WARN] Couldn't get MOQz..company-name-wrapper .company-name-txt a spanz.company-name-wrapper spanrº   z[WARN] Couldn't get companyz.property-list lir*   r   z.property-valr»   z[WARN] Couldn't get propertiesz/product/([^/]+)/re   r¿   r   r   r   r    r!   r"   )r   r   r   r    úprice rangesr"   Úinquiryr¾   rÀ   T)Úevaluater   ÚnowÚstrftimer6   r>   r9   rC   rD   rF   r1   r?   r@   rB   r7   Úsplitr®   r¥   rd   ÚsearchÚgroupÚupdaterŽ   r¦   r   r:   )r   rG   r¶   r½   Úproduct_htmlÚproduct_dataÚname_elementÚprice_elementÚmoq_elementÚ	full_textÚcompany_elementÚprop_elementsÚpropÚ
label_textr]   Ú
value_textr®   Údetails_pageÚproduct_detailsÚmatchr   r   r   r§   n  s²   €óÿ€
€
"
ô






úz(MadeInChinaScraper.scrape_single_product)re   Nr   N)Ú__name__Ú
__module__Ú__qualname__r   rd   ro   Údictr5   r•   r·   r§   r   r   r   r   r      s     8
9Hr   c                 ƒ   s|  | s	t dƒ d S g d¢}tdd„ | D ƒƒr| d¡ tdd„ | D ƒƒr)| d¡ tdd„ | D ƒƒr7| d	¡ td
d„ | D ƒƒrE| d¡ tdd„ | D ƒƒrS| d¡ tdd„ | D ƒƒra| d¡ t|dddd?}tj||d}| ¡  | D ])‰ ‡ fdd„|D ƒ}| ¡ D ]\}}t|t	t
fƒrštj|dd||< q†| |¡ qwW d   ƒ n1 s«w   Y  t dt| ƒ› d|› ƒ d S )Nz[WARN] No products to save)r¸   r¹   rT   rU   rº   r½   r¼   c                 s   ó    | ]}d |v V  qdS )r»   Nr   ©Ú.0r¬   r   r   r   Ú	<genexpr>å  ó   € zsave_to_csv.<locals>.<genexpr>r»   c                 s   rß   )r   Nr   rà   r   r   r   râ   è  rã   r   c                 s   rß   )r   Nr   rà   r   r   r   râ   ê  rã   r   c                 s   rß   )r   Nr   rà   r   r   r   râ   ì  rã   r   c                 s   rß   )r    Nr   rà   r   r   r   râ   î  rã   r    c                 s   rß   )rÄ   Nr   rà   r   r   r   râ   ð  rã   rÄ   Úwr   zutf-8)ÚnewlineÚencoding)Ú
fieldnamesc                    s   i | ]	}|ˆ   |d ¡“qS )r   )rŽ   )rá   Úk©r¶   r   r   Ú
<dictcomp>ø  s    zsave_to_csv.<locals>.<dictcomp>F)Úensure_asciiz[INFO] Saved z products to )r1   Úanyr:   ÚopenÚcsvÚ
DictWriterÚwriteheaderÚitemsÚ
isinstancerÞ   ÚlistÚjsonÚdumpsÚwriterowrA   )r   Úfilenamerç   ÚcsvfileÚwriterrQ   rè   Úvr   ré   r   Úsave_to_csvÝ  s<   €





€úürû   )rj   r?   Úplaywright.async_apir   rî   rô   r   rh   Úurllib.parser   Údjango.utilsr   Údjango.confr   r£   r   rû   r   r   r   r   Ú<module>   s        Q