
    [AhER                         d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ ej                  Z G d d      Zd	 Zy)
    N)async_playwright)datetime)quote)timezone)settingsc                   <    e Zd Zd Zd Zd ZdedefdZd
dZ	d	 Z
y)MadeInChinaScraperc                 J    g | _         d| _        d| _        d| _        d| _        y )Niȯ           )productstimeoutretry_count	min_delay	max_delay)selfs    2/var/www/Befach/backend/bot/china/made_in_china.py__init__zMadeInChinaScraper.__init__   s&        c           	        K   	 t        d|        	 |j                  || j                  d       d {    |j                  dd       d {    i dg dddi i d}	 g }|j                  d       d {   }|rZ|j                  d       d {   }|D ]<  }	 |j                          d {   j                         }	|	r|j                  |	       > |rK|r5|d   j                         j                         dk(  r|j                  d       dj                  |      |d<   	 i |d<   |j                  d       d {   }
|
rt        d       |
D ]  }|j                  d       d {   }|j                  d       d {   }|r&|j                          d {   j                         nd }|r&|j                          d {   j                         nd }|s|st        j                   dd|      j                         }||d   |<   t        d| d|         n|j                  d       d {   }|rt        dt#        |       d       |D ]  }|j                  d       d {   }|j                  d        d {   }|r&|j                          d {   j                         nd }|r&|j                          d {   j                         nd }|s|st        j                   dd|      j                         }||d   |<   t        d!| d|         	 |j                  d#       d {   }|r:|j                          d {   j                         }||d$<   t        d%|        nd&|d$<   t        d'       	 |j                  d)       d {   }|r,|j                          d {   j                         }||d*<   nd |d*<   	 |j                  d,       d {   }|D ]  }	 |j                  d-       d {   }|j                  d.       d {   }|rg|re|j                          d {   j                         j%                  d/d      }|j                          d {   j                         }||d0   |<   n@|r>|j                          d {   j                         j%                  d/d      }d |d0   |<    	 t        |       	 |j                  d3       d {   }|r)|j                          d {   j                         |d4<   	 g |d6<   |j                  d7       d {   }|D ]  }|j                  d8       d {   }|r|j'                  d9       d {   }n|j'                  d:       d {   }|sT|j)                  d;      rd<|z   }n%|j)                  d=      sd>|j+                  d?      z   }|j                         j-                  d@      r|d6   j                  |        t        dAt#        |d6          dB       |S 7 7 # t        $ r(}t        d| dt        |      d d	  d
       Y d }~y d }~ww xY w7 7 7 #  Y xY w# t        $ r}t        d|        Y d }~pd }~ww xY w7 ^7 67  7 	7 7 t7 ?7 )7 7 # t        $ r}t        d"|        Y d }~d }~ww xY w7 7 # t        $ r}t        d(|        Y d }~ud }~ww xY w7 h7 Q# t        $ r}t        d+|        Y d }~Sd }~ww xY w7 F7 *7 7 7 7 # t        $ r}t        d1|        Y d }~nd }~ww xY w# t        $ r}t        d2|        Y d }~d }~ww xY w7 7 #  t        d5       Y xY w7 h7 M7 57 # t        $ r}t        dC|        Y d }~|S d }~ww xY w# t        $ r%}t        dDt        |      d d	  d
       Y d }~y d }~ww xY ww)ENz%[INFO] Loading product details page: domcontentloadedr   
wait_untilz".detail-tab-item, .basic-info-list:  r   z[WARN] Page load failed for : d   ... )
basic_infodescriptionimages
categoriesprice_rangesRatingFeaturesSample Infoz.sr-QPWords-itemar   homez > r$   z1[WARN] Couldn't scrape category from Breadcrumb: r%   z.only-one-priceNum-trz[INFO] Found single price rangez.only-one-priceNum-td-leftz.sa-only-property-price\(.*?\)z[INFO] Single range: z -> z.swiper-slide-div[INFO] Found z multiple price rangesz.swiper-money-containerz.swiper-unit-containerz[INFO] Multiple range: z%[WARN] Couldn't scrape price ranges: z.review-scorer&   z[INFO] Rating found: N/Az[INFO] No rating element foundz[WARN] Couldn't scrape rating: z.J-sample-order .sample-pricer(   z-[WARN] Couldn't scrape sample product price: z	.bsc-itemz.bac-item-labelz.bac-item-value:r!   z[WARN] Failed parsing item: z#[WARN] Couldn't scrape basic info: z.detail-desc .rich-textr"   z"[WARN] Couldn't scrape descriptionr#   z.sr-proMainInfo-slide-picItemzimg.J-picImg-zoom-insrcfsrc//https:zhttp:r2   zhttps:///z	.mp4.webpz[INFO] Collected z imagesz[WARN] Couldn't scrape images: z([ERROR] Error scraping product details: )printgotor   wait_for_selector	Exceptionstrquery_selectorquery_selector_alltext_contentstripappendlowerpopjoin
inner_textresublenreplaceget_attribute
startswithlstripendswith)r   pageurledetailscatcategory_breadcrumcategory_a_tagsitemcategory_textone_price_rowsrowprice_elmoq_elpricemoqmultiple_price_rows	rating_elrating_textsample_order
price_textbasic_info_itemslabel_elementvalue_elementlabelvaluedesc_elementslide_itemsslideimg_tagr/   s                                  r   scrape_product_detailsz)MadeInChinaScraper.scrape_product_details   s    t	9#?@iiT\\FXiYYY,,-Q[`,aaa !!! 	GO+/+>+>?Q+R%R"%,>,Q,QRU,V&VO /%373D3D3F-F,M,M,OM, #

= 9	 !0 s1v||~335?
,1JJsOGL)'C*,' (,'>'>?V'W!W!;<-),););<X)Y#Y'*'9'9:S'T!TIQx':':'<!< C C EW[EKV%6%6%88??AQU S"$&&R"="C"C"EC;@GN3C8!$9#d5'"JK  . 150G0GH[0\*\'*c2E.F-GG]^_#6C-0-?-?@Y-Z'ZH+.+=+=>V+W%WFMU8+>+>+@%@$G$G$I[_EIO):):)<#<"C"C"EUYC$&(ffZS&A&G&G&I?D 7 < %(?uD&P Q $7 
="&"5"5o"FF	)2)=)=)?#?"F"F"HK(3GH%1+?@(-GH%:;
K%)%8%89X%YY(4(?(?(A"A!H!H!JJ-7GM*-1GM*A)-)@)@)M#M ,D!.2.A.ABS.T(T.2.A.ABS.T(T(]+8+C+C+E%E$L$L$N$V$VWZ\^$_E+8+C+C+E%E$L$L$NE;@GL1%8  -/</G/G/I)I(P(P(R(Z(Z[^`b(c?C 5e < -( 'N<%)%8%89R%SS4@4K4K4M.M-T-T-VGM*
=$&! %)$;$;<[$\\(E$)$8$89O$PPG$+$9$9%$@@ %*$7$7$??>>$/"*S.C!$0C!D",szz#">C  #yy{33K@#H-44S9% )( )#gh.?*@)AIJ N[ Za 4SECF4CL>MN( &S 'W .G%$  OI!MNNO "X $Z!T!<8 +] ([%W%@#<  C=aSABBC G#?  =7s;<<=
  Z"A  KEaSIJJK $N )U(T &F%E
 *J$ ! <QC@A !  A;A3?@@A  T.M<:; ] Q@ @  =7s;<<N=
  	<SVDS\N#NO	s  `_ !X2 X,X2 X/X2 _  Y7 6Y&7Y7 Y)	Y7 Y//Y,0%Y/AY7 %Z: >Z?*Z: )Z*Z: Z"Z: Z%)Z: Z(Z: Z: !AZ: 7Z+87Z: /Z.0Z: Z1	Z: "Z4#)Z: Z7Z: $Z: 'A Z: ([% <[=[% ["6[% \ "\
#\ <\=\ ], /\50	], :]\8]'\;(]\>5]9]:0]*]+*]], _ $^ 8^9^ ^^ )^6 ^*^6  ^-!^6 ;^0<^6 ^3^6 A^6 91^6 *_ +`,X2 /X2 2	Y#;Y_ `Y##_ &Y7 )Y7 ,Y//Y41Y7 7	Z Z_ Z_ Z: Z: "Z: %Z: (Z: +Z: .Z: 1Z: 4Z: 7Z: :	[[_ [_ [% "[% %	\.\<_ \_ 
\ \ 	\2\-'_ -\22_ 5], 8];]>]]]	])]$], $]))], ,	^5^	_ 	^_ ^ ^ ^'$_ *^6 -^6 0^6 3^6 6	_?__ `__ 	`	$`?``		`c           
        K   t        | j                        D ]  }	 t        d|dz    d|        |j                  || j                  d       d {    |j                  dd       d {    |j                  d	       d {   }|rt        d
        y y y 7 K7 27 # t        $ r}t        d|dz    dt        |      d d  d       || j                  dz
  k  rEt        j                  dd      }t        d|dd       t        j                  |       d {  7   nt        d       Y d }~ yY d }~#d }~ww xY ww)Nz[INFO] Loading page (attempt    z): r   r   z!.prod-content, .no-result-contentr   r   z.no-result-contentz$[INFO] No results found on this pageFTz[WARN] Attempt z	 failed: r   r   r      z[INFO] Retrying in z.1fz seconds...z)[ERROR] Max retries reached for this page)ranger   r5   r6   r   r7   r:   r8   r9   randomuniformasynciosleep)r   rK   rL   attempt
no_resultsrM   delays          r   scrape_pagezMadeInChinaScraper.scrape_page   sG    T--.G!5gk]#cUKLiiT\\FXiYYY ,,7! -    $(#6#67K#LL
@A # / Z M  !!}Ic!fTcl^3OPT--11"NN1a0E/c{+FG!--...EF  /!sq   E5B#BB#,B-B#B!B#EB#B#!B##	E ,A1D;D D;/E;E  Efiltersreturnc                    g }ddddddddd	d
dd}ddddd}|j                  d      r|j                  d       |j                  d      r|j                  d       |j                  d      r|j                  d|d           |j                  d      r|d   }|j                  d| d       |j                  d      r|j                  d       |j                  d      r*|j                  |d         }|r|j                  d|        |j                  d      r*|j                  |d         }|r|j                  d|        |rddj                  |      z   S y )!Nrj   r   r      r         rk   	   
      )zManufacturer/ factoryzTrading CompanyzService ProviderzGroup CorporationRetailerzTrade AgentzBuying OfficeOther
WholesalerzGovernment InstitutionzIndividuals/SOHOc   )OEMODMz	Own BrandOtherssecured_tradingDP_1
buy_sampleODS_1	min_orderMin_search_withinPV_9999_
_999999999audited_supplierszSGS_AS--CL_DGMbusiness_typeBT_rd_typeRD_zF1--z--F1)getr>   rA   )r   ru   filter_partsbusiness_type_maprd_type_maptermbt_valuerd_values           r   build_filter_stringz&MadeInChinaScraper.build_filter_string   sz    &'  !!"&( "
  	
 ;;()';;|$(;;{#$w{';&< =>;;'?+D(4&
 ;<;;*+ 01;;'(,,W_-EFH##c($45;;y!"wy'9:H##c($45 DIIl333r   Nc                 ~  K   t               4 d {   }|j                  j                  dddgdt               d {   }|j	                  dddd	d
       d {   }|j                          d {   }	t        |j                  dd            }
| j                  |xs i       }d}|}d}	 d|
 d| d| d}| j                  |	|       d {   }|st        d|        n|	j                  d       d {   }t        dt        |       d|        |D ]  }||k  r|dz  }|rc||k\  r^t        d| d       |j                          d {    |j                          d {    | j                  c cd d d       d {    S 	 | j                  |	||       d {   }|r|dz  } |dz  }t%        j&                  | j(                  | j*                        }t-        j.                  |       d {    S|j                          d {    |j                          d {    | j                  cd d d       d {    S 7 @7 7 7 7 7 c7 7 7 7 # t         $ r(}t        d| dt#        |      d d         Y d }~zd }~ww xY w7 7 7 t7 Z# 1 d {  7  sw Y   y xY ww)NTz---disable-blink-features=AutomationControlledz--start-maximizedr   )headlessargsslow_moexecutable_pathzoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36iV  i   )widthheightzen-US)
user_agentviewportlocale +r   z+https://www.made-in-china.com/multi-search/r4   z.htmlz$[INFO] Stopping, no results on page z.prod-contentr,   z products on page rj   z"[INFO] Collected enough products (z), stopping...z)[ERROR] Failed to scrape product on page r   )r   chromiumlaunchEXECUTABLE_PATHnew_contextnew_pager   rF   r   rt   r5   r;   rE   closer   scrape_single_productr8   r9   rm   rn   r   r   ro   rp   )r   search_term
start_pagenum_productsskip_in_pageru   pbrowsercontextrK   encoded_termfilter_string	collectedcurrent_page
seen_countrL   successproduct_nodesproductrM   rs   s                        r   scrape_made_in_chinaz'MadeInChinaScraper.scrape_made_in_china&  s    #% E	! E	!JJ--C'  / .  G $//9 $(37 0  G !))++D !4!4S#!>?L 44W]CMI%LJCL>QRS`Raabcobppuv $ 0 0s ;;@OP '+&=&=o&N Nc-&8%99KL>Z[  -G!L0"a
 #	\(A B9+^\]%mmo--%mmo--#}},kE	! E	! E	!nj(,(B(B4R^(_"_"%NI  -& !t~~t~~FmmE***K N --/!!--/!!==KE	! E	! E	! , < !O .-iE	!p #` % j I,WYZ]^_Z`aebeZfYghiij +!!IE	! E	! E	! E	!sG  J=IJ=)J(IJ(IJ(5I6AJ(I*J(=I >AJ(I#J(-I&.J(?J=I(J=I,(I*)I,4A	J(=J >J(J"J(/J$0J(?J=J&J=J(J(J(J( J(#J(&J((J=*I,,	J5JJ(JJ("J($J(&J=(J:.J1/J:6J=c                 	  K   |j                  d       d {   }dddddi t        j                         j                  d      |dddddd}|j	                  d       d {   }|r|j                          d {   j                         |d<   |j                  d       d {   xs d|d	<   |d	   j                  d
      r	|d	   |d	<   n?|d	   j                  d      rd|d	   z   |d	<   n|d	   j                  d      sd|d	   z   |d	<   |d	   dk(  sd|d	   vs|d	   j                  d      rt        d|        y	 |j	                  d       d {   }|r)|j                          d {   j                         |d<   	 |j	                  d       d {   }|r^|j                          d {   j                         }	t        j                  dd|	      j                  dd      j                         |d<   	 |j	                  d       d {   }
|
s|j	                  d       d {   }
|
r&|
j                          d {   j                         nd|d<   	 |j                  d       d {   }|D ]  }	 |j                          d {   j                         }	|	j                  d       d!   j                         }|j	                  d"       d {   }|r&|j                          d {   j                         nd}||d#   |<    	 |d	   dk7  r|j                   }|j#                          d {   }| j%                  ||d	          d {   }t        j&                  d%|d	         }|r|j)                  d&      |d'<   |rx|j+                  |j-                  d(i       |j-                  d)d      |j-                  d*g       |j-                  d+d      |j-                  d,i       |j-                  d-d      d.       |j/                          d {    d/|d0<   d|d1<   | j0                  j3                  |       y27 07 7 7 7 7 #  t        d       Y xY w7 7 #  t        d       Y xY w7 r7 Z7 C#  t        d       Y >xY w7 ,7 7 7 #  Y 6xY w#  t        d$       Y xY w7 7 p7 w)3Nzelement => element.outerHTMLr-   z%Y-%m-%d %H:%M:%Sr    )namelinkrX   rY   company
properties
scraped_atpage_numtypeskuvariantr(   r&   zOh2.product-name a:not(.activity-flag-img), .prod-name a:not(.activity-flag-img)r   hrefr   httpsr1   r2   r3   zhttps://www.made-in-china.comzmade-in-china.comz.en.made-in-china.comz*[INFO] Skipping non-product entry on page Fz.price-info .price, .price-valrX   z[WARN] Couldn't get pricez&.product-property .info:nth-of-type(2)r+   MOQrY   z[WARN] Couldn't get MOQz..company-name-wrapper .company-name-txt a spanz.company-name-wrapper spanr   z[WARN] Couldn't get companyz.property-list lir.   r   z.property-valr   z[WARN] Couldn't get propertiesz/product/([^/]+)/rj   r   r!   r"   r#   r$   r%   r&   )r!   r"   r#   r$   price rangesr&   inquiryr   r   T)evaluater   nowstrftimer:   rB   r=   rG   rH   rJ   r5   rC   rD   rF   r;   splitr   r   rh   searchgroupupdater   r   r   r>   )r   rK   r   r   product_htmlproduct_dataname_elementprice_elementmoq_element	full_textcompany_elementprop_elementsprop
label_textra   
value_textr   details_pageproduct_detailsmatchs                       r   r   z(MadeInChinaScraper.scrape_single_productn  s    $--.LMM ",,.112EF 
  %33  5F  G  G*6*A*A*C$C#J#J#LL )5)C)CF)K#K#TuL F#..w7%1&%9V$f%006'/,v2F'FV$!&)445HI'FV\I]']V$  E)",v*>>F#,,-DE>xjIJ	/")"8"89Y"ZZM/</G/G/I)I(P(P(RW%
	- ' 6 67_ ``K#.#9#9#;;BBD	&(ffZY&G&O&OPUWY&Z&`&`&bU#
	1$+$:$:;k$llO"(/(>(>?[(\"\Ve_-G-G-I'I&P&P&RkpL#
	4")"<"<=P"QQM%'+'8!8 ? ? AI!*!5a!8!>!>!@J +/*=*=o*N$NMO\(@(@(B"B!I!I!KbdJ=GL.z: &" 5(llG!(!1!1!33L$($?$?l[aNb$ccOII2L4HIE&+kk!nU###"1"5"5lB"G#2#6#6}b#I-11(B?!0!4!4\"!E#2#6#6~b#I,00%@%  $$&&&&V %Y\*U N$ G$C#K" [)I	/-. a;	-+, m"\'I	1/0 R "9 %O"B	423
 4c 's  SQA	S!Q
"S;Q<)S%Q&BS Q QQ .Q/Q Q2 Q,Q2 3Q/4A
Q2 ?R RR .R/R R	R !R5 5R!6	R5  R-R$AR-R'R-6R*7R-R5 )S>S?SSC SS+S
SSSQ Q Q)&S,Q2 /Q2 2R?SR R R RS!R5 $R-'R-*R--R2/R5 5SSSS)rj   Nr   N)__name__
__module____qualname__r   rh   rt   dictr9   r   r   r    r   r   r	   r	      s4    un!<74 7C 7rF!Pkr   r	   c           
        K   | st        d       y g d}t        d | D              r|j                  d       t        d | D              r|j                  d       t        d | D              r|j                  d       t        d	 | D              r|j                  d
       t        d | D              r|j                  d       t        d | D              r|j                  d       t        |ddd      5 }t	        j
                  ||      }|j                          | D ]|  }|D ci c]  }||j                  |d       }}|j                         D ]6  \  }}t        |t        t        f      st        j                  |d      ||<   8 |j                  |       ~ 	 d d d        t        dt        |        d|        y c c}w # 1 sw Y   )xY ww)Nz[WARN] No products to save)r   r   rX   rY   r   r   r   c              3   $   K   | ]  }d |v  
 yw)r   Nr   .0r   s     r   	<genexpr>zsave_to_csv.<locals>.<genexpr>       
/<1
/   r   c              3   $   K   | ]  }d |v  
 yw)r!   Nr   r   s     r   r   zsave_to_csv.<locals>.<genexpr>  r   r   r!   c              3   $   K   | ]  }d |v  
 yw)r"   Nr   r   s     r   r   zsave_to_csv.<locals>.<genexpr>  s     
0!=A
0r   r"   c              3   $   K   | ]  }d |v  
 yw)r#   Nr   r   s     r   r   zsave_to_csv.<locals>.<genexpr>  s     
+Q8q=
+r   r#   c              3   $   K   | ]  }d |v  
 yw)r$   Nr   r   s     r   r   zsave_to_csv.<locals>.<genexpr>  r   r   r$   c              3   $   K   | ]  }d |v  
 yw)r   Nr   r   s     r   r   zsave_to_csv.<locals>.<genexpr>  s     
11>Q
1r   r   wr    zutf-8)newlineencoding)
fieldnamesF)ensure_asciiz[INFO] Saved z products to )r5   anyr>   opencsv
DictWriterwriteheaderr   items
isinstancer   listjsondumpswriterowrE   )	r   filenamer   csvfilewriterr   krU   vs	            r   save_to_csvr	    s    *+ WJ

/h
//,'

/h
//,'

0x
00-(

+(
++(#

/h
//,'

1
11.)	hR'	: 
!gJ?G2<=Q1gkk!R((=C=		1a$.!ZZ>CF $ OOC   	
! 
M#h-hZ
@A >
! 
!s6   C4G61G'G.G0/G #GGGG)ro   rC   playwright.async_apir   r   r  r   rm   urllib.parser   django.utilsr   django.confr   r   r	   r	  r   r   r   <module>r     sA     	 1 
     !  ((J J\"Br   