
    h                         d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ dedefd	Zd
edefdZdedefdZdedefdZd Zy)    N)urlparse)BeautifulSoup)async_playwright)stealth_async)human_delayproduct_urlreturnc                     	 t        j                  d|       }d}|r|j                  d      }d| S # t        $ r}t	        d|       d}~ww xY w)zE
    Convert a product detail URL to the description iframe URL.
    z_([0-9]+)\.html    zMhttps://www.alibaba.com/product-detail/description/descIframe.html?productId=zInvalid product URL format: N)researchgroup	Exception
ValueError)r   match
product_ides       7/var/www/Befach/backend/bot/utils/scrape_description.pyconvert_to_description_urlr      s_    =		,k:
QJ^_i^jkk =7s;<<=s   /2 	AA		Aurlc                   K   t               4 d{   }|j                  j                  ddg dd       d{   }|j                  ddd	d
d       d{   }|j	                          d{   }t        |       d{    t        d|         |j                  | d       d{    t        dd       t        d       |j                  dd       d{    |j                          d{   }|j                          d{    |cddd      d{    S 7 7 7 7 7 7 7 T7 >7 (7 # 1 d{  7  sw Y   yxY ww)zS
    Use Playwright and stealth to get HTML content after waiting for <tbody>.
    NTchrome)z--no-sandboxz---disable-blink-features=AutomationControlledz--disable-extensions2   )headlesschannelargsslow_moz[Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36i   i  )widthheightzen-US)
user_agentviewportlocaleu   ⏳ Loading domcontentloaded)
wait_untilr      u"   ⌛ Waiting for <tbody> to load...tablei0u  )timeout)r   chromiumlaunchnew_contextnew_pager   printgotor   wait_for_selectorcontentclose)r   pbrowsercontextpagehtmls         r   fetch_html_with_tbody_waitr7      sT      !  Q

))
  * 	
 	
  ++t#s3 , 
 
 %%''D!!!SE"#ii(:i;;;Aq23$$We$<<<\\^#mmo=  	

 (! 	< 	=#;   s   ED'E%D<D*D<D,D<1D.2D<D0(D<-D2.1D<D4 D<7D68D<D8D<E!D:"E*D<,D<.D<0D<2D<4D<6D<8D<:E<EEE
Er6   c                 .   t        | d      }|j                  d      }|st        d      i }|j                  d      D ]T  }|j                  d      }t	        |      dk\  s#|d   j                  d	      }|d
   j                  d	      }|sP|||<   V |S )zO
    Extract table data (class 'all magic-1') into a key-value dictionary.
    zhtml.parserr'   z)Table with class 'all magic-1' not found.trtdr&   r   T)stripr   )r   findr   find_alllenget_text)r6   soupr'   datarowcellskeyvalues           r   parse_table_to_dictrF   @   s     }-DIIgEDEED~~d#T"u:?(##$#/C!H%%D%1E!S	 $ K    c                 d   K   t        |       }t        |       d{   }t        |      }|S 7 w)zp
    Convert product detail URL to description URL, load it,
    and extract table data as key-value pairs.
    N)r   r7   rF   )r   description_urlr6   
table_datas       r    extract_product_description_datarK   T   s4     
 1=O+O<<D$T*J =s   0.0c                 N     i  fd}t        j                   |              S )Nc                     K   	 t               d {   } t        d       | j                         D ]  \  }}t        | d|        ||<    y 7 =# t        $ r}t        d|        Y d }~y d }~ww xY ww)Nu   ✅ Extracted Table Data:z: u   ❌ Error: )rK   r-   itemsr   )rA   kvr   discriptionr   s       r   mainz!extract_description.<locals>.maina   sy     	%9#>>D-.

12aSk" A % ?
  	%Ks#$$	%s>   A7A A<A A7A 	A4A/*A7/A44A7)asynciorun)r   rR   rQ   s   ` @r   extract_descriptionrU   ^   s#    K% KKrG   )rS   r   urllib.parser   bs4r   playwright.async_apir   playwright_stealth.stealthr   bot.utils.scraper_utilsr   strr   r7   dictrF   rK   rU    rG   r   <module>r^      sl     	 !  1 4 /=C =C ="# "# "Jc d (  rG   