o
    [Ah#                     @   s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZ deded	ed
efddZdS )    )BeautifulSoupN)extract_query_name)settings)extract_product_info)scrap_detail_pagesave_images_to_uploadthing	html_filecsv_filenamenum_productsskip_in_pagec           .      C   s  t jtjd}t j|dd t j||}t j|| }t j|s-td| d d S t|ddd}	t	
|	}
W d    n1 sDw   Y  |
}d	}d	}g }g d
}g }||d  }|D ]}z||krkW  n|d}tdd|}d|d }t|}|std| |d7 }|dkrtd W  nW q_|r|dnd}td}||}|r|d}|d}|di d}|d}|d}|di d}|d}|d} |d}!|di d}"|di d }#|}|di d!}$|d"}%|di d#}&z|di di d$}'W n   d}'Y |di d%}(t jd d& })||(|)d' ||)|||d(|||||||| |!|"|#|$|%|&|'g |d7 }W q_ ty }* zd	d l}+|+  td)|*  W Y d }*~*q_d }*~*ww t|d*ddd+},t|,}-|-| |-| W d    n	1 sw   Y  tj d,v rt ||| td-t!| d.|  d S )/Noutput_filesT)exist_oku   [❌] HTML file z not found.rzutf-8)encodingr   )IDSKUURLTitlez	Image URLDescriptionPriceMOQCompanyRatingTypeCategory
AttributesFeatureszSample InfozRange PriceVariantsz
Brand NameUnitMXQtitlez<[^>]+> zhttps:
productUrlzResult not found   d   zBroken after 100 retriesdescriptionz_(\d+)\.htmlpriceproductmoqcompanyNameproductScoreproduct_typecategory
attributesfeaturessample_infoladder_pricevariants
brand_nameunitmaxQuantityimages   )
image_urls
product_idzN/Az [Error] Skipping a card due to: w)newliner   )yestrue1u   ✅ Extracted z products and saved to )"ospathjoinr   
MEDIA_ROOTmakedirsexistsprintopenjsonloadgetresubr   compilesearchgroupuuiduuid4hexappend	Exception	traceback	print_exccsvwriterwriterow	writerowsSCRAPE_IMAGElowerr   delaylen).r   r	   r
   scrape_task_iduser_idr   
output_dircsv_path	html_pathfilehtmlcards	processedretriedcsv_datacsv_headersimages_taskscard	raw_titler    urlresultr%   patternmatchskur&   r(   companyratingr+   r,   r-   r.   r/   range_pricer1   r2   r3   mxqr5   r8   erS   frV    rw   </var/www/Befach/backend/bot/alibaba/products_page_scraper.pyh_to_x   s   















ry   )bs4r   rU   rN   rI   r>   rF   bot.utils.data_utilsr   django.confr   bot.utils.scrape_detailsr   bot.utils.scrape_imager   r   strintry   rw   rw   rw   rx   <module>   s    