o
    jhP%                     @   sz  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' ej(Z(dd Z)d$de*de*de*fddZ+de*fddZ,dd Z-dd Z.dd Z/dd  Z0d d!l1m2Z2 d dl3Z3e#d$d"d#Z4dS )%    N)urlparse)async_playwright)sync_playwright)stealth_asyncstealth_sync)BeautifulSoup)
PROXY_LISTparse_proxy_urlhuman_delay)ProductScrapeTaskSerializer)settings)UTApiUTApiOptions)ProductScrapeTaskProductImage)shared_task)get_channel_layer)async_to_syncc                 C   sF   t dd| }t dd|}t dd|}t jdd|t jd}| S )Nz\s+//\s+ z\s+//z//\s+z	^\s*//.*$ )flags)resub	MULTILINEstrip)raw_textcleaned r   1/var/www/Befach/backend/bot/utils/scrape_image.pyclean_json_text   s
   r   urlproxyreturnc                 C   s   |rt |nd }t =}|jj|dg ddtd}|jddddd	d
}| }t| |j| ddd t	  |
 }|  |W  d    S 1 sKw   Y  d S )Nchrome)z--no-sandboxz---disable-blink-features=AutomationControlledz--disable-extensions2   )r!   channelargsslow_moexecutable_pathz[Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36i   i  )widthheightzen-US)
user_agentviewportlocaledomcontentloadedi@  )
wait_untiltimeout)r	   r   chromiumlaunchEXECUTABLE_PATHnew_contextnew_pager   gotor
   contentclose)r    r!   proxy_configpbrowsercontextpagehtmlr   r   r   fetch_html_with_stealth#   s,   	$r?   html_contentc                 C   s   t | d}|jddd}i }|D ]O}z6|j}|sW qt|}t|}t|trG|D ]}|ddkrF|d|d< |d	i }	|d
g }
q*W q t	y_ } zt
d| W Y d }~qd }~ww |S )Nzhtml.parserscriptzapplication/ld+json)typez@typeProductimageimagesoffersreviewu   ❌ Error parsing JSON:)r   find_allstringr   jsonloads
isinstancelistget	Exceptionprint)r@   soupscriptsdatarA   r   cleaned_json_textparseditemofferreviewser   r   r   parse_and_print_product_infoF   s,   


rZ   c              	   C   s   t t}t| |}tjtjddt	
 jd d  d}t|ddd}|| W d    n1 s5w   Y  zt|}|W tj|rMt| S S tj|rZt| w w )Nmediaoutput_files
   z.htmlwutf-8)encoding)randomchoicer   r?   ospathjoinr   BASE_DIRuuiduuid4hexopenwriterZ   existsremove)r    r!   r>   	file_pathfrS   r   r   r   scrap_detail_pagea   s   

(rp   c                  C   sB   t jt jt jgd} tt| d	d}t
t|d}|S )N)apiKeyappIdregionsr_   )token)r   UPLOADTHING_SECRETUPLOADTHING_APP_IDUPLOADTHING_REGIONbase64	b64encoderJ   dumpsencodedecoder   r   )
token_dataencoded_tokenutapir   r   r   get_utapi_objectr   s   r   c              
   C   sP   t  }z
|j| dd}|W S  ty' } ztdt| W Y d }~d S d }~ww )Ninline)filescontent_dispositionz*Intenal Server Error while uploadig file :)r   upload_filesrO   rP   str)	file_objsr   responserY   r   r   r   upload_images_to_uploadthing}   s   r   c                 C   s"   t  }t|j|| ||d d S )N)rB   messagerS   )r   r   
group_send)rB   r   rS   
group_namechannel_layerr   r   r   emit_to_socket   s   r   )BytesIOc                 C   s  d }z	t jj| d}W n
   td Y d S |js6td td z	t jj| d}W n
   td Y d S |jj}d}d|t|j	d}d|_
|  d	| }td
|||d tj|sftd| zbt|dddd}tt|}	|	d }
|	dd  }W d    n1 sw   Y  |
d}|
d}d}g }|D ]}|d}|dg }|r|sqg }|d d D ]W}z8|sW qtj|dd}|  t|j}|jdd}t|dd pd}t j | |_ |!| W q t"y } ztd| d|  W Y d }~qd }~ww |sqt#|}d d! |D }|D ] }|| |krCd"$|||< td#| d$t%| d%  nq$|d7 }|!||d d& |d' dkrld(}d)||d}td
|||d g }qd*}d)||d}td
|||d t|d+ddd}t&|}|'|
 |(| W d    n	1 sw   Y  td, d-|_
|  d.}d/|i d}td
|||d d0d1|j) d2W S  t"y } ztd3|  d4|_
d5d3| d2W  Y d }~S d }~ww )6N)idz3Error in save_images_to_uploadthing: Task not foundzNo file foundr$   zImages Processing Started!images_started)eventr   payloadIN_PROGRESSuser_scrapper_complete)rB   r   rS   r   zCSV path does not exist: rr_   r   )r`   newliner      IDz	Image URL
product_id
image_urls      )r0   zContent-Typez
image/jpeg;z.jpgzError downloading z: c                 S   s   g | ]
}t |d r|jqS )r    )hasattrr    ).0resr   r   r   
<listcomp>   s    z.save_images_to_uploadthing.<locals>.<listcomp>,zUpdated z with z images)r   	image_url   z&Some Images are Processed Succesfully!images_processedz%All Images are Processed Succesfully!r^   zCSV updated successfully.	COMPLETEDzImages Processed Succesfully!images_completedTz(Images uploaded successfully for task : )successr   z%Error in save_images_to_uploadthing: FAILEDF)*r   objectsrN   rP   result_filetimesleeprd   r   rS   images_statussaver   rc   rl   FileNotFoundErrorrj   rM   csvreaderindexrequestsraise_for_statusr   r7   headers	mimetypesguess_extensionsplitrg   rh   ri   nameappendrO   r   re   lenwriterwriterow	writerowsr   )task_idimages_dictuser_idscrape_taskcsv_pathr   rS   r   ro   r   headerrowsproduct_id_idximage_urls_idx	processedprocessed_obj	image_objpidr   r   img_urlr   file_objcontent_typeextrY   upload_responsesuploaded_urlsrowr   r   r   r   save_images_to_uploadthing   s   





 

r   )N)5rx   ra   rc   rg   rJ   r   r   r   r   urllib.parser   playwright.async_apir   playwright.sync_apir   playwright_stealth.stealthr   r   bs4r   bot.utils.scraper_utilsr   r	   r
   bot.serializersr   django.confr   upyloadthingr   r   
bot.modelsr   r   celeryr   channels.layersr   asgiref.syncr   r3   r   r   r?   rZ   rp   r   r   r   ior   r   r   r   r   r   r   <module>   sF    #