
    Nyh%                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' ejP                  Z(d Z)dde*de*de*fdZ+de*fdZ,d Z-d Z.d Z/d Z0d dl1m2Z2 d dl3Z3e#dd       Z4y)    N)urlparse)async_playwright)sync_playwright)stealth_asyncstealth_sync)BeautifulSoup)
PROXY_LISTparse_proxy_urlhuman_delay)ProductScrapeTaskSerializer)settings)UTApiUTApiOptions)ProductScrapeTaskProductImage)shared_task)get_channel_layer)async_to_syncc                     t        j                  dd|       }t        j                  dd|      }t        j                  dd|      }t        j                  dd|t         j                        }|j                         S )Nz\s+//\s+ z\s+//z//\s+z	^\s*//.*$ )flags)resub	MULTILINEstrip)raw_textcleaneds     1/var/www/Befach/backend/bot/utils/scrape_image.pyclean_json_textr       s\    ff[#x0GffXsG,GffXsG,Gff\2wbllCG==?    urlproxyreturnc                    |rt        |      nd }t               5 }|j                  j                  |dg ddt              }|j                  ddddd	
      }|j                         }t        |       |j                  | dd       t                |j                         }|j                          |cd d d        S # 1 sw Y   y xY w)Nchrome)z--no-sandboxz---disable-blink-features=AutomationControlledz--disable-extensions2   )r#   channelargsslow_moexecutable_pathz[Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36i   i  )widthheightzen-US)
user_agentviewportlocaledomcontentloadedi@  )
wait_untiltimeout)r
   r   chromiumlaunchEXECUTABLE_PATHnew_contextnew_pager   gotor   contentclose)r"   r#   proxy_configpbrowsercontextpagehtmls           r   fetch_html_with_stealthrB   #   s    -2?5)L		 a**##
 + $ 
 %%D  $s3 & 
 !T		#"4e	D||~=  s   BB<<Chtml_contentc                    t        | d      }|j                  dd      }i }|D ]  }	 |j                  }|st        |      }t	        j
                  |      }t        |t              rT|D ]O  }|j                  d      dk(  s|j                  d      |d<   |j                  d	i       }	|j                  d
g       }
Q  |S # t        $ r}t        d|       Y d }~d }~ww xY w)Nzhtml.parserscriptzapplication/ld+json)typez@typeProductimageimagesoffersreviewu   ❌ Error parsing JSON:)r   find_allstringr    jsonloads
isinstancelistget	Exceptionprint)rC   soupscriptsdatarE   r   cleaned_json_textparseditemofferreviewses               r   parse_and_print_product_infor^   F   s    }5DmmH+@mAGD	0}}H / 9ZZ 12F&$'"Dxx(I5)-'):X $2 6"&((8R"8 # * K  	0+Q//	0s#   B?A	B?:B??	CCCc                 R   t        j                  t              }t        | |      }t        j
                  j                  t        j                  ddt        j                         j                  d d  d      }t        |dd      5 }|j                  |       d d d        	 t        |      }|t        j
                  j                  |      rt	        j                   |       S S # 1 sw Y   LxY w# t        j
                  j                  |      rt	        j                   |       w w xY w)Nmediaoutput_files
   z.htmlwutf-8)encoding)randomchoicer	   rB   ospathjoinr   BASE_DIRuuiduuid4hexopenwriter^   existsremove)r"   r#   rA   	file_pathfrW   s         r   scrap_detail_pageru   a   s    MM*%E"3.DX..DJJLL\L\]`^`LaKbbgIhiI	iw	/ 1	!+D1 77>>)$IIi  %  77>>)$IIi  %s   C#!C/ #C,/7D&c                     t         j                  t         j                  t         j                  gd} t	        j
                  t        j                  |       j                  d            j                  d      }t        t        |            }|S )N)apiKeyappIdregionsrd   )token)r   UPLOADTHING_SECRETUPLOADTHING_APP_IDUPLOADTHING_REGIONbase64	b64encoderN   dumpsencodedecoder   r   )
token_dataencoded_tokenutapis      r   get_utapi_objectr   r   sm    --,,//0J
 $$TZZ
%;%B%B7%KLSST[\M,]34ELr!   c                     t               }	 |j                  | d      }|S # t        $ r}t        dt	        |             Y d }~y d }~ww xY w)Ninline)filescontent_dispositionz*Intenal Server Error while uploadig file :)r   upload_filesrS   rT   str)	file_objsr   responser]   s       r   upload_images_to_uploadthingr   }   sN    EC%%I8%T C:3q6BBCs   ! 	A	AA	c                 X    t               } t        |j                        || ||d       y )N)rF   messagerW   )r   r   
group_send)rF   r   rW   
group_namechannel_layers        r   emit_to_socketr      s0    %'M+M-**+	
r!   )BytesIOc           
         d }	 t         j                  j                  |       }|j                  sAt        d       t        j                  d       	 t         j                  j                  |       }|j                  j                  }d}d|t        |      j                  d}d|_
        |j                          d	| }t        d
|||       t        j                  j                  |      st        d|       	 t!        |ddd      5 }t#        t%        j&                  |            }	|	d   }
|	dd  }d d d        
j)                  d      }	 |
j)                  d      }d}g }|D ]  }|j                  d      }|j                  dg       }|r|s,g }|D ]  }	 |st+        j                  |d      }|j-                          t/        |j0                        }|j2                  j                  dd      }t5        j6                  |j9                  d      d         xs d}t;        j<                         j>                   | |_         |jC                  |        |stG        |      }|D cg c]  }tI        |d       s|jJ                   }}D ];  }||   |k(  sd!jM                  |      ||<   t        d"| d#tO        |       d$        n |dz  }|jC                  ||d   d%       |d&z  dk(  sd'}d(||d}t        d
|||       g } d)}d(||d}t        d
|||       t!        |d*dd      5 }t%        jP                  |      }|jS                  |
       |jU                         d d d        t        d+       d,|_
        |j                          d-}d.|i d}t        d
|||       d/d0|jV                   d1S #  t        d       Y y xY w#  t        d       Y y xY w# 1 sw Y   xY w#  |
j)                  d      }Y xY w# tD        $ r}t        d| d|        Y d }~d }~ww xY wc c}w # 1 sw Y   xY w# tD        $ r'}t        d2|        d3|_
        d4d2| d1cY d }~S d }~ww xY w)5N)idz3Error in save_images_to_uploadthing: Task not foundzNo file foundr'   zImages Processing Started!images_started)eventr   payloadIN_PROGRESSuser_scrapper_complete)rF   r   rW   r   zCSV path does not exist: rrd   r   )re   newliner      IDz	Image URLz
Image URLs
product_id
image_urls   )r3   zContent-Typez
image/jpeg;z.jpgzError downloading z: r"   ,zUpdated z with z images)r   	image_url   z&Some Images are Processed Succesfully!images_processedz%All Images are Processed Succesfully!rc   zCSV updated successfully.	COMPLETEDzImages Processed Succesfully!images_completedTz(Images uploaded successfully for task : )successr   z%Error in save_images_to_uploadthing: FAILEDF),r   objectsrR   rT   result_filetimesleepri   r   rW   images_statussaver   rh   rq   FileNotFoundErrorro   rQ   csvreaderindexrequestsraise_for_statusr   r:   headers	mimetypesguess_extensionsplitrl   rm   rn   nameappendrS   r   hasattrr"   rj   lenwriterwriterow	writerowsr   )task_idimages_dictuser_idscrape_taskcsv_pathr   rW   r   rt   r   headerrowsproduct_id_idximage_urls_idx	processedprocessed_obj	image_objpidr   r   img_urlr   file_objcontent_typeextr]   upload_responsesresuploaded_urlsrowr   s                                  r   save_images_to_uploadthingr      s   K'//33w3? ""

2	+337777CK &&++H*G -k:??D
 !.K	"J+W4T^_77>>(#";H: FGG_X(C'2> 	!#**Q-(FAYF!":D	  d+	8#\\+6N 	$I---C"|R8JS I%?" '||GR@H--/&x'7'78H#+#3#3#7#7#UL#33L4F4Fs4KA4NOYSYC'+zz|'7'7&8$>HM$$X. &    <IF0@XGCQVDWSWWXMX ~&#-*-((=*AC'HSEM0B/C7KL	  NI  sa8H!IJ1}!B.%+
 $7t`jk "c %f :&#

 	/tXbc(C'2> 	#!ZZ]FOOF#T"	#
 	)*$/!1&

 	/tXbc,TU`UcUcTd*effkCE	GI&	 		8#\\,7N2 ! ?.wir!=>>? Y<	# 	#"  X5aS9:$,!-RSTRU+VWWXs    N?  O ;Q 
)O#3Q O0 9Q P	Q B;P	Q *P1<P1
Q AQ 2AQ 48P6,AQ ?OO #O-(Q 0PQ 		P.P)#Q )P..Q 6P?;Q 	Q2Q-'Q2-Q2)N)5r~   rf   rh   rl   rN   r   r   r   r   urllib.parser   playwright.async_apir   playwright.sync_apir   playwright_stealth.stealthr   r   bs4r   bot.utils.scraper_utilsr	   r
   r   bot.serializersr   django.confr   upyloadthingr   r   
bot.modelsr   r   celeryr   channels.layersr   asgiref.syncr   r6   r    r   rB   r^   ru   r   r   r   ior   r   r    r!   r   <module>r      s      	   	  
 	  ! 1 / B  L L 7   , 6  - &((! !S !C !Fs 6!"C	  ~X ~Xr!   