o
    h#                     @   sB  d dl mZ d dlmZ d dlZd dlZd dlZd dlZddlT d dl	Z	d dl
Zd dlmZ ejeje ejeZe dd Ze d	d
 Ze dd Zdd Ze dd Ze dd Ze d%defddZe dd Ze dd Ze dd Ze dd Z d&d d!Z!d"d# Z"e#d$kre"  dS dS )'    )	lru_cache)PathN   )*)MODEL_REPO_BRANCHc                 C   s2   dd l }| jdkr|j| jS | jdkrdS dS )Nr   cudampsl       0 )torchtyper   get_device_propertiestotal_memory)devicer	    r   R/var/www/Befach/backend/venv/lib/python3.10/site-packages/crawl4ai/model_loader.pyget_available_memory   s   

r   c                 C   sL   t | }| jdkrdS | jdv r$|dkrdS |dkrdS |dkr"d	S d
S dS )Ncpu   )r   r   l           l           l        @       )r   r
   )r   available_memoryr   r   r   calculate_batch_size   s   

r   c                  C   sH   dd l } | j r| d}|S | jj r| d}|S | d}|S )Nr   r   r   r   )r	   r   is_availabler   backendsr   )r	   r   r   r   r   
get_device)   s   



r   c                 C   s   t  }| | | |fS )N)r   to)modelr   r   r   r   set_model_device4   s   
r   c                  C   sT   t jt dt d} t j| dd t j|  ddd t j|  ddd | S )NCRAWL4_AI_BASE_DIRECTORYz	.crawl4aiT)exist_okz/cachez/models)ospathjoingetenvr   homemakedirs)home_folderr   r   r   get_home_folder9   s
   r(   c                  C   sP   ddl m} m}m}m} | jdd d}|jdd d}|  t|\}}||fS )Nr   BertTokenizer	BertModelAutoTokenizer	AutoModelzbert-base-uncasedresume_downloadtransformersr*   r+   r,   r-   from_pretrainedevalr   )r*   r+   r,   r-   	tokenizerr   r   r   r   r   load_bert_base_uncasedA   s   r5   BAAI/bge-small-en-v1.5returnc                 C   sP   ddl m}m}m}m} |j| dd}|j| dd}|  t|\}}||fS )zLoad the Hugging Face model for embedding.
    
    Args:
        model_name (str, optional): The model name to load. Defaults to "BAAI/bge-small-en-v1.5".
        
    Returns:
        tuple: The tokenizer and model.
    r   r)   Nr.   r0   )
model_namer*   r+   r,   r-   r4   r   r   r   r   r   load_HF_embedding_modelJ   s   
r9   c                  C   s^   ddl m} m} ddl m} dd l}| d}|d}|  t|\}}|d||d}|S )Nr   )r,   "AutoModelForSequenceClassification)pipelinez1dstefa/roberta-base_topic_classification_nyt_newsztext-classification)r   r4   )r1   r,   r:   r;   r	   r2   r3   r   )r,   r:   r;   r	   r4   r   r   piper   r   r   load_text_classifier[   s   

r=   c                     s   ddl m} m} dd l}ddlm dd ld}|j|d d| j|d d  t	\j
j d
 fdd		}|fS )Nr   )r:   r,   )expitzcardiffnlp/tweet-topic-21-multir.         ?r   c           
         s   | ddd|d}fdd|  D }  d	i |}W d    n1 s*w   Y  |j   }|}||kd }g }|D ]} fddt|D }	||	 qF|S )
NptT)return_tensorspadding
truncation
max_lengthc                    s   i | ]
\}}||  qS r   )r   ).0keyval)r   r   r   
<dictcomp>   s    zHload_text_multilabel_classifier.<locals>._classifier.<locals>.<dictcomp>r   c                    s    g | ]\}}|d kr | qS )r   r   )rE   ivalue)class_mappingr   r   
<listcomp>   s     zHload_text_multilabel_classifier.<locals>._classifier.<locals>.<listcomp>r   )itemsno_gradlogitsdetachr   numpy	enumerateappend)
texts	thresholdrD   tokensoutputscorespredictionsbatch_labels
predictionlabelsrK   r   r>   r   r4   r	   r   r   _classifier   s   
z4load_text_multilabel_classifier.<locals>._classifier)r?   r   )r1   r:   r,   rQ   scipy.specialr>   r	   r2   r3   r   configid2label)r:   r,   npMODELr^   r   r]   r   load_text_multilabel_classifierh   s   rd   c                  C   sB   dd l } z| jd W n ty   | d Y nw | jdS )Nr   ztokenizers/punktpunkt)nltkdatafindLookupErrordownload)rf   r   r   r   load_nltk_punkt   s   rk   c            
   
   C   s  dd l } d}t }t|| }| rt| sd}t}t|d }td | rXzt	| | r:t	| W n t
yW   td td|  td|  Y d S w z7tjdd	d
||t|gtjtjdd t|d }|jddd |d d }t|| t	| td W n4 tjy }	 ztd|	  W Y d }	~	d S d }	~	w ty }	 ztd|	  W Y d }	~	d S d }	~	ww z| t|W S  ty }	 ztd|	  W Y d }	~	d S d }	~	ww )Nr   models/reutersz)https://github.com/unclecode/crawl4ai.gitcrawl4aiu7   [LOG] ⏬ Downloading Spacy model for the first time...zh[WARNING] Unable to remove existing folders. Please manually delete the following folders and try again:z- gitclonez-bT)stdoutstderrcheckmodels)parentsr    reutersu-   [LOG] ✅ Spacy Model downloaded successfullyz0An error occurred while cloning the repository: zAn error occurred: zError loading spacy model: )spacyr(   r   existsanyiterdirr   printshutilrmtreePermissionError
subprocessrunstrDEVNULLmkdircopytreeCalledProcessError	Exceptionload)
rv   namer'   model_folderrepo_urlbranchrepo_foldermodels_foldersource_folderer   r   r   load_spacy_model   sd   


r   Fc                 C   s   | r+t d t }tj|dtj|dg}|D ]}t| r&t| qt d t d t	 \}}t d|  t d t
  t d d	S )
z*Download all models required for Crawl4AI.z![LOG] Removing existing models...rl   rs   z[LOG] Existing models removed.z$[LOG] Downloading text classifier...z [LOG] Text classifier loaded on z,[LOG] Downloading custom NLTK Punkt model...u-   [LOG] ✅ All models downloaded successfully.N)rz   r(   r!   r"   r#   r   rw   r{   r|   rd   rk   )remove_existingr'   model_foldersfolder_r   r   r   r   download_all_models   s"   
	
r   c                  C   sD   t d t d tjdd} | jdddd |  }t|jd	 d S )
Nz/[LOG] Welcome to the Crawl4AI Model Downloader!zE[LOG] This script will download all the models required for Crawl4AI.zCrawl4AI Model Downloader)descriptionz--remove-existing
store_truez)Remove existing models before downloading)actionhelp)r   )rz   argparseArgumentParseradd_argument
parse_argsr   r   )parserargsr   r   r   main   s   r   __main__)r6   )F)$	functoolsr   pathlibr   r~   r!   r{   tarfilemodel_loaderr   urllib.requesturllibcrawl4ai.configr   r"   realpathr#   getcwddirname__file____location__r   r   r   r   r(   r5   tupler9   r=   rd   rk   r   r   r   __name__r   r   r   r   <module>   sH    "
	






+


<	
