o
    ›Œh  ã                   @   s@  d dl Z d dlmZ d dlZd dlmZmZ e j e  	de 
¡ ¡d¡ae jtdd e j td¡ad	d
„ Zd+defdd„Zdd„ Zdedeeeeeeeeeeef	  fdd„Zd,dedededededededededefdd „Zdefd!d"„Zd#d$„ Zd%d&„ Zd-ded'efd(d)„Zed*krže j t¡r™e  t¡ eƒ  dS dS ).é    N)ÚPath)ÚOptionalÚTupleÚCRAWL4_AI_BASE_DIRECTORYz	.crawl4aiT)Úexist_okzcrawl4ai.dbc                  C   s0   t  t¡} |  ¡ }| d¡ |  ¡  |  ¡  d S )NaŽ  
        CREATE TABLE IF NOT EXISTS crawled_data (
            url TEXT PRIMARY KEY,
            html TEXT,
            cleaned_html TEXT,
            markdown TEXT,
            extracted_content TEXT,
            success BOOLEAN,
            media TEXT DEFAULT "{}",
            links TEXT DEFAULT "{}",
            metadata TEXT DEFAULT "{}",
            screenshot TEXT DEFAULT ""
        )
    )Úsqlite3ÚconnectÚDB_PATHÚcursorÚexecuteÚcommitÚclose)Úconnr
   © r   úN/var/www/Befach/backend/venv/lib/python3.10/site-packages/crawl4ai/database.pyÚinit_db
   s
   

r   ÚmediaÚ
new_columnc              
   C   sv   t ƒ  zt t¡}| ¡ }| d| › d¡ | ¡  | ¡  W d S  ty: } zt	d|› ƒ W Y d }~d S d }~ww )Nz$ALTER TABLE crawled_data ADD COLUMN z TEXT DEFAULT ""z2Error altering database to add screenshot column: ©
Úcheck_db_pathr   r   r	   r
   r   r   r   Ú	ExceptionÚprint)r   r   r
   Úer   r   r   Úalter_db_add_screenshot   s   
€ÿr   c                   C   s   t stdƒ‚d S )Nz%Database path is not set or is empty.)r	   Ú
ValueErrorr   r   r   r   r   *   s   ÿr   ÚurlÚreturnc              
   C   sr   t ƒ  zt t¡}| ¡ }| d| f¡ | ¡ }| ¡  |W S  ty8 } zt	d|› ƒ W Y d }~d S d }~ww )NzˆSELECT url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot FROM crawled_data WHERE url = ?zError retrieving cached URL: ©
r   r   r   r	   r
   r   Úfetchoner   r   r   )r   r   r
   Úresultr   r   r   r   Úget_cached_url.   s   
€þr    ú{}Ú ÚhtmlÚcleaned_htmlÚmarkdownÚextracted_contentÚsuccessÚlinksÚmetadataÚ
screenshotc
                 C   s„   t ƒ  z$t t¡}
|
 ¡ }| d| |||||||||	f
¡ |
 ¡  |
 ¡  W d S  tyA } zt	d|› ƒ W Y d }~d S d }~ww )Na¤  
            INSERT INTO crawled_data (url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(url) DO UPDATE SET
                html = excluded.html,
                cleaned_html = excluded.cleaned_html,
                markdown = excluded.markdown,
                extracted_content = excluded.extracted_content,
                success = excluded.success,
                media = excluded.media,      
                links = excluded.links,    
                metadata = excluded.metadata,      
                screenshot = excluded.screenshot
        zError caching URL: r   )r   r#   r$   r%   r&   r'   r   r(   r)   r*   r   r
   r   r   r   r   Ú	cache_url;   s   
ó€ÿr+   c               
   C   sr   t ƒ  zt t¡} |  ¡ }| d¡ | ¡ }|  ¡  |d W S  ty8 } zt	d|› ƒ W Y d }~dS d }~ww )Nz!SELECT COUNT(*) FROM crawled_datar   zError getting total count: r   )r   r
   r   r   r   r   r   Úget_total_countS   s   


€þr,   c               
   C   ón   t ƒ  zt t¡} |  ¡ }| d¡ |  ¡  |  ¡  W d S  ty6 } zt	d|› ƒ W Y d }~d S d }~ww )NzDELETE FROM crawled_datazError clearing database: r   ©r   r
   r   r   r   r   Úclear_db`   ó   

€ÿr/   c               
   C   r-   )NzDROP TABLE crawled_datazError flushing database: r   r.   r   r   r   Úflush_dbk   r0   r1   Údefault_valuec              
   C   s|   t ƒ  z t t¡}| ¡ }| d| › d|› d¡ | ¡  | ¡  W d S  ty= } zt	d|› ƒ W Y d }~d S d }~ww )NzUPDATE crawled_data SET z = "z" WHERE screenshot IS NULLz!Error updating existing records: r   )r   r2   r   r
   r   r   r   r   Úupdate_existing_recordsv   s   
€ÿr3   Ú__main__)r   )r!   r!   r!   r"   )r   r!   )ÚosÚpathlibr   r   Útypingr   r   ÚpathÚjoinÚgetenvÚhomer	   Úmakedirsr   Ústrr   r   Úboolr    r+   Úintr,   r/   r1   r3   Ú__name__ÚexistsÚremover   r   r   r   Ú<module>   s*    ,4

ü