import csv
import json
import os
import re
import traceback
import uuid

from django.conf import settings

from bot.utils.scrape_details import extract_product_info
from bot.utils.scrape_image import save_images_to_uploadthing

def h_to_x(html_file: str, csv_filename: str, num_products: int, scrape_task_id, user_id, skip_in_page: int):
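    """Convert a scraped card dump into a product CSV.

    Despite the name, `html_file` is expected to hold a JSON list of product
    cards saved under MEDIA_ROOT/output_files. Each card is enriched via
    `extract_product_info` and written as one row of `csv_filename`; the
    collected image URLs are optionally handed off to a background upload task.
    """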
    # Step 1: Resolve input/output paths under MEDIA_ROOT/output_files
    output_dir = os.path.join(settings.MEDIA_ROOT, 'output_files')
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, csv_filename)
    html_path = os.path.join(output_dir, html_file)

    if not os.path.exists(html_path):
        print(f"[❌] HTML file {html_path} not found.")
        return

    with open(html_path, "r", encoding="utf-8") as file:
        # html = file.read()
        html = json.load(file)

    # soup = BeautifulSoup(html, "html.parser")

    # Step 2: Find all product cards
    # cards = soup.find_all("div", class_="fy23-search-card")
    cards = html
    processed = 0
    retried = 0

    # Step 3: Prepare CSV data
    csv_data = []
    csv_headers = [
        "ID","SKU" ,"URL", "Title", "Image URL", "Description", "Price", "MOQ", "Company",
        "Rating", "Type", "Category", "Attributes", "Features", "Sample Info",
        "Range Price", "Variants", "Brand Name", "Unit", "MXQ"
    ]
    images_tasks = []
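    # Skip the first `skip_in_page` cards of this page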
    cards = cards[skip_in_page:]

    # Step 4: Walk the remaining cards and build one CSV row per product
    for card in cards:
        if processed >= num_products:
            break
        try:
            # The card title may carry HTML highlight markup; strip the tags
            raw_title = card.get('title') or ''
            title = re.sub(r"<[^>]+>", "", raw_title)

            product_url = card.get('productUrl')
            if not product_url:
                continue
            url = "https:" + product_url

            result = extract_product_info(url)
            if not result:
                retried += 1
                print(f"[!] No detail data for {url} (failed lookups: {retried})")
                if retried >= 100:
                    print("Stopping after 100 failed detail lookups")
                    break
                continue
            description = result.get('description') or ''
            # The numeric SKU is embedded in the product URL as "_<digits>.html"
            sku_match = re.search(r"_(\d+)\.html", url)
            sku = sku_match.group(1) if sku_match else ''
            # Price, company and rating come from the search card; MOQ from the detail page
            price = card.get("price")
            moq = result.get('product', {}).get('moq')
            company = card.get("companyName")
            rating = card.get("productScore")

            # Additional details from the scraped detail page
            product_type = result.get('product', {}).get('product_type')
            category = result.get('category')
            attributes = result.get('attributes')
            features = result.get('features')
            sample_info = result.get('product', {}).get('sample_info')
            range_price = result.get('product', {}).get('ladder_price')
            variants = result.get('product', {}).get('variants')
            brand_name = result.get('brand_name')
            unit = result.get('product', {}).get('unit')
            # sample_info is not always a dict, so guard the nested lookup
            mxq = sample_info.get('maxQuantity', '') if isinstance(sample_info, dict) else ''
            # Detail-page image URLs are uploaded later as a background task
            images = result.get('product', {}).get('images')
            product_id = uuid.uuid4().hex[:16]
            images_tasks.append({
                'image_urls': images,
                'product_id': product_id,
            })

            csv_data.append([
                product_id, sku, url, title, "N/A", description, price, moq, company,
                rating, product_type, category, attributes, features, sample_info,
                range_price, variants, brand_name, unit, mxq
            ])
            processed += 1
        except Exception as e:
            traceback.print_exc()
            print(f"[Error] Skipping a card due to: {e}")
    
    # Step 5: Write to CSV
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(csv_headers)
        writer.writerows(csv_data)
    
    # Step 6: Optionally queue the image uploads as a background task
    if settings.SCRAPE_IMAGE.lower() in ['yes', 'true', '1']:
        save_images_to_uploadthing.delay(scrape_task_id, images_tasks, user_id)

    print(f"✅ Extracted {len(csv_data)} products and saved to {csv_filename}")


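# Usage sketch (values are illustrative, not from the real pipeline):
#     h_to_x(
#         html_file="page_1.json",        # JSON dump of search-result cards
#         csv_filename="products.csv",
#         num_products=40,
#         scrape_task_id=task_id,
#         user_id=user_id,
#         skip_in_page=0,
#     )
# This would write MEDIA_ROOT/output_files/products.csv from the cards in
# MEDIA_ROOT/output_files/page_1.json.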