import json
import random
import re
import time
from typing import Optional

import requests
from bs4 import BeautifulSoup
from django.conf import settings

from .scraper_utils import parse_proxy_url_for_requests

# Proxy configuration (if needed).
# PROXY_LIST comes from Django settings; empty list disables proxying entirely.
PROXY_LIST = getattr(settings, 'PROXY_LIST', [])
# Pool of realistic desktop browser User-Agent strings; one is chosen at
# random per request so outgoing traffic looks less uniform.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0"
]

def fetch_product_page(url: str) -> Optional[str]:
    """
    Fetch a product page over HTTP with browser-like headers.

    A random User-Agent from USER_AGENTS is used on every call, and when
    PROXY_LIST is configured a random proxy is routed through
    parse_proxy_url_for_requests.

    Args:
        url: Absolute URL of the product page.

    Returns:
        The response body as text on success, or None on any request
        failure (connection error, timeout, non-2xx status).
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Cache-Control': 'max-age=0'
    }

    proxies = None
    if PROXY_LIST:
        proxy = random.choice(PROXY_LIST)
        proxies = parse_proxy_url_for_requests(proxy)
        print(f"[Proxy] Using: {proxy}")

    try:
        response = requests.get(
            url,
            headers=headers,
            proxies=proxies,
            timeout=30,
            allow_redirects=True
        )
        # Treat 4xx/5xx as failures so callers only ever see real HTML.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching page: {e}")
        return None

def _clean_js_json(raw: str) -> str:
    """Strip trailing commas that are legal in JS object literals but not in JSON."""
    raw = re.sub(r',\s*}', '}', raw)
    raw = re.sub(r',\s*]', ']', raw)
    return raw


def extract_detail_data_from_script(html: str) -> dict:
    """
    Extract product and seller data from script tags containing window.detailData.

    Tries the primary `window.detailData = {...};` assignment first, then a
    fallback `detailData: {...},` object-property form. The non-greedy regex
    is a heuristic: it captures up to the first `}` that satisfies the
    pattern's tail, which works for the payloads seen in practice.

    Args:
        html: Raw page HTML.

    Returns:
        The decoded detailData dict, or {} when no pattern matches or the
        captured text is not valid JSON.
    """
    patterns = (
        r"window\.detailData\s*=\s*({.*?})\s*;",
        r"detailData\s*:\s*({.*?})\s*,",
    )
    try:
        for pattern in patterns:
            match = re.search(pattern, html, re.DOTALL)
            if match:
                return json.loads(_clean_js_json(match.group(1)))
    except (json.JSONDecodeError, AttributeError, re.error) as e:
        print(f"Error parsing detailData: {e}")

    return {}

def extract_json_ld_data(html: str) -> dict:
    """
    Extract the first JSON-LD Product object from the page's script tags.

    Only the first `<script type="application/ld+json">` block is decoded.
    JSON-LD payloads may be a single object or a list of objects; both
    forms are handled and the first entry with `"@type": "Product"` is
    returned.

    Args:
        html: Raw page HTML.

    Returns:
        The Product dict, or {} when no JSON-LD block exists, it fails to
        decode, or it contains no Product entry.
    """
    try:
        json_ld_pattern = r'<script type="application/ld\+json">(.*?)</script>'
        matches = re.findall(json_ld_pattern, html, re.DOTALL)

        if not matches:
            return {}

        data = json.loads(matches[0].strip())
        if not data:
            return {}

        # Normalize: a bare object becomes a one-element list.
        if isinstance(data, dict):
            data = [data]
        for item in data:
            if isinstance(item, dict) and item.get("@type") == "Product":
                return item
    except Exception as e:
        print(f"Error extracting JSON-LD data: {e}")

    return {}

def extract_product_type_from_html(html: str) -> str:
    """
    Classify a product's availability from raw HTML keywords.

    Uses case-insensitive substring searches instead of DOM parsing.
    "send inquiry" takes precedence over "add to cart"; pages with
    neither phrase are treated as pre-order.

    Args:
        html: Raw page HTML.

    Returns:
        One of 'INQUIRY', 'IN_STOCK' or 'PRE_ORDER'.
    """
    if re.search(r"send inquiry", html, re.IGNORECASE):
        return 'INQUIRY'
    if re.search(r"add to cart", html, re.IGNORECASE):
        return 'IN_STOCK'
    return 'PRE_ORDER'

def parse_detail_data(detail_data: dict, html: Optional[str] = None) -> dict:
    """
    Parse the extracted detailData into a structured format with product and seller info.

    Args:
        detail_data: Dict decoded from the page's window.detailData script.
        html: Optional raw page HTML; when given it is used for product-type
            detection and JSON-LD enrichment (detail_url, brand, description).

    Returns:
        Dict with "product", "attributes", "category", "review", "seller"
        and "features" keys (plus JSON-LD extras when html is supplied).
        Returns {} when detail_data is empty or parsing raises.
    """
    result = {}

    try:
        if not detail_data:
            return result

        # Extract global data
        global_data = detail_data.get("globalData", {})
        trade_info = global_data.get("trade", {})

        # Extract product information
        product_info = global_data.get("product", {})
        if product_info:
            price_info = product_info.get("price", {})
            range_prices = price_info.get("productRangePrices", {})
            media_items = product_info.get("mediaItems", [])

            # Quantity-tiered pricing (e.g. 1-99 units at price X).
            ladder_price = product_info.get("customPrice", {}).get("productLadderPrices", [])
            ladder_price = [{
                "min": item.get('min'),
                "max": item.get('max'),
                "price": item.get('price')
            } for item in ladder_price]

            # Keep only full-size image URLs.
            images = [item.get('imageUrl', {}).get('big') for item in media_items if item.get('imageUrl', {}).get('big')]

            # "or [{}]" also covers a present-but-empty ladderPeriodList,
            # which would otherwise raise IndexError on [0] and silently
            # discard the whole result via the broad except below.
            lead_time_tiers = trade_info.get("leadTimeInfo", {}).get("ladderPeriodList") or [{}]

            result["product"] = {
                "title": product_info.get("subject"),
                "sku": product_info.get("productId"),
                "price_min": range_prices.get("dollarPriceRangeLow"),
                "price_max": range_prices.get("dollarPriceRangeHigh"),
                "min_order": product_info.get("moq"),
                "unit": price_info.get("unit"),
                "moq": product_info.get("moq"),
                "package_type": trade_info.get("logisticInfo", {}).get("unitSize"),
                "lead_time": lead_time_tiers[0].get("processPeriod"),
                "port": product_info.get("deliverPlace"),
                "payment_terms": [],
                "supply_ability": product_info.get("supplyAbility"),
                "images": images,
                "ladder_price": ladder_price,
                "sample_available": detail_data.get("nodeMap", {}).get("module_actions", {}).get("privateData", {}).get("isSampleTradable"),
                "sample": product_info.get('sample'),
                "sample_info": product_info.get('sampleInfo'),
                "product_type": extract_product_type_from_html(html) if html else 'UNKNOWN'
            }

        # Extract variants (from SKU)
        sku_info = product_info.get("sku", {})
        sku_attrs = {}
        if sku_info:
            result["product"]["variants"] = sku_info.get("skuInfoMap", {})
            attrs = sku_info.get("skuAttrs", [])

            # Group attribute values (color, size, ...) under their attribute name.
            for attr in attrs:
                attr_name = attr.get('name')
                if attr_name not in sku_attrs:
                    sku_attrs[attr_name] = []

                for value in attr.get("values", []):
                    sku_attrs[attr_name].append({
                        "id": value.get('id'),
                        "type": value.get('type'),
                        "name": value.get('name'),
                        "image": value.get('largeImage')
                    })

        result["attributes"] = sku_attrs

        # Extract category from breadcrumb
        breadcrumb = detail_data.get("nodeMap", {}).get("module_breadcrumb", {}).get("privateData", {}).get("breadCrumb", [])
        category = ' > '.join([item.get('name') for item in breadcrumb if item.get('name')])
        result["category"] = category

        result["review"] = global_data.get('review')

        # Extract seller information
        seller_info = global_data.get("seller", {})
        if seller_info:
            result["seller"] = {
                "company_name": seller_info.get("companyName"),
                "gold_supplier": seller_info.get("accountIsGoldPlusSupplier"),
                "verified_supplier": seller_info.get("hasPassAssessment"),
                "trade_assurance": seller_info.get("baoAccountIsService"),
                "business_type": seller_info.get("companyBusinessType"),
                "year_established": seller_info.get("companyJoinYears"),
                "annual_revenue": seller_info.get("companyTotalRevenueTitle"),
                "employees": seller_info.get("totalStaffNum"),
            }

        # Extract product attributes
        attributes = product_info.get("productBasicProperties", [])
        result["features"] = {attr["attrName"]: attr["attrValue"] for attr in attributes if "attrName" in attr and "attrValue" in attr}

        # Extract additional info from JSON-LD if available
        if html:
            json_ld_data = extract_json_ld_data(html)
            if json_ld_data:
                result['detail_url'] = json_ld_data.get('offers', {}).get('url')
                result['brand_name'] = json_ld_data.get('brand', {}).get('name')
                result['description'] = json_ld_data.get('description')

        return result

    except Exception as e:
        print(f"Error parsing detail data: {e}")
        return {}

def extract_product_info(product_url: str) -> dict:
    """
    Top-level entry point: fetch an Alibaba product page and extract its data.

    Prefers the rich window.detailData payload; falls back to JSON-LD when
    detailData is absent or fails to parse.

    Args:
        product_url: Absolute URL of the product page.

    Returns:
        Structured product data, or a dict with a single "error" key when
        the page cannot be fetched or contains no recognizable data.
    """
    html = fetch_product_page(product_url)
    if not html:
        return {"error": "Failed to fetch product page"}

    # Primary source: the window.detailData script payload.
    detail_data = extract_detail_data_from_script(html)
    if detail_data:
        parsed = parse_detail_data(detail_data, html)
        if parsed:
            return parsed

    # Secondary source: JSON-LD structured data.
    json_ld = extract_json_ld_data(html)
    if not json_ld:
        return {"error": "No product data found"}

    brand = json_ld.get("brand")
    return {
        "product": {
            "name": json_ld.get("name"),
            "description": json_ld.get("description"),
            "brand": brand.get("name") if isinstance(brand, dict) else brand,
        },
        "offers": json_ld.get("offers", {}),
    }

# Example usage
# if __name__ == "__main__":
#     example_url = "https://www.alibaba.com/product-detail/High-Quality-Men-s-Women-s_1601560067748.html"
#     result = extract_product_info(example_url)
#     print("✅ Extracted Product Data:")
    
#     if "error" in result:
#         print(f"Error: {result['error']}")
#     else:
#         for k, v in result.items():
#             if isinstance(v, dict):
#                 print(f"{k}:")
#                 for sub_k, sub_v in v.items():
#                     print(f"  {sub_k}: {sub_v}")
#             else:
#                 print(f"{k}: {v}")