import asyncio
import re
from playwright.async_api import async_playwright
import csv
import json
from datetime import datetime
import random
from urllib.parse import quote
from django.utils import timezone
from django.conf import settings

EXECUTABLE_PATH = settings.EXECUTABLE_PATH


class MadeInChinaScraper:
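    """Playwright-based scraper for made-in-china.com: walks paginated search
    results and enriches each product with data from its detail page.
    """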
    
    def __init__(self):
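        # Scraper configuration: Playwright navigation timeout (ms), retry
        # attempts per results page, and the min/max randomized delay (seconds)
        # between results pages.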
        self.products = []
        self.timeout = 45000
        self.retry_count = 3
        self.min_delay = 2
        self.max_delay = 5

    async def scrape_product_details(self, page, url):
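        """Open a single product page and collect categories, price ranges,
        rating, sample price, basic info, description, and image URLs.

        Returns a dict of details, or None if the page fails to load or an
        unexpected error occurs.
        """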
        try:
            print(f"[INFO] Loading product details page: {url}")
            try:
                await page.goto(url, timeout=self.timeout, wait_until="domcontentloaded")
                await page.wait_for_selector('.detail-tab-item, .basic-info-list', timeout=15000)
            except Exception as e:
                print(f"[WARN] Page load failed for {url}: {str(e)[:100]}...")
                return None
            
            details = {
                'basic_info': {},
                'description': "",
                'images': [],
                'categories': "",
                'price_ranges': {},
                'Rating': "",
                'Features': {},
                'Sample Info': "",
            }

            # Scrape basic info if available
            # Scrape category breadcrumb
            try:
                cat = []
                # Get the <li> element that holds the breadcrumb
                category_breadcrumb = await page.query_selector('.sr-QPWords-item')

                if category_breadcrumb:
                    # Get all <a> tags inside the breadcrumb
                    category_a_tags = await category_breadcrumb.query_selector_all('a')

                    for item in category_a_tags:
                        try:
                            category_text = (await item.text_content()).strip()
                            if category_text:
                                cat.append(category_text)
                        except Exception:
                            continue

                # Drop a leading "Home" crumb, then store the breadcrumb joined with " > "
                if cat:
                    if cat[0].lower().strip() == "home":
                        cat.pop(0)
                    details['categories'] = " > ".join(cat)

            except Exception as e:
                print(f"[WARN] Couldn't scrape category from Breadcrumb: {e}")

            
            try:
                details['price_ranges'] = {}

                # ---- Case 1: single price structure ----
                one_price_rows = await page.query_selector_all(".only-one-priceNum-tr")
                if one_price_rows:
                    print("[INFO] Found single price range")
                    for row in one_price_rows:
                        price_el = await row.query_selector('.only-one-priceNum-td-left')
                        moq_el = await row.query_selector('.sa-only-property-price')

                        price = (await price_el.inner_text()).strip() if price_el else None
                        moq = (await moq_el.inner_text()).strip() if moq_el else None

                        if price and moq:
                            # clean moq like "1 Set (MOQ)" → "1 Set"
                            moq = re.sub(r'\(.*?\)', '', moq).strip()
                            details['price_ranges'][moq] = price

                # ---- Case 2: multiple ranges ----
                else:
                    multiple_price_rows = await page.query_selector_all(".swiper-slide-div")
                    if multiple_price_rows:
                        for row in multiple_price_rows:
                            price_el = await row.query_selector('.swiper-money-container')
                            moq_el = await row.query_selector('.swiper-unit-container')

                            price = (await price_el.inner_text()).strip() if price_el else None
                            moq = (await moq_el.inner_text()).strip() if moq_el else None

                            if price and moq:
                                # clean "50+ Pieces" stays the same, but we strip whitespace
                                moq = re.sub(r'\(.*?\)', '', moq).strip()
                                details['price_ranges'][moq] = price

            except Exception as e:
                print(f"[WARN] Couldn't scrape price ranges: {e}")

            try:
                rating_el = await page.query_selector('.review-score')
                if rating_el:
                    rating_text = (await rating_el.inner_text()).strip()
                    details['Rating'] = rating_text
                    print(f"[INFO] Rating found: {rating_text}")
                else:
                    details['Rating'] = "N/A"
                    print("[INFO] No rating element found")
            except Exception as e:
                print(f"[WARN] Couldn't scrape rating: {e}")

                
            try:
                sample_order = await page.query_selector('.J-sample-order .sample-price')
                if sample_order:
                    price_text = (await sample_order.inner_text()).strip()
                    details["Sample Info"] = price_text
                else:
                    details["Sample Info"] = None
            except Exception as e:
                print(f"[WARN] Couldn't scrape sample product price: {e}")

            try:
                basic_info_items = await page.query_selector_all('.bsc-item')
                for item in basic_info_items:
                    try:
                        label_element = await item.query_selector('.bac-item-label')
                        value_element = await item.query_selector('.bac-item-value')

                        if label_element and value_element:
                            label = (await label_element.inner_text()).strip().replace(':', '')
                            value = (await value_element.inner_text()).strip()
                            details['basic_info'][label] = value
                        else:
                            # If one of them is missing, store None
                            if label_element:
                                label = (await label_element.inner_text()).strip().replace(':', '')
                                details['basic_info'][label] = None
                    except Exception as e:
                        print(f"[WARN] Failed parsing item: {e}")
                        continue
            except Exception as e:
                print(f"[WARN] Couldn't scrape basic info: {e}")

            # Scrape description if available
            try:
                desc_element = await page.query_selector('.detail-desc .rich-text')
                if desc_element:
                    details['description'] = (await desc_element.inner_text()).strip()
            except Exception as e:
                print(f"[WARN] Couldn't scrape description: {e}")

            # Scrape images if available
            try:
                details['images'] = []

                # Select all slide items
                slide_items = await page.query_selector_all('.sr-proMainInfo-slide-picItem')

                for slide in slide_items:
                    # 1) Check <img> inside slide
                    img_tag = await slide.query_selector('img.J-picImg-zoom-in')
                    if img_tag:
                        src = await img_tag.get_attribute('src')
                    else:
                        # 2) fallback: use fsrc attribute on the slide itself
                        src = await slide.get_attribute('fsrc')

                    if src:
                        # Clean protocol-relative URLs
                        if src.startswith("//"):
                            src = "https:" + src
                        elif not src.startswith(("http:", "https:")):
                            src = "https://" + src.lstrip("/")

                        # Skip .mp4.webp thumbnails (video previews)
                        if not src.lower().endswith(".mp4.webp"):
                            details['images'].append(src)

                print(f"[INFO] Collected {len(details['images'])} images")
            except Exception as e:
                print(f"[WARN] Couldn't scrape images: {e}")

            return details

        except Exception as e:
            print(f"[ERROR] Error scraping product details: {str(e)[:100]}...")
            return None
        
    async def scrape_page(self, page, url):
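        """Load a search results page with retries.

        Returns True when product results are present, False on a "no results"
        page or after exhausting all retry attempts.
        """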
        for attempt in range(self.retry_count):
            try:
                print(f"[INFO] Loading page (attempt {attempt + 1}): {url}")
                await page.goto(url, timeout=self.timeout, wait_until="domcontentloaded")
                
                # Wait for either products or "no results" message
                await page.wait_for_selector(
                    '.prod-content, .no-result-content', 
                    timeout=15000
                )
                
                # Check for no results
                no_results = await page.query_selector('.no-result-content')
                if no_results:
                    print("[INFO] No results found on this page")
                    return False
                
                return True
                
            except Exception as e:
                print(f"[WARN] Attempt {attempt + 1} failed: {str(e)[:100]}...")
                if attempt < self.retry_count - 1:
                    delay = random.uniform(3, 8)
                    print(f"[INFO] Retrying in {delay:.1f} seconds...")
                    await asyncio.sleep(delay)
                else:
                    print("[ERROR] Max retries reached for this page")
                    return False

    def build_filter_string(self, filters: dict) -> str:
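        """Translate a filters dict into the filter segment of the
        made-in-china.com multi-search URL.

        A minimal example of the mapping this method produces (whether the
        site honors each code is taken on trust from the original scraper):

            build_filter_string({"min_order": 100, "business_type": "Trading Company"})
            # -> "F1--Min_100--BT_2"

        With no recognized filters the base segment "F1" is returned.
        """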
        filter_parts = []

        # Map for business type
        business_type_map = {
            "Manufacturer/ factory": 1,
            "Trading Company": 2,
            "Service Provider": 3,
            "Group Corporation": 4,
            "Retailer": 5,
            "Trade Agent": 6,
            "Buying Office": 7,
            "Other": 8,
            "Wholesaler": 9,
            "Government Institution": 10,
            "Individuals/SOHO": 11
        }

        # Map for R&D type
        rd_type_map = {
            "OEM": 4,
            "ODM": 5,
            "Own Brand": 6,
            "Others": 99
        }

        if filters.get("secured_trading"):
            filter_parts.append("DP_1")

        if filters.get("buy_sample"):
            filter_parts.append("ODS_1")

        if filters.get("min_order"):
            filter_parts.append(f"Min_{filters['min_order']}")

        if filters.get("search_within"):
            term = filters["search_within"]
            filter_parts.append(f"PV_9999_{term}_999999999")

        if filters.get("audited_suppliers"):
            filter_parts.append("SGS_AS--CL_DGM")

        if filters.get("business_type"):
            bt_value = business_type_map.get(filters["business_type"])
            if bt_value:
                filter_parts.append(f"BT_{bt_value}")

        if filters.get("rd_type"):
            rd_value = rd_type_map.get(filters["rd_type"])
            if rd_value:
                filter_parts.append(f"RD_{rd_value}")

        # Join with --
        if filter_parts:
            return "F1--" + "--".join(filter_parts)
        return "F1"

    async def scrape_made_in_china(self, search_term, start_page=1, num_products=None, skip_in_page=0, filters=None):
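        """Run the full scrape: launch Chromium, walk the paginated search
        results for `search_term`, and collect product data.

        Args:
            search_term: query string for the site search.
            start_page: results page to start from (1-based).
            num_products: stop after this many products; None means continue
                until a page with no results is reached.
            skip_in_page: number of leading product cards to skip before
                collecting.
            filters: optional dict passed to build_filter_string().

        Returns the accumulated list of product dicts (self.products).
        """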
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--start-maximized"
                ],
                slow_mo=100,
                executable_path=EXECUTABLE_PATH
            )
            
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) "
                        "Chrome/115.0.0.0 Safari/537.36",
                viewport={"width": 1366, "height": 768},
                locale="en-US"
            )
            
            page = await context.new_page()
            encoded_term = quote(search_term.replace(' ', '+'))

            filter_string = self.build_filter_string(filters or {})

            collected = 0
            current_page = start_page
            seen_count = 0

            while True:
                url = f"https://www.made-in-china.com/multi-search/{encoded_term}/{filter_string}/{current_page}.html"
                success = await self.scrape_page(page, url)
                if not success:
                    print(f"[INFO] Stopping, no results on page {current_page}")
                    break

                # product_nodes = await page.query_selector_all('.list-item, .list-node, .prod-item')
                product_nodes = await page.query_selector_all('.prod-content')
                print(f"[INFO] Found {len(product_nodes)} products on page {current_page}")

                for product in product_nodes:
                    if seen_count < skip_in_page:
                        seen_count += 1
                        continue

                    if num_products and collected >= num_products:
                        print(f"[INFO] Collected enough products ({collected}), stopping...")
                        await context.close()
                        await browser.close()
                        return self.products

                    try:
                        success = await self.scrape_single_product(page, product, current_page)
                        if success:   # only count entries that were actually appended as products
                            collected += 1
                    except Exception as e:
                        print(f"[ERROR] Failed to scrape product on page {current_page}: {str(e)[:100]}")

                # Advance to the next results page after a randomized delay
                current_page += 1
                delay = random.uniform(self.min_delay, self.max_delay)
                await asyncio.sleep(delay)

            await context.close()
            await browser.close()
            return self.products

    async def scrape_single_product(self, page, product, page_num):
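        """Extract one product card from the results page, then open its detail
        page for extra fields. Appends the combined record to self.products.

        Returns True when a real product was collected, False for skipped
        non-product entries (e.g. company-page links).
        """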

        product_data = {
            'name': "N/A",
            'link': "N/A",
            'price': "N/A",
            'moq': "N/A",
            'company': "N/A",
            'properties': {},
            'scraped_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'page_num': page_num,
            'type': "N/A",
            'sku': "N/A",
            'variant': "N/A",
            'Sample Info': "",
            'Rating': "",
        }
        # Name & Link
        name_element = await product.query_selector('h2.product-name a:not(.activity-flag-img), .prod-name a:not(.activity-flag-img)')
        if name_element:
            product_data['name'] = (await name_element.inner_text()).strip()
            product_data['link'] = await name_element.get_attribute('href') or "N/A"
            if product_data['link'].startswith("https"):
                product_data['link']=product_data['link']
            elif product_data['link'].startswith("//"):
                product_data['link'] = "https:" + product_data['link']
            elif not product_data['link'].startswith(("http:", "https:")):
                product_data['link'] = "https://www.made-in-china.com" + product_data['link']
    
        if (
            product_data['link'] == "N/A"
            or "made-in-china.com" not in product_data['link']
            or product_data['link'].endswith(".en.made-in-china.com")  # company page only
        ):
            print(f"[INFO] Skipping non-product entry on page {page_num}")
            return False  # <-- Do not append or count this product
        # Price
        try:
            price_element = await product.query_selector('.price-info .price, .price-val')
            if price_element:
                product_data['price'] = (await price_element.inner_text()).strip()
        except Exception:
            print("[WARN] Couldn't get price")

        # MOQ
        try:
            moq_element = await product.query_selector('.product-property .info:nth-of-type(2)')
            if moq_element:
                full_text = (await moq_element.inner_text()).strip()
                product_data['moq'] = re.sub(r'\(.*?\)', '', full_text).replace("MOQ", "").strip()
        except Exception:
            print("[WARN] Couldn't get MOQ")

        # Company
        try:
            company_element = await product.query_selector('.company-name-wrapper .company-name-txt a span')
            if not company_element:
                company_element = await product.query_selector('.company-name-wrapper span')

            product_data['company'] = (await company_element.inner_text()).strip() if company_element else "N/A"
        except Exception:
            print("[WARN] Couldn't get company")

        # Properties
        try:
            prop_elements = await product.query_selector_all('.property-list li')
            for prop in prop_elements:
                try:
                    # Label (text before the colon)
                    full_text = (await prop.inner_text()).strip()
                    label_text = full_text.split(":")[0].strip()

                    # Value (inside span.property-val)
                    value_element = await prop.query_selector('.property-val')
                    value_text = (await value_element.inner_text()).strip() if value_element else ""

                    product_data["properties"][label_text] = value_text
                except Exception:
                    continue
        except Exception:
            print("[WARN] Couldn't get properties")

        # Product details page
        if product_data['link'] != "N/A":
            context = page.context
            details_page = await context.new_page()
            product_details = await self.scrape_product_details(details_page, product_data['link'])
            match = re.search(r'/product/([^/]+)/', product_data['link'])
            if match:
                product_data['sku'] = match.group(1)  
           
            if product_details:
                product_data.update({
                    'basic_info': product_details.get('basic_info', {}),
                    'description': product_details.get('description', ''),
                    'images': product_details.get('images', []),
                    'categories': product_details.get('categories', ""),
                    'price ranges': product_details.get('price_ranges', {}),
                    'Rating': product_details.get('Rating', 'N/A'),
                    'Sample Info': product_details.get('Sample Info', ""),
                })
            await details_page.close()

        product_data['type'] = "inquiry"
        product_data['variant'] = "N/A"

        self.products.append(product_data)
        return True



async def save_to_csv(products, filename):
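    """Write scraped products to a CSV file, serializing nested dicts/lists as
    JSON strings. Note: the file I/O here is synchronous even though the
    function is declared async, so it briefly blocks the event loop.
    """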
    if not products:
        print("[WARN] No products to save")
        return
        
    # Include all possible fields in CSV
    fieldnames = ['name', 'link', 'price', 'moq', 'company', 'page_num', 'scraped_at']
    
    if any('properties' in p for p in products):
        fieldnames.append('properties')
    # Add detail fields if they exist in any product
    if any('basic_info' in p for p in products):
        fieldnames.append('basic_info')
    if any('description' in p for p in products):
        fieldnames.append('description')
    if any('images' in p for p in products):
        fieldnames.append('images')
    if any('categories' in p for p in products):
        fieldnames.append('categories')
    if any('price ranges' in p for p in products):
        fieldnames.append('price ranges')

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        
        for product in products:
            row = {k: product.get(k, '') for k in fieldnames}
            # Convert dicts/lists to strings for CSV
            for k, v in row.items():
                if isinstance(v, (dict, list)):
                    row[k] = json.dumps(v, ensure_ascii=False)
            writer.writerow(row)
    
    print(f"[INFO] Saved {len(products)} products to {filename}")

    
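# Example usage (a minimal sketch, assuming this module is imported inside a
# configured Django project so that settings.EXECUTABLE_PATH points at a
# Chromium binary; the search term and filter values below are illustrative):
#
#     async def run():
#         scraper = MadeInChinaScraper()
#         products = await scraper.scrape_made_in_china(
#             "solar panel",
#             start_page=1,
#             num_products=20,
#             filters={"business_type": "Trading Company", "min_order": 100},
#         )
#         await save_to_csv(products, "made_in_china_products.csv")
#
#     asyncio.run(run())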