import asyncio
import csv
import json
import logging
import os
import time
import uuid

from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from django.core.files import File
from django.utils import timezone

from bot.alibaba.products_page_scraper import h_to_x
from bot.china.made_in_china import MadeInChinaScraper
from bot.utils.scraper_utils import crawl_products_new

from .models import ProductScrapeTask
from .serializers import ProductScrapeTaskSerializer

logger = logging.getLogger(__name__)


@shared_task
def run_alibaba_scraper(final_url, search_name, scrape_task_id, user_id, num_pages, start_page, num_products, skip_in_page):
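    """Scrape Alibaba search results for `search_name` and attach the resulting CSV to
    the ProductScrapeTask identified by `scrape_task_id`.

    The task crawls the paginated results starting from `final_url`, converts the raw
    crawl output to CSV, updates the task record, and pushes a completion (or failure)
    notification to the per-user channels group `user_<user_id>`.
    """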

    channel_layer = get_channel_layer()
    group_name = f'user_{user_id}'
    try:
        logger.info(f"Started scraping: {search_name} (Task ID: {scrape_task_id})")

        # Intermediate crawl output (JSON, despite the variable name) and the final CSV result
        html_filename = uuid.uuid4().hex[:16] + '.json'
        csv_filename = uuid.uuid4().hex[:16] + '.csv'
        
        time.sleep(3)
        logger.info("Starting async crawl...")
        output_dir = os.path.join(settings.MEDIA_ROOT, 'output_files')
        os.makedirs(output_dir, exist_ok=True)

        csv_file_path = os.path.join(output_dir, csv_filename)
        html_file_path = os.path.join(output_dir, html_filename)

        # Crawl the search result pages, then convert the raw output to CSV
        crawl_products_new(final_url, html_filename, num_pages, start_page)
        h_to_x(html_file=html_filename, csv_filename=csv_filename, num_products=num_products,
               scrape_task_id=scrape_task_id, user_id=user_id, skip_in_page=skip_in_page)
      
        # Check if CSV is empty
        with open(csv_file_path, 'r', encoding='utf-8') as f:
            reader = list(csv.reader(f))
            if len(reader) <= 1:  # Only header or completely empty
                logger.warning("CSV is empty. No products matched.")
                scrape_task = ProductScrapeTask.objects.filter(id=scrape_task_id).first()
                message = f"""
                "Your search '{search_name}' did not match any products. You may consider to:
                        - Check the spelling  
                        - Use fewer keywords  
                        - Use different keywords
                """
                if scrape_task:
                    scrape_task.status = "COMPLETED"
                    scrape_task.message = message
                    scrape_task.completed_at = timezone.now()
                    scrape_task.save()  
                os.remove(csv_file_path)
                os.remove(html_file_path)
                data = {
                    'event':'scrapper_complete',
                    'message':message,
                    'payload':ProductScrapeTaskSerializer(scrape_task).data
                }
                async_to_sync(channel_layer.group_send)(
                    group_name,
                    {
                        'type': 'scrapper_complete',
                        'message': message,
                        'data': data,
                    }
                )
                return {
                    "success": True,
                    "message": "No Products found with the given search parameters."
                }

        logger.info(f"Saving files: HTML={html_file_path}, CSV={csv_file_path}")
        with open(csv_file_path, 'rb') as f:
            scrape_task = ProductScrapeTask.objects.get(
                id=scrape_task_id
            )
            scrape_task.result_file.save(csv_filename, File(f))
            scrape_task.status = 'COMPLETED'
            scrape_task.message = 'Scraping completed successfully'
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
        os.remove(csv_file_path)
        os.remove(html_file_path)    

        logger.info(f"Files removed after saving. Task {scrape_task_id} complete.")

        message = "Task Completed Successfully !"
        data = {
            'event':'scrapper_complete',
            'message':message,
            'payload':ProductScrapeTaskSerializer(scrape_task).data
        }
        async_to_sync(channel_layer.group_send)(
            group_name,
            {
                'type': 'scrapper_complete',
                'message': message,
                'data': data,
            }
        )
        return {"success": True, "message":f"Products Scrapped Successfully", "scrape_task_id": str(scrape_task.id)}

    except Exception as e:
        logger.error(f"Error in scraping task {scrape_task_id}: {str(e)}", exc_info=True)

        scrape_task = ProductScrapeTask.objects.filter(
                id=scrape_task_id
        ).first()
        if scrape_task is not None:
            scrape_task.status = "FAILED"
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
            message = f"Scraping Failed!..."
            data = {
                'event':'scrapper_failed',
                'message':message,
                'payload':ProductScrapeTaskSerializer(scrape_task).data
            }
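            # Failure notifications are still routed through the consumer's
            # 'scrapper_complete' handler; the 'event' field in the payload marks the failure.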
            async_to_sync(channel_layer.group_send)(
                group_name,
                {
                    'type': 'scrapper_complete',
                    'message': message,
                    'data': data,
                }
            )
        return {"success": False, "message": str(e)}
    
from bot.utils.scrape_image import save_images_to_uploadthing

@shared_task
def run_made_in_china_scraper(search_name, scrape_task_id, user_id, num_products, skip_in_page, filters):
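    """Scrape made-in-china.com search results for `search_name` and attach the resulting
    CSV to the ProductScrapeTask identified by `scrape_task_id`.

    Runs the async MadeInChinaScraper, writes up to `num_products` products to a CSV
    (dict/list fields are JSON-encoded), optionally queues image uploads, and notifies
    the per-user channels group when the task completes or fails.
    """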
    channel_layer = get_channel_layer()
    group_name = f'user_{user_id}'
    
    try:
        logger.info(f"Started scraping MadeInChina: {search_name} (Task ID: {scrape_task_id})")

        csv_filename = uuid.uuid4().hex[:16] + '.csv'
        output_dir = os.path.join(settings.MEDIA_ROOT, 'output_files')
        os.makedirs(output_dir, exist_ok=True)
        csv_file_path = os.path.join(output_dir, csv_filename)

        # Run the scraper
        scraper = MadeInChinaScraper()

        # logger.info("********************")
        # logger.info(f"max_pages = {num_pages}")
        # logger.info(f"start_page = {start_page}")
        # logger.info(f"num_products = {num_products}")
        products = asyncio.run(
            scraper.scrape_made_in_china(
                search_name,
                start_page=1,
                num_products=num_products,
                skip_in_page=skip_in_page,
                filters=filters
            )
        )
        
        # Process the results
        if not products:
            logger.warning("No products found on MadeInChina")
            scrape_task = ProductScrapeTask.objects.filter(id=scrape_task_id).first()
            message = (
                f"Your search '{search_name}' or filters did not match any products on MadeInChina. You may want to:\n"
                "- Check the spelling\n"
                "- Use fewer keywords\n"
                "- Use different keywords"
            )
            if scrape_task:
                scrape_task.status = "COMPLETED"
                scrape_task.message = message
                scrape_task.completed_at = timezone.now()
                scrape_task.save()  
            
            data = {
                'event':'scrapper_complete',
                'message':message,
                'payload':ProductScrapeTaskSerializer(scrape_task).data
            }
            async_to_sync(channel_layer.group_send)(
                group_name,
                {
                    'type': 'scrapper_complete',
                    'message': message,
                    'data': data,
                }
            )
            return {
                "success": True,
                "message": "No Products found with the given search parameters."
            }

        # Prepare CSV data
        csv_headers = [
            "ID", "SKU", "URL", "Title", "Image URL", "Description", "Price", "MOQ", "Company",
            "Rating", "Type", "Category", "Attributes", "Features", "Sample Info",
            "Range Price", "Variants", "Brand Name", "Unit", "MXQ"
        ]

        csv_data = []
        images_tasks = []

        for product in products[:num_products]:
            try:
                product_id = uuid.uuid4().hex[:16]
                images = product.get('images', [])
                
                images_tasks.append({
                    'image_urls': images,
                    'product_id': product_id
                })

                # Basic safe extraction
                sku = product.get('sku', 'N/A')
                url = product.get('link', 'N/A')
                title = product.get('name', 'N/A')
                description = product.get('description', 'N/A')
                price = product.get('price', 'N/A')
                moq = product.get('moq', 'N/A')
                company = product.get('company', 'N/A')
                rating = product.get('Rating', 'N/A')
                prod_type = product.get('type', 'N/A')
                category = product.get('categories', 'N/A')
                brand = product.get('company', 'N/A')
                unit = product.get('unit', 'piece')
                mxq = product.get('mxq', '')

                # Keep dict/list fields intact (use json.dumps so they save cleanly into CSV)
                attributes = product.get('properties', {})
                features = product.get('basic_info', {})
                sample_info = product.get('Sample Info', {})
                price_ranges = product.get('price ranges', [])
                variants = product.get('variants', {})

                # Append row
                csv_data.append([
                    product_id,
                    sku,
                    url,
                    title,
                    "N/A",
                    description,
                    price,
                    moq,
                    company,
                    rating,
                    prod_type,
                    category,
                    json.dumps(attributes, ensure_ascii=False),   # dict
                    json.dumps(features, ensure_ascii=False),     # dict
                    json.dumps(sample_info, ensure_ascii=False),  # dict
                    json.dumps(price_ranges, ensure_ascii=False), # list
                    json.dumps(variants, ensure_ascii=False),     # dict
                    brand,
                    unit,
                    mxq
                ])

            except Exception as e:
                logger.error(f"Error processing product: {str(e)}")
                continue

        # Write CSV file
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(csv_headers)
            writer.writerows(csv_data)

        # Save the CSV file to the scrape task
        with open(csv_file_path, 'rb') as f:
            scrape_task = ProductScrapeTask.objects.get(id=scrape_task_id)
            scrape_task.result_file.save(csv_filename, File(f))
            scrape_task.status = 'COMPLETED'
            scrape_task.message = 'Scraping completed successfully'
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
        
        # Queue image uploads in a separate Celery task if image scraping is enabled
        if settings.SCRAPE_IMAGE.lower() in ['yes','true','1']:
            save_images_to_uploadthing.delay(scrape_task_id, images_tasks, user_id)
        
        # Clean up
        os.remove(csv_file_path)
        
        # Send completion message
        message = "Task Completed Successfully!"
        data = {
            'event':'scrapper_complete',
            'message':message,
            'payload':ProductScrapeTaskSerializer(scrape_task).data
        }
        async_to_sync(channel_layer.group_send)(
            group_name,
            {
                'type': 'scrapper_complete',
                'message': message,
                'data': data,
            }
        )
        
        return {"success": True, "message": "Products Scrapped Successfully", "scrape_task_id": str(scrape_task.id)}

    except Exception as e:
        logger.error(f"Error in MadeInChina scraping task {scrape_task_id}: {str(e)}", exc_info=True)

        scrape_task = ProductScrapeTask.objects.filter(id=scrape_task_id).first()
        if scrape_task is not None:
            scrape_task.status = "FAILED"
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
            message = "Scraping Failed!..."
            data = {
                'event':'scrapper_failed',
                'message':message,
                'payload':ProductScrapeTaskSerializer(scrape_task).data
            }
            async_to_sync(channel_layer.group_send)(
                group_name,
                {
                    'type': 'scrapper_complete',
                    'message': message,
                    'data': data,
                }
            )

        return {"success": False, "message": str(e)}


@shared_task
def run_1688_scrapper(search_name, scrape_task_id, user_id, num_products, skip_in_page, filters):
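    """Scrape task for 1688 searches.

    NOTE: this task currently delegates to MadeInChinaScraper and writes a flatter CSV
    than run_made_in_china_scraper, but otherwise follows the same flow: scrape, write
    the CSV, attach it to the ProductScrapeTask, queue image uploads, and notify the
    per-user channels group.
    """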
    channel_layer = get_channel_layer()
    group_name = f'user_{user_id}'
    
    try:
        logger.info(f"Started scraping MadeInChina: {search_name} (Task ID: {scrape_task_id})")

        csv_filename = uuid.uuid4().hex[:16] + '.csv'
        output_dir = os.path.join(settings.MEDIA_ROOT, 'output_files')
        os.makedirs(output_dir, exist_ok=True)
        csv_file_path = os.path.join(output_dir, csv_filename)

        # Run the scraper
        scraper = MadeInChinaScraper()

        # logger.info("********************")
        # logger.info(f"max_pages = {num_pages}")
        # logger.info(f"start_page = {start_page}")
        # logger.info(f"num_products = {num_products}")
        products = asyncio.run(
            scraper.scrape_made_in_china(
                search_name,
                start_page=1,
                num_products=num_products,
                skip_in_page=skip_in_page,
                filters=filters
            )
        )
        
        # Process the results
        if not products:
            logger.warning("No products found on MadeInChina")
            scrape_task = ProductScrapeTask.objects.filter(id=scrape_task_id).first()
            message = (
                f"Your search '{search_name}' or filters did not match any products on MadeInChina. You may want to:\n"
                "- Check the spelling\n"
                "- Use fewer keywords\n"
                "- Use different keywords"
            )
            if scrape_task:
                scrape_task.status = "COMPLETED"
                scrape_task.message = message
                scrape_task.completed_at = timezone.now()
                scrape_task.save()  
            
            data = {
                'event':'scrapper_complete',
                'message':message,
                'payload':ProductScrapeTaskSerializer(scrape_task).data
            }
            async_to_sync(channel_layer.group_send)(
                group_name,
                {
                    'type': 'scrapper_complete',
                    'message': message,
                    'data': data,
                }
            )
            return {
                "success": True,
                "message": "No Products found with the given search parameters."
            }

        # Prepare CSV data
        csv_headers = [
            "ID", "Title", "URL", "Description", "Image URL","UploadThings URL", "Price", "MOQ", "Company",
            "Supplier Info", "Rating"
        ]
        
        csv_data = []
        images_tasks = []
        for product in products[:num_products]:
            try:
                product_id = uuid.uuid4().hex[:16]
                images = product.get('images', [])
                
                # Prepare images for saving
                images_tasks.append({
                    'image_urls': images,
                    'product_id': product_id
                })
                
                # Format basic info
                basic_info = product.get('basic_info', {})
                description_parts = []
                if basic_info:
                    description_parts.extend([f"{k}: {v}" for k, v in basic_info.items()])
                if product.get('description'):
                    description_parts.append(product['description'])
                
                description = "\n".join(description_parts) if description_parts else "N/A"
                
                # Add to CSV data
                # Add to CSV data (one value per header, including the UploadThings URL column)
                csv_data.append([
                    product_id,
                    product.get('name', 'N/A'),
                    product.get('link', 'N/A'),
                    description,
                    'N/A',  # Image URL placeholder; replaced with actual image paths after upload
                    'N/A',  # UploadThings URL placeholder
                    product.get('price', 'N/A'),
                    product.get('moq', 'N/A'),
                    product.get('company', 'N/A'),
                    'N/A',  # Supplier info not available from the current scraper
                    'N/A'   # Rating not available from the current scraper
                ])
                
            except Exception as e:
                logger.error(f"Error processing product: {str(e)}")
                continue

        # Write to CSV
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(csv_headers)
            writer.writerows(csv_data)

        # Save the CSV file to the scrape task
        with open(csv_file_path, 'rb') as f:
            scrape_task = ProductScrapeTask.objects.get(id=scrape_task_id)
            scrape_task.result_file.save(csv_filename, File(f))
            scrape_task.status = 'COMPLETED'
            scrape_task.message = 'Scraping completed successfully'
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
        
        # Queue image uploads in a separate Celery task
        save_images_to_uploadthing.delay(scrape_task_id, images_tasks, user_id)
        
        # Clean up
        os.remove(csv_file_path)
        
        # Send completion message
        message = "Task Completed Successfully!"
        data = {
            'event':'scrapper_complete',
            'message':message,
            'payload':ProductScrapeTaskSerializer(scrape_task).data
        }
        async_to_sync(channel_layer.group_send)(
            group_name,
            {
                'type': 'scrapper_complete',
                'message': message,
                'data': data,
            }
        )
        
        return {"success": True, "message": "Products Scrapped Successfully", "scrape_task_id": str(scrape_task.id)}

    except Exception as e:
        logger.error(f"Error in MadeInChina scraping task {scrape_task_id}: {str(e)}", exc_info=True)

        scrape_task = ProductScrapeTask.objects.filter(id=scrape_task_id).first()
        if scrape_task is not None:
            scrape_task.status = "FAILED"
            scrape_task.completed_at = timezone.now()
            scrape_task.save()
            message = "Scraping Failed!..."
            data = {
                'event':'scrapper_failed',
                'message':message,
                'payload':ProductScrapeTaskSerializer(scrape_task).data
            }
            async_to_sync(channel_layer.group_send)(
                group_name,
                {
                    'type': 'scrapper_complete',
                    'message': message,
                    'data': data,
                }
            )

        return {"success": False, "message": str(e)}

def scrape_images(task_id, images_tasks):
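    """Queue the image-upload task for the collected image URLs of a scrape task."""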
    save_images_to_uploadthing.delay(task_id, images_tasks)
