import base64
import csv
import json
import mimetypes
import os
import random
import re
import time
import uuid
from io import BytesIO
from urllib.parse import urlparse

import requests
from asgiref.sync import async_to_sync
from bs4 import BeautifulSoup
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from playwright.async_api import async_playwright
from playwright.sync_api import sync_playwright
from playwright_stealth.stealth import stealth_async, stealth_sync
from upyloadthing import UTApi, UTApiOptions

from bot.models import ProductScrapeTask, ProductImage
from bot.serializers import ProductScrapeTaskSerializer
from bot.utils.scraper_utils import PROXY_LIST, parse_proxy_url, human_delay
 
EXECUTABLE_PATH = settings.EXECUTABLE_PATH

def clean_json_text(raw_text):
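    """Strip whole-line ``//`` comments and stray ``//`` markers that some
    sites embed in their JSON-LD blocks so the text can be parsed with
    ``json.loads``.
    """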
    cleaned = re.sub(r'\s+//\s+', ' ', raw_text)
    cleaned = re.sub(r'\s+//', ' ', cleaned)
    cleaned = re.sub(r'//\s+', ' ', cleaned)
    cleaned = re.sub(r'^\s*//.*$', '', cleaned, flags=re.MULTILINE)
    return cleaned.strip()
 
def fetch_html_with_stealth(url: str, proxy: str = None) -> str:
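    """Render ``url`` in a stealth-patched Chrome instance (optionally routed
    through ``proxy``) and return the resulting HTML.
    """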
    proxy_config = parse_proxy_url(proxy) if proxy else None
 
    with sync_playwright() as p:
        browser = p.chromium.launch(
            # headless=False,  # uncomment to run headed for local debugging
            proxy=proxy_config,
            channel="chrome",
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--disable-extensions",
            ],
            slow_mo=50,
            executable_path=EXECUTABLE_PATH
        )
 
        context = browser.new_context(
            user_agent=(
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
            ),
            viewport={"width": 1280, "height": 720},
            locale="en-US"
        )
 
        page = context.new_page()
        stealth_sync(page)
        page.goto(url, wait_until="domcontentloaded", timeout=40000)
        human_delay()

        html = page.content()
        browser.close()
        return html
 
def parse_and_print_product_info(html_content: str):
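    """Extract product data from the ``application/ld+json`` scripts in the page.

    Currently only the product image list is collected.
    """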
    soup = BeautifulSoup(html_content, "html.parser")
    scripts = soup.find_all("script", type="application/ld+json")
    data = {}
    for script in scripts:
        try:
            raw_text = script.string
            if not raw_text:
                continue
            cleaned_json_text = clean_json_text(raw_text)
            parsed = json.loads(cleaned_json_text)

            # JSON-LD may be a single object or a list of objects.
            items = parsed if isinstance(parsed, list) else [parsed]
            for item in items:
                if isinstance(item, dict) and item.get('@type') == 'Product':
                    data["images"] = item.get("image")
                    # Offers and reviews (item.get('offers'), item.get('review'))
                    # are available here too but are not extracted yet.
        except Exception as e:
            print("❌ Error parsing JSON:", e)
    return data
 
def scrap_detail_page(url):
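    """Fetch a product detail page through a random proxy and return the
    product data parsed from its JSON-LD.
    """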
    proxy = random.choice(PROXY_LIST)
 
    html = fetch_html_with_stealth(url, proxy)
 
    # Persist the raw HTML temporarily; it is removed again after parsing.
    output_dir = os.path.join(settings.BASE_DIR, 'media', 'output_files')
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, f'{uuid.uuid4().hex[:10]}.html')
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(html)

    try:
        data = parse_and_print_product_info(html)
        return data
    finally:
        # Remove the file after processing
        if os.path.exists(file_path):
            os.remove(file_path)

def get_utapi_object():
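    """Build a ``UTApi`` client, encoding the UploadThing credentials from
    settings into the base64 JSON token the SDK expects.
    """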
    token_data = {
        "apiKey": settings.UPLOADTHING_SECRET,
        "appId": settings.UPLOADTHING_APP_ID,
        "regions": [settings.UPLOADTHING_REGION]
    }
    encoded_token = base64.b64encode(json.dumps(token_data).encode("utf-8")).decode("utf-8")
    utapi = UTApi(UTApiOptions(token=encoded_token))
    return utapi

    
def upload_images_to_uploadthing(file_objs):
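    """Upload the given file-like objects to UploadThing in one batch and
    return the upload responses (an empty list on failure).
    """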
    utapi = get_utapi_object()
    try:
        response = utapi.upload_files(files=file_objs, content_disposition="inline")
        return response
    except Exception as e:
        print("Internal server error while uploading file:", str(e))
        return []

def emit_to_socket(type, message, data, group_name):
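    """Push an event to the user's Channels group so connected websocket
    clients receive live progress updates.
    """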
    channel_layer = get_channel_layer()
    async_to_sync(channel_layer.group_send)(
        group_name,
        {
            'type': type,
            'message': message,
            'data': data,
        }
    )


@shared_task
def save_images_to_uploadthing(task_id, images_dict, user_id=None):
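    """Celery task: download the scraped product images, re-upload them to
    UploadThing, rewrite the image-URL column of the task's result CSV and
    emit progress events to the ``user_<user_id>`` websocket group.

    ``images_dict`` is expected to be a list of
    ``{"product_id": ..., "image_urls": [...]}`` mappings.
    """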
    try:
        scrape_task = ProductScrapeTask.objects.get(id=task_id)
    except ProductScrapeTask.DoesNotExist:
        print("Error in save_images_to_uploadthing: Task not found")
        return

    if not scrape_task.result_file:
        # The result file may not be written yet; wait and re-fetch the task once.
        print("No result file found yet, retrying once")
        time.sleep(50)
        try:
            scrape_task = ProductScrapeTask.objects.get(id=task_id)
        except ProductScrapeTask.DoesNotExist:
            print("Error in save_images_to_uploadthing: Task not found")
            return
        if not scrape_task.result_file:
            print("Error in save_images_to_uploadthing: Result file still missing")
            return
    csv_path = scrape_task.result_file.path

    message = "Images Processing Started!"
    data = {
        'event':'images_started',
        'message':message,
        'payload':ProductScrapeTaskSerializer(scrape_task).data
    }
    scrape_task.images_status = 'IN_PROGRESS'
    scrape_task.save()
    group_name = f'user_{user_id}'
    emit_to_socket(type='scrapper_complete', message=message, data=data, group_name=group_name)

    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"CSV path does not exist: {csv_path}")

    try:
        # Load existing CSV data
        with open(csv_path, "r", encoding="utf-8", newline="") as f:
            reader = list(csv.reader(f))
            header = reader[0]
            rows = reader[1:]

        # Find the indexes of the product-id and image-URL columns
        product_id_idx = header.index("ID")
        try:
            image_urls_idx = header.index("Image URL")
        except ValueError:
            image_urls_idx = header.index("Image URLs")
        processed = 0
        processed_obj = []
        # Process each product in images_dict
        for image_obj in images_dict:
            pid = image_obj.get("product_id")
            image_urls = image_obj.get("image_urls", [])
            if not image_urls or not pid:
                continue

            # Download all images
            file_objs = []
            for img_url in image_urls:
                try:
                    if not img_url:
                        continue
                    response = requests.get(img_url, timeout=30)
                    response.raise_for_status()
                    file_obj = BytesIO(response.content)

                    content_type = response.headers.get("Content-Type", "image/jpeg")
                    ext = mimetypes.guess_extension(content_type.split(";")[0]) or ".jpg"
                    
                    file_obj.name = f"{uuid.uuid4().hex}{ext}"
                    file_objs.append(file_obj)
                except Exception as e:
                    print(f"Error downloading {img_url}: {e}")

            if not file_objs:
                continue

            # Upload all images in one call; skip this product if nothing was uploaded
            upload_responses = upload_images_to_uploadthing(file_objs)
            uploaded_urls = [res.url for res in upload_responses if hasattr(res, "url")]
            if not uploaded_urls:
                continue

            # Update CSV row for this product_id
            for row in rows:
                if row[product_id_idx] == pid:
                    row[image_urls_idx] = ",".join(uploaded_urls)
                    print(f"Updated {pid} with {len(uploaded_urls)} images")
                    break

            processed += 1
            processed_obj.append({'id':pid, 'image_url':uploaded_urls[0]})

            if processed % 2 == 0:
                message = 'Some images have been processed successfully!'
                data = {
                    'event':'images_processed',
                    'message':message,
                    'payload':processed_obj
                }
                emit_to_socket(type='scrapper_complete', message=message, data=data, group_name=group_name)
                processed_obj = []

        message = 'All images have been processed successfully!'
        data = {
            'event':'images_processed',
            'message':message,
            'payload':processed_obj
        }
        emit_to_socket(type='scrapper_complete', message=message, data=data, group_name=group_name)
        # Save CSV back
        with open(csv_path, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(rows)

        print("CSV updated successfully.")
        scrape_task.images_status = 'COMPLETED'
        scrape_task.save()
        message = 'Images processed successfully!'
        data = {
            'event':'images_completed',
            'message':message,
            'payload':{}
        }
        emit_to_socket(type='scrapper_complete', message=message, data=data, group_name=group_name)
        return {'success': True, 'message': f'Images uploaded successfully for task {scrape_task.id}'}

    except Exception as e:
        print(f"Error in save_images_to_uploadthing: {e}")
        scrape_task.images_status = 'FAILED'
        scrape_task.save()
        return {'success':False, 'message':f"Error in save_images_to_uploadthing: {e}"}