import asyncio
import json
import re
import urllib.parse
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright
from playwright.sync_api import sync_playwright
from playwright_stealth import Stealth  # provides the Stealth() wrapper used below

LINGVA_API_BASE = "https://lingva.ml/api/v1"
COOKIES_FILE = "cookies.json"
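# cookies.json stores Playwright's cookie list as-is, roughly:
#   [{"name": "...", "value": "...", "domain": ".1688.com", "path": "/", ...}]
# (illustrative shape; context.cookies() defines the exact fields)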

def translate_text(text, source="zh", target="en"):
    """Translate text through a public Lingva Translate instance."""
    url = f"{LINGVA_API_BASE}/{source}/{target}/{quote(text)}"
    response = requests.get(url, timeout=30)
    if response.ok:
        return response.json().get("translation", "")
    print("Translation failed:", response.status_code)
    return ""

def load_cookies(context):
    try:
        with open(COOKIES_FILE, "r", encoding="utf-8") as f:
            cookies = json.load(f)
            context.add_cookies(cookies)
            print("[✓] Cookies loaded.")
    except FileNotFoundError:
        print("[!] No cookies found, continuing without login session.")

def save_cookies(context):
    cookies = context.cookies()
    with open(COOKIES_FILE, "w", encoding="utf-8") as f:
        json.dump(cookies, f, indent=2)
    print("[✓] Cookies saved.")

def scrape_and_translate():
    url = "https://s.1688.com/selloffer/offer_search.htm?keywords=毛巾"
    
    with Stealth().use_sync(sync_playwright()) as p:
        browser = p.chromium.launch(headless=False)
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/114.0.0.0 Safari/537.36",
            locale="zh-CN",
            viewport={"width": 1280, "height": 800}
        )

        load_cookies(context)

        page = context.new_page()

        print(f"[→] Navigating to {url}")
        page.goto(url, wait_until="load", timeout=60000)
        page.wait_for_timeout(5000)

        if "login" in page.url:
            print("⚠️ Redirected to login page. Please log in manually.")
            page.pause()  # Manually scan QR code to log in
            save_cookies(context)
            return

        # Extract and translate
        chinese_text = page.inner_text("body")[:1000]
        print("Original Chinese:\n", chinese_text)

        translated = translate_text(chinese_text)
        print("\nTranslated English:\n", translated)

        save_cookies(context)
        browser.close()
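# Note: scrape_and_translate() is the sync, stealth-wrapped variant; only the
# async scrape() below is run from __main__.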

# ---------------------------------------------------------------------------
# Async 1688 product scraper
# ---------------------------------------------------------------------------


COOKIES_FILE = "data/cookies_1688.json"
# PRODUCTS_LIST_URL = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB%CA%D6%BB%FA"  # change keyword
PRODUCTS_LIST_URL = "https://s.1688.com/selloffer/offer_search.htm?keywords="

def to_1688_keyword(word):
    """Percent-encode a search keyword the way 1688 expects: GB2312 bytes."""
    # Plain ASCII keywords pass through unchanged
    if re.fullmatch(r"[A-Za-z0-9\s]+", word):
        return word

    gb2312_bytes = word.encode("gb2312")
    return urllib.parse.quote_from_bytes(gb2312_bytes)
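# Worked example (matches the sample URL above):
#   to_1688_keyword("苹果手机")   # -> "%C6%BB%B9%FB%CA%D6%BB%FA"
#   to_1688_keyword("iphone 15")  # -> "iphone 15"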


def translate_mymemory(text, from_lang="en", to_lang="zh-CN"):
    """Translate via the free MyMemory API."""
    url = "https://api.mymemory.translated.net/get"
    params = {"q": text, "langpair": f"{from_lang}|{to_lang}"}
    r = requests.get(url, params=params, timeout=30)  # requests URL-encodes q for us
    return r.json()["responseData"]["translatedText"]
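# Example (anonymous MyMemory access is rate-limited to a few thousand chars/day):
#   translate_mymemory("毛巾", from_lang="zh-CN", to_lang="en")  # -> "Towel"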

async def scrape():

    text = input("Enter a search keyword: ")
    
    converted = to_1688_keyword(text)
    url = PRODUCTS_LIST_URL + converted

    print("Now url is : ",url)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context()

        # Load cookies from a prior manual login session
        try:
            with open(COOKIES_FILE, "r", encoding="utf-8") as f:
                cookies = json.load(f)
            await context.add_cookies(cookies)
        except FileNotFoundError:
            print(f"[!] {COOKIES_FILE} not found, continuing without a login session.")

        page = await context.new_page()

        # Product-list scraping (currently disabled; a hard-coded detail link is
        # used below instead). Uncomment to collect detail links from the search page:
        #
        # await page.goto(url, wait_until="domcontentloaded", timeout=40000)
        # await asyncio.sleep(10)
        #
        # html = await page.content()
        # soup = BeautifulSoup(html, "html.parser")
        #
        # main_div = soup.find("div", class_="feeds-wrapper")
        # if not main_div:
        #     print("No .feeds-wrapper found!")
        #     await browser.close()
        #     return
        #
        # cards = main_div.find_all("a", class_="search-offer-wrapper")
        # print("Total cards:", len(cards))
        # product_links = [a["href"] for a in cards if a.has_attr("href")]
        #
        # print(f"Found {len(product_links)} product detail links:")
        # for link in product_links:
        #     print(link[:10] + "...")

        # Hard-coded detail page for now; normally this would come from the
        # product_links gathered above (spm/clickid and friends are tracking metadata).
        detail_link = "https://detail.1688.com/offer/738354436678.html?spm=a26352.13672862.offerlist.526.455a1e627I3EPd&cosite=-&tracelog=p4p&_p_isad=1&clickid=2c944416cfa0494093abf85ee83a68a2&sessionid=177fa46ae468f9708d1c7b89bc3d0562"
        await page.goto(detail_link, wait_until="domcontentloaded", timeout=40000)

        await asyncio.sleep(10)
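        # (A fixed sleep is crude; waiting on a known selector, e.g.
        # page.wait_for_selector("div.title-content"), would be more robust.)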

        html = await page.content()
        soup = BeautifulSoup(html, "html.parser")

        # Example selectors; you'll need to adjust them to the actual page structure
        title_tag = soup.find("div", class_="title-content")
        title = title_tag.get_text(strip=True) if title_tag else None

        price_tags = soup.find_all("div", class_="price-component")
        # Prefer the second price component when more than one is present
        price_tag = price_tags[1] if len(price_tags) > 1 else (price_tags[0] if price_tags else None)
        price = "N/A"
        if price_tag:
            # get_text() picks up a literal "price" label; strip it out
            price = price_tag.get_text(strip=True).replace("price", "")

        # Collect all product gallery images
        images = []
        image_parent_tag = soup.find("div", class_="img-list-wrapper")
        if image_parent_tag:
            for item in image_parent_tag.find_all("div", class_="od-gallery-turn-item-wrapper"):
                img = item.find("img")
                if img and img.has_attr("src"):
                    images.append(img["src"])

        # Keep only CDN-hosted product images
        images = [img for img in images if "cbu01.alicdn.com" in img]

        # Supplier name sits in the page's <h1>
        supplier_h1 = soup.find("h1")
        supplier_name = supplier_h1.get_text(strip=True) if supplier_h1 else None

        # Ratings live in the last "trade-info" block
        trade_infos = soup.find_all("div", class_="trade-info")
        ratings_div = trade_infos[-1] if trade_infos else None
        ratings_em = ratings_div.find("em", class_="hl") if ratings_div else None
        ratings = ratings_em.get_text(strip=True) if ratings_em else None
        ratings_count = None
        if ratings:
            brackets = ratings_div.find("span", class_="brackets")
            ratings_count = brackets.get_text(strip=True) if brackets else None
        # Collect all the attributes from the product spec table
        data = {}
        table_tag = soup.find("table")
        if table_tag:
            for tr in table_tag.find_all("tr"):
                cells = tr.find_all(["th", "td"])
                # Cells come in label, value, label, value order
                for i in range(0, len(cells), 2):
                    label = cells[i].get_text(strip=True)
                    value = cells[i + 1].get_text(strip=True) if i + 1 < len(cells) else ""
                    data[label] = value
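        # Illustration (hypothetical cell values): a row rendered as
        #   材质 | 纯棉 | 产地 | 中国   (material | pure cotton | origin | China)
        # becomes {"材质": "纯棉", "产地": "中国"}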


        print("Title:", title)
        print("Price:", price)
        print("Supplier Name:", supplier_name)
        print("Data:", data)
        print("Ratings:", ratings , ratings_count)
        print("Images:", images[:2])

        print("After Translation :\n")
        print("Title", translate_mymemory(title, from_lang='zh-CN', to_lang="en"))
        print("Supplier Name:", translate_mymemory(supplier_name, from_lang='zh-CN', to_lang="en"))
        print("Data", translate_mymemory(str(data), from_lang='zh-CN', to_lang="en"))
        print("Price", translate_mymemory(price, from_lang='zh-CN', to_lang="en"))

        await browser.close()



if __name__ == "__main__":
    asyncio.run(scrape())

