# import json
# import requests
# import asyncio
# from urllib.parse import quote
# import urllib.parse
# from playwright.sync_api import sync_playwright
# from playwright_stealth import Stealth  # required by Stealth().use_sync(...) below (playwright_stealth 2.x API)
# from bs4 import BeautifulSoup

# LINGVA_API_BASE = "https://lingva.ml/api/v1"
# COOKIES_FILE = "cookies.json"

# from deep_translator import GoogleTranslator


# def translate_text(text, source="zh-CN", target="en"):
#     try:
#         return GoogleTranslator(source=source, target=target).translate(text)
#     except Exception as e:
#         print("Translation failed:", e)
#         return text
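
# # Usage sketch (illustrative only - the exact wording depends on the live
# # translator backend behind deep_translator):
# #   translate_text("毛巾")                                # -> roughly "towel"
# #   translate_text("towel", source="en", target="zh-CN")  # -> roughly "毛巾"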


# def load_cookies(context):
#     try:
#         with open(COOKIES_FILE, "r", encoding="utf-8") as f:
#             cookies = json.load(f)
#             context.add_cookies(cookies)
#             print("[✓] Cookies loaded.")
#     except FileNotFoundError:
#         print("[!] No cookies found, continuing without login session.")

# def save_cookies(context):
#     cookies = context.cookies()
#     with open(COOKIES_FILE, "w", encoding="utf-8") as f:
#         json.dump(cookies, f, indent=2)
#     print("[✓] Cookies saved.")

# def scrape_and_translate():
#     url = "https://s.1688.com/selloffer/offer_search.htm?keywords=毛巾"
    
#     with Stealth().use_sync(sync_playwright()) as p:
#         browser = p.chromium.launch(headless=False)
#         context = browser.new_context(
#             user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/114.0.0.0 Safari/537.36",
#             locale="zh-CN",
#             viewport={"width": 1280, "height": 800}
#         )

#         load_cookies(context)

#         page = context.new_page()

#         print(f"[→] Navigating to {url}")
#         page.goto(url, wait_until="load", timeout=60000)
#         page.wait_for_timeout(5000)

#         if "login" in page.url:
#             print("⚠️ Redirected to login page. Please log in manually.")
#             page.pause()  # Manually scan QR code to log in
#             save_cookies(context)
#             return

#         # Extract and translate
#         chinese_text = page.inner_text("body")[:1000]
#         print("Original Chinese:\n", chinese_text)

#         translated = translate_text(chinese_text)
#         print("\nTranslated English:\n", translated)

#         save_cookies(context)
#         browser.close()

# import asyncio
# import json
# from playwright.async_api import async_playwright
# from urllib import parse
# import re


# COOKIES_FILE = "data/cookies_1688.json"
# # Example of a fully built search URL (keyword 苹果手机, GB2312 percent-encoded):
# # PRODUCTS_LIST_URL = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB%CA%D6%BB%FA"
# PRODUCTS_LIST_URL = "https://s.1688.com/selloffer/offer_search.htm?keywords="  # scrape() appends the encoded keyword

# def to_1688_keyword(word):
#     """Percent-encode a search keyword the way 1688 URLs expect (GB2312 bytes)."""
#     # Plain ASCII keywords can be used as-is.
#     if re.fullmatch(r"[A-Za-z0-9\s]+", word):
#         return word

#     # 1688 search URLs encode Chinese keywords from GB2312 bytes, not UTF-8.
#     gb2312_bytes = word.encode("gb2312")
#     return parse.quote_from_bytes(gb2312_bytes)
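
# # Quick sanity check (values taken from the example URL above): the GB2312 bytes
# # of "苹果手机" percent-encode to the keyword seen in that URL.
# #   to_1688_keyword("苹果手机")   # -> "%C6%BB%B9%FB%CA%D6%BB%FA"
# #   to_1688_keyword("iphone 13")  # ASCII keywords pass through unchanged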


# def translate_mymemory(text, from_lang="en", to_lang="zh-CN"):
#     # URL-encode the text so spaces and non-ASCII characters survive the query string.
#     url = f"https://api.mymemory.translated.net/get?q={parse.quote(text)}&langpair={from_lang}|{to_lang}"
#     r = requests.get(url, timeout=30)
#     return r.json()["responseData"]["translatedText"]
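
# # Usage sketch (MyMemory is a free endpoint with daily usage limits, so treat the
# # output as best-effort):
# #   translate_mymemory("apple phone", from_lang="en", to_lang="zh-CN")
# #   # -> something like "苹果手机"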

# async def scrape():

#     text = input("Enter the Search Please : ")
#     ch_text = translate_text(text, source="en", target="zh-CN")
#     converted = to_1688_keyword(ch_text)
#     url = PRODUCTS_LIST_URL + converted

#     print("Search URL:", url)

#     async with async_playwright() as p:
#         browser = await p.chromium.launch(headless=False)
#         context = await browser.new_context()

#         # Load cookies
#         with open(COOKIES_FILE, "r") as f:
#             cookies = json.load(f)
#         await context.add_cookies(cookies)

#         page = await context.new_page()

#         # # 1️⃣ Go to product list page
#         # await page.goto(url, wait_until="domcontentloaded", timeout=40000)

#         # await asyncio.sleep(10)
#         # html = await page.content()
#         # soup = BeautifulSoup(html, 'html.parser')

#         # print("Soup is",soup)
#         # main_div = soup.find('div', class_='feeds-wrapper')
#         # if not main_div:
#         #     print("No .feeds-wrapper found!")
#         #     await browser.close()
#         #     return
        
#         # cards = main_div.find_all("a",class_="search-offer-wrapper")
#         # product_links = []

#         # print("Total cards",len(cards))
#         # for anchor in cards:
#         #     link = anchor['href'] if anchor.has_attr('href') else 'N/A'
#         #     product_links.append(link)


#         # print(f"Found {len(product_links)} product detail links:")
#         # for link in product_links:
#         #     print(link[:10] + '...')

#         detail_link = "https://detail.1688.com/offer/738354436678.html?spm=a26352.13672862.offerlist.526.455a1e627I3EPd&cosite=-&tracelog=p4p&_p_isad=1&clickid=2c944416cfa0494093abf85ee83a68a2&sessionid=177fa46ae468f9708d1c7b89bc3d0562"
#         await page.goto(detail_link, wait_until="domcontentloaded", timeout=40000)

#         await asyncio.sleep(10)

#         html = await page.content()
#         soup = BeautifulSoup(html, "html.parser")

#         # Example selectors - adjust them to the actual page structure.
#         title_tag = soup.find("div", class_="title-content")
#         title = title_tag.get_text(strip=True) if title_tag else None

#         # Price: prefer the second price-component block when more than one is present.
#         price_tags = soup.find_all("div", class_="price-component")
#         price_tag = price_tags[1] if len(price_tags) > 1 else (price_tags[0] if price_tags else None)
#         price = "N/A"
#         if price_tag:
#             price = price_tag.get_text(strip=True).replace("price", "")
#         # Collect all product images (guard against the gallery wrapper being absent)
#         image_parent_tag = soup.find("div", class_="img-list-wrapper")
#         image_tags = image_parent_tag.find_all("div", class_="od-gallery-turn-item-wrapper") if image_parent_tag else []
#         images = [wrapper.find("img")["src"] for wrapper in image_tags if wrapper.find("img") and wrapper.find("img").has_attr("src")]

#         images = [img for img in images if 'cbu01.alicdn.com' in img]

#         # Collect supplier name
#         supplier_tag = soup.find("h1")
#         supplier_name = supplier_tag.get_text(strip=True) if supplier_tag else None

#         # Collect ratings (the last trade-info block is used for the review summary)
#         trade_info_divs = soup.find_all("div", class_="trade-info")
#         ratings_div = trade_info_divs[-1] if trade_info_divs else None
#         print("ratings div", ratings_div)
#         ratings = ratings_div.find("em", class_="hl").get_text(strip=True) if ratings_div and ratings_div.find("em", class_="hl") else None
#         ratings_count = ratings_div.find("span", class_="brackets").get_text(strip=True) if ratings and ratings_div.find("span", class_="brackets") else None
#         # Collect all the attributes from the spec table (label/value pairs)
#         data = {}
#         table_tag = soup.find("table")
#         if table_tag:
#             for tr in table_tag.find_all("tr"):
#                 cells = tr.find_all(["th", "td"])
#                 # Cells come in label, value, label, value order
#                 for i in range(0, len(cells), 2):
#                     label = cells[i].get_text(strip=True)
#                     value = cells[i + 1].get_text(strip=True) if i + 1 < len(cells) else ""
#                     data[label] = value


#         print("Title:", title)
#         print("Price:", price)
#         print("Supplier Name:", supplier_name)
#         print("Data:", data)
#         print("Ratings:", ratings , ratings_count)
#         print("Images:", images[:2])

#         print("After Translation :\n")
#         print("Title", translate_text(title, source='zh-CN', target="en"))
#         print("Supplier Name:", translate_text(supplier_name, source='zh-CN', target="en"))
#         print("Data", translate_text(str(data), source='zh-CN', target="en"))
#         print("Price", translate_text(price, source='zh-CN', target="en"))

#         await browser.close()



# if __name__ == "__main__":
#     asyncio.run(scrape())

####### try 2

# import asyncio
# import json
# import os
# from playwright.async_api import async_playwright

# COOKIES_FILE = "data/cookies_1688.json"

# TARGET_URL = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB13%CA%D6%BB%FA"


# async def scrape_full_html():
#     async with async_playwright() as p:
#         browser = await p.chromium.launch(headless=False)
#         context = await browser.new_context(
#             user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
#                        "Chrome/114.0.0.0 Safari/537.36",
#             viewport={"width": 1280, "height": 800},
#             locale="zh-CN"
#         )

#         # Load cookies if available
#         try:
#             with open(COOKIES_FILE, "r", encoding="utf-8") as f:
#                 cookies = json.load(f)
#             await context.add_cookies(cookies)
#             print("[✓] Cookies loaded.")
#         except FileNotFoundError:
#             print("[!] Cookies file not found. Running without cookies.")

#         page = await context.new_page()
#         print(f"[→] Navigating to: {TARGET_URL}")

#         await page.goto(TARGET_URL, wait_until="domcontentloaded", timeout=60000)

#         # wait extra time for dynamic content (like product cards) to load
#         await asyncio.sleep(8)

#         html = await page.content()

#         # Save full HTML to file
#         output_file = "search_result1.html"
#         with open(output_file, "w", encoding="utf-8") as f:
#             f.write(html)

#         full_path = os.path.abspath(output_file)
#         print(f"[✓] Full HTML saved to: {full_path}")

#         await browser.close()


# if __name__ == "__main__":
#     asyncio.run(scrape_full_html())

### try 3 -------> product listing details are extracted, but not the product URLs

# import os
# import csv
# import json
# from playwright.sync_api import sync_playwright
# from playwright_stealth import stealth_sync
# from bs4 import BeautifulSoup

# PROFILE_PATH = r"C:\Users\SMA63\AppData\Local\Google\Chrome\PlaywrightProfile"
# CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

# with sync_playwright() as p:
#     browser = p.chromium.launch_persistent_context(
#         PROFILE_PATH,
#         headless=False,
#         args=["--start-maximized"],
#         executable_path=CHROME_PATH,
#     )

#     page = browser.new_page()
#     stealth_sync(page)

#     url = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB13%CA%D6%BB%FA"
#     page.goto(url)

#     # wait until products appear
#     page.wait_for_selector(".search-offer-item", timeout=60000)

#     html = page.content()
#     browser.close()

# # ------------------ PARSE WITH BEAUTIFULSOUP -------------------
# soup = BeautifulSoup(html, "html.parser")
# products = []

# for item in soup.select(".search-offer-item"):
#     product = {}

#     # Image
#     img = item.select_one(".ad-offer-img-wrapper img.main-img")
#     product["image"] = img["src"] if img else None

#     # Title
#     title = item.select_one(".offer-title-row .title-text")
#     product["title"] = title.get_text(strip=True) if title else None

#     # Price (combine integer + decimal if available)
#     price_main = item.select_one(".offer-price-row .text-main")
#     if price_main:
#         price_text = price_main.get_text(strip=True)
#         # append decimal part if present
#         decimal = price_main.find_next("div")
#         if decimal:
#             price_text += decimal.get_text(strip=True)
#         product["price"] = "¥" + price_text
#     else:
#         product["price"] = None

#     # Tags
#     tags = [tag.get_text(strip=True) for tag in item.select(".offer-tag-row .desc-text")]
#     product["tags"] = tags

#     # Shop
#     shop = item.select_one(".offer-shop-row .desc-text")
#     product["shop_name"] = shop.get_text(strip=True) if shop else None

#     shop_link = item.select_one(".offer-shop-row a")
#     product["shop_link"] = shop_link["href"] if shop_link else None

#     products.append(product)

# # ------------------ SAVE RESULTS -------------------
# # Save JSON
# with open("products.json", "w", encoding="utf-8") as f:
#     json.dump(products, f, indent=4, ensure_ascii=False)

# # Save CSV
# with open("products.csv", "w", encoding="utf-8", newline="") as f:
#     writer = csv.DictWriter(f, fieldnames=products[0].keys())
#     writer.writeheader()
#     writer.writerows(products)

# print(f"✅ Scraped {len(products)} products")
# print("📂 products.json and products.csv saved!")




####### try 4: extract the product URL from the listing page

# import os
# import csv
# import json
# from playwright.sync_api import sync_playwright
# from playwright_stealth import stealth_sync
# from bs4 import BeautifulSoup

# PROXIES="http://gxhfegdo:p6iwy5no05lc@23.95.150.145:6114,http://gxhfegdo:p6iwy5no05lc@198.23.239.134:6540,http://gxhfegdo:p6iwy5no05lc@45.38.107.97:6014,http://gxhfegdo:p6iwy5no05lc@107.172.163.27:6543,http://gxhfegdo:p6iwy5no05lc@64.137.96.74:6641,http://gxhfegdo:p6iwy5no05lc@45.43.186.39:6257,http://gxhfegdo:p6iwy5no05lc@154.203.43.247:5536,http://gxhfegdo:p6iwy5no05lc@216.10.27.159:6837,http://gxhfegdo:p6iwy5no05lc@136.0.207.84:6661,http://gxhfegdo:p6iwy5no05lc@142.147.128.93:6593"

# PROFILE_PATH = r"C:\Users\SMA63\AppData\Local\Google\Chrome\PlaywrightProfile"
# CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

# with sync_playwright() as p:
#     browser = p.chromium.launch_persistent_context(
#         PROFILE_PATH,
#         headless=False,
#         args=["--start-maximized"],
#         executable_path=CHROME_PATH,
#     )

#     page = browser.new_page()
#     stealth_sync(page)

#     url = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB13%CA%D6%BB%FA"
#     page.goto(url)

#     # wait until products appear
#     page.wait_for_selector(".search-offer-item", timeout=60000)

#     html = page.content()
#     browser.close()

# # ------------------ PARSE WITH BEAUTIFULSOUP -------------------
# soup = BeautifulSoup(html, "html.parser")
# products = []

# for item in soup.select(".search-offer-item"):
#     product = {}

#     # ---------- PRODUCT URL (your 2 conditions) ----------
#     if item.name == "a":  # case 1: itself is <a>
#         product["product_url"] = item.get("href")
#     else:  # case 2: parent <a>
#         parent_a = item.find_parent("a")
#         product["product_url"] = parent_a.get("href") if parent_a else None

#     # Image
#     img = item.select_one(".ad-offer-img-wrapper img.main-img")
#     product["image"] = img["src"] if img else None

#     # Title
#     title = item.select_one(".offer-title-row .title-text")
#     product["title"] = title.get_text(strip=True) if title else None

#     # Price (combine integer + decimal if available)
#     price_main = item.select_one(".offer-price-row .text-main")
#     if price_main:
#         price_text = price_main.get_text(strip=True)
#         # append decimal part if present
#         decimal = price_main.find_next("div")
#         if decimal:
#             price_text += decimal.get_text(strip=True)
#         product["price"] = "¥" + price_text
#     else:
#         product["price"] = None

#     # Tags
#     tags = [tag.get_text(strip=True) for tag in item.select(".offer-tag-row .desc-text")]
#     product["tags"] = tags

#     # Shop
#     shop = item.select_one(".offer-shop-row .desc-text")
#     product["shop_name"] = shop.get_text(strip=True) if shop else None

#     shop_link = item.select_one(".offer-shop-row a")
#     product["shop_link"] = shop_link["href"] if shop_link else None

#     products.append(product)

# # ------------------ SAVE RESULTS -------------------
# if products:  # only save if data found
#     # Save JSON
#     with open("products.json", "w", encoding="utf-8") as f:
#         json.dump(products, f, indent=4, ensure_ascii=False)

#     # Save CSV
#     with open("products.csv", "w", encoding="utf-8", newline="") as f:
#         writer = csv.DictWriter(f, fieldnames=products[0].keys())
#         writer.writeheader()
#         writer.writerows(products)

#     print(f"✅ Scraped {len(products)} products")
#     print(product)
#     print("📂 products.json and products.csv saved!")
# else:
#     print("⚠️ No products found!")



########## try 5

import os
import csv
import json
import random
from urllib.parse import urlparse
from playwright.sync_api import sync_playwright
from playwright_stealth import stealth_sync
from bs4 import BeautifulSoup

# ------------------ PROXIES -------------------
PROXIES = "http://gxhfegdo:p6iwy5no05lc@23.95.150.145:6114,http://gxhfegdo:p6iwy5no05lc@198.23.239.134:6540,http://gxhfegdo:p6iwy5no05lc@45.38.107.97:6014,http://gxhfegdo:p6iwy5no05lc@107.172.163.27:6543,http://gxhfegdo:p6iwy5no05lc@64.137.96.74:6641,http://gxhfegdo:p6iwy5no05lc@45.43.186.39:6257,http://gxhfegdo:p6iwy5no05lc@154.203.43.247:5536,http://gxhfegdo:p6iwy5no05lc@216.10.27.159:6837,http://gxhfegdo:p6iwy5no05lc@136.0.207.84:6661,http://gxhfegdo:p6iwy5no05lc@142.147.128.93:6593"
PROXY_LIST = [p.strip() for p in PROXIES.split(",") if p.strip()]

# pick one proxy
proxy_url = random.choice(PROXY_LIST)
parsed = urlparse(proxy_url)

proxy = {
    "server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
    "username": parsed.username,
    "password": parsed.password,
}

print(f"🌐 Using proxy: {proxy}")

# ------------------ PROFILE PATH -------------------
PROFILE_PATH = r"C:\Users\SMA63\AppData\Local\Google\Chrome\PlaywrightProfile"
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

with sync_playwright() as p:
    browser = p.chromium.launch_persistent_context(
        PROFILE_PATH,
        headless=False,
        executable_path=CHROME_PATH,
        args=["--start-maximized"],
        proxy=proxy,  # ✅ properly structured proxy
    )

    page = browser.new_page()
    stealth_sync(page)

    url = "https://s.1688.com/selloffer/offer_search.htm?keywords=%C6%BB%B9%FB13%CA%D6%BB%FA"
    page.goto(url, timeout=60000)

    # wait until products appear (600000 ms = 10 minutes, leaving time to clear a
    # captcha or log in manually in the headful browser if one appears)
    page.wait_for_selector(".search-offer-item", timeout=600000)

    html = page.content()
    browser.close()

# ------------------ PARSE WITH BEAUTIFULSOUP -------------------
soup = BeautifulSoup(html, "html.parser")
products = []

for item in soup.select(".search-offer-item"):
    product = {}

    # ---------- PRODUCT URL: the card is either itself an <a> or wrapped in one ----------
    if item.name == "a":
        product["product_url"] = item.get("href")
    else:
        parent_a = item.find_parent("a")
        product["product_url"] = parent_a.get("href") if parent_a else None

    img = item.select_one(".ad-offer-img-wrapper img.main-img")
    product["image"] = img["src"] if img else None

    title = item.select_one(".offer-title-row .title-text")
    product["title"] = title.get_text(strip=True) if title else None

    # Price: the integer part lives in .text-main; the decimal part is assumed to sit
    # in the next <div> (note: find_next() matches the next <div> anywhere after this
    # node in the document, not only an adjacent sibling).
    price_main = item.select_one(".offer-price-row .text-main")
    if price_main:
        price_text = price_main.get_text(strip=True)
        decimal = price_main.find_next("div")
        if decimal:
            price_text += decimal.get_text(strip=True)
        product["price"] = "¥" + price_text
    else:
        product["price"] = None

    tags = [tag.get_text(strip=True) for tag in item.select(".offer-tag-row .desc-text")]
    product["tags"] = tags

    shop = item.select_one(".offer-shop-row .desc-text")
    product["shop_name"] = shop.get_text(strip=True) if shop else None

    shop_link = item.select_one(".offer-shop-row a")
    product["shop_link"] = shop_link["href"] if shop_link else None

    products.append(product)

# ------------------ SAVE RESULTS -------------------
if products:
    with open("products.json", "w", encoding="utf-8") as f:
        json.dump(products, f, indent=4, ensure_ascii=False)

    with open("products.csv", "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=products[0].keys())
        writer.writeheader()
        writer.writerows(products)

    print(f"✅ Scraped {len(products)} products")
else:
    print("⚠️ No products found!")
