#!/usr/bin/env python3
"""
分析页面结构，找到正确的商品提取方式
"""

import asyncio
from playwright.async_api import async_playwright

# Session cookies injected into the browser context before navigation.
# Each entry supplies the cookie name, value and domain; main() adds path="/".
# NOTE(review): these are hard-coded session credentials (a "Bearer" token and
# a JSESSIONID) committed to source. They appear to be tied to a real account
# and will expire — consider loading them from the environment or an untracked
# file and rotating the values that are already checked in.
COOKIES = [
    {"name": "gxyj_Sign-In-Token", "value": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJjb2RlIjpudWxsLCJ1c2VyX25hbWUiOm51bGwsImNvbXBhbnlOYW1lIjoiIiwiY2xpZW50X2lkIjoiVU5JRklDQVRJT04iLCJhY2NvdW50U3RhdHVzIjoxLCJpc0ZyZWV6ZSI6MiwidWlkIjoiMTM5OTkwNzMxMDg5NzI4NDUyOCIsInB1cmNoYXNlclByb3BlcnR5IjowLCJjb21wYW55Tm8iOiIzMDAwMTA2MDE3IiwiZ3JhbnRfdHlwZSI6Im11bHRpX3Bhc3N3b3JkIiwic2NvcGUiOlsiYWxsIl0sImxvZ2luTmFtZSI6ImNoaW5hZGF2aWQiLCJleHAiOjE3NzMxOTgyNzEsImp0aSI6IjJmM2FiMGZhLTg2MGItNGVlNS05MTM2LThjNDU4MGIzZjRlNSIsInN0YXRpb25JZCI6IjEiLCJhZG1pblR5cGUiOjEsImN1cnJlbnRTdGF0aW9uSWQiOiIxIiwiYWNjb3VudFR5cGUiOiIzIiwiYWNjTm8iOiJBQ0MyNjAzMTAzMDAwMDAwMDAwMDAwMDcyIiwiY29tcGFueVN0YXR1cyI6MSwiYXV0aG9yaXRpZXMiOlsicm9vdCJdLCJhdWQiOlsiMSJdLCJwaG9uZSI6IjE4MTEwMDc4NzYyIiwibWFpbklkIjoiMTM5OTkwNzMxMDg5NzI4NDUzMCIsInVzZXJuYW1lIjoiY2hpbmFkYXZpZCJ9.CctYfcFiUdPUdmXEV7StkAU9PlAWgM-0v4dk6L6beV3SK93HT-b5g9tHIpO45CZTPa2pXw3GMK5X3dg6v5Y9Olxl35DAXYWhQVC2NjI-qkpOIfSSBM8i-DXR_HyIWpyeXWJBTWk4BdWZZPt8wkWzD5SGwM2-whcVQt5phsDrB8c", "domain": ".fupin832.com"},
    {"name": "JSESSIONID", "value": "0BB906763F5D9BEEEEACA841CA0E9690", "domain": "ys.fupin832.com"},
]

# JavaScript evaluated inside the loaded page. It returns a plain object with:
#   totalLinks         - number of <a> elements in the document
#   linksWithHref      - {text, href} for links whose (truncated) text is 6+ chars
#   priceElements      - text of elements (< 200 chars) matching the price regex
#   possibleContainers - {selector, count, sampleText} for each candidate
#                        selector that matched at least one element
#   pageTitle / bodyTextSample - document title and first 500 chars of body text
_ANALYSIS_SCRIPT = """() => {
        const result = {
            totalLinks: document.querySelectorAll('a').length,
            linksWithHref: [],
            priceElements: [],
            possibleContainers: []
        };

        // 找到所有带href的链接
        document.querySelectorAll('a[href]').forEach(a => {
            const text = a.innerText?.substring(0, 80) || '';
            const href = a.href;
            if (text.length > 5 && text.length < 150) {
                result.linksWithHref.push({text, href: href.substring(0, 100)});
            }
        });

        // 找到包含价格的元素
        const priceRegex = /￥\\s*\\d+\\.?\\d*/;
        document.querySelectorAll('*').forEach(el => {
            const text = el.innerText?.trim() || '';
            if (priceRegex.test(text) && text.length < 200) {
                result.priceElements.push(text.substring(0, 100));
            }
        });

        // 查找可能的商品容器
        const containerSelectors = [
            '[class*="product"]',
            '[class*="goods"]',
            '[class*="item"]',
            '[class*="card"]',
            '.el-card',
            '.list-item'
        ];

        containerSelectors.forEach(sel => {
            const els = document.querySelectorAll(sel);
            if (els.length > 0) {
                result.possibleContainers.push({
                    selector: sel,
                    count: els.length,
                    sampleText: els[0]?.innerText?.substring(0, 100) || ''
                });
            }
        });

        // 获取页面标题和部分内容
        result.pageTitle = document.title;
        result.bodyTextSample = document.body.innerText.substring(0, 500);

        return result;
    }"""


def _print_report(analysis: dict) -> None:
    """Print a human-readable summary of the dict returned by the page script."""
    print("\n📊 分析结果:")
    print(f"  页面标题: {analysis.get('pageTitle')}")
    print(f"  总链接数: {analysis.get('totalLinks')}")
    print(f"  价格元素: {len(analysis.get('priceElements', []))} 个")

    print("\n🔗 前20个链接:")
    for i, link in enumerate(analysis.get('linksWithHref', [])[:20], 1):
        print(f"  {i}. {link.get('text', '')[:50]}")

    print("\n📦 可能的容器:")
    for c in analysis.get('possibleContainers', []):
        print(f"  - {c.get('selector')}: {c.get('count')} 个")

    print("\n💰 价格样例:")
    for p in analysis.get('priceElements', [])[:10]:
        print(f"  - {p[:80]}")


async def main():
    """Load the product list page with the stored cookies and report its structure.

    Launches headless Chromium, installs every entry of ``COOKIES`` (with
    ``path="/"``), opens the product list URL, saves a full-page screenshot to
    ``analyze_page.png``, evaluates ``_ANALYSIS_SCRIPT`` in the page and prints
    a summary of the result.

    The Playwright driver and the browser are always shut down, even when
    navigation or evaluation raises (e.g. a Playwright timeout).
    """
    print("🔍 分析页面结构...")

    # The context manager stops the Playwright driver on exit; the original
    # start() call was never paired with stop().
    async with async_playwright() as pw:
        browser = await pw.chromium.launch(headless=True, args=['--no-sandbox'])
        try:
            context = await browser.new_context(viewport={"width": 1920, "height": 1080})

            # add_cookies takes a list, so install all cookies in one call.
            await context.add_cookies([
                {"name": c["name"], "value": c["value"], "domain": c["domain"], "path": "/"}
                for c in COOKIES
            ])

            page = await context.new_page()

            await page.goto("https://ys.fupin832.com/product/list?areaCode=410000", timeout=30000)
            await page.wait_for_load_state("networkidle")
            # Extra fixed delay after network idle before inspecting the DOM.
            await page.wait_for_timeout(3000)

            # Screenshot
            await page.screenshot(path="analyze_page.png", full_page=True)
            print("  📸 截图已保存: analyze_page.png")

            # Structure analysis
            analysis = await page.evaluate(_ANALYSIS_SCRIPT)
        finally:
            # Close the browser even if any step above failed.
            await browser.close()

    _print_report(analysis)

# Run the analysis only when executed as a script, so that importing this
# module does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
