#!/usr/bin/env python3
"""
832平台爬虫 - 直接调用API获取完整数据
"""

import csv
import json
import os
import re
from datetime import datetime

import requests

# Scraper configuration.
CONFIG = {
    "output_file": "henan_products_api.csv",
    "area_code": "410000",  # Henan Province
    "max_pages": 20,
    "page_size": 20,
}

# Bearer token, originally copied from the site's login cookie. It is read
# from the FUPIN832_TOKEN environment variable rather than being embedded in
# the source: a token committed to the file is exposed to everyone who can
# read it and has to be edited in place whenever it is renewed.
# NOTE(review): the token that used to be hard-coded here should be treated
# as leaked and revoked.
_API_TOKEN = os.environ.get("FUPIN832_TOKEN", "").strip()

# Headers sent with every API request.
HEADERS = {
    "Authorization": f"Bearer {_API_TOKEN}",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Referer": "https://ys.fupin832.com/",
}

# Column names (and order) of the output CSV. These are user-facing header
# strings and are also used as the row dict keys in save_to_csv.
CSV_HEADERS = ["序号", "商品名称", "单价(元)", "销量", "供应商名称", "联系电话", "商品链接", "抓取时间"]

def try_api_endpoints():
    """Probe each candidate API endpoint once and collect whatever parses.

    Every endpoint receives the same first-page POST request. A reply is
    treated as successful when its HTTP status is 200 and its JSON body has
    ``retStatus == '1'`` or ``code == 200``; such bodies are passed to
    ``parse_api_response``. Any exception raised while handling one endpoint
    is printed (truncated to 80 characters) and the next endpoint is tried.

    Returns:
        list: products gathered from all endpoints that answered successfully.
    """
    # Candidate endpoints, probed in this order.
    candidate_urls = (
        "https://ys.fupin832.com/frontweb/decorationcenter/searchEsRecommendData",
        "https://ys.fupin832.com/frontapi/product/queryProductPage",
        "https://ys.fupin832.com/api/product/search",
        "https://ys.fupin832.com/frontweb/productfrontcategory/getProductByCategoryId",
        "https://ys.fupin832.com/frontweb/search/searchProduct",
    )

    collected = []

    for endpoint in candidate_urls:
        try:
            print(f"\n尝试API: {endpoint}")

            # First-page query; the area code is sent under both key names.
            body = {
                "pageShow": CONFIG['page_size'],
                "nowPage": 1,
                "areaCode": CONFIG['area_code'],
                "provinceCode": CONFIG['area_code'],
            }

            reply = requests.post(endpoint, headers=HEADERS, json=body, timeout=10)
            print(f"  状态码: {reply.status_code}")

            if reply.status_code != 200:
                continue

            parsed = reply.json()
            print(f"  响应大小: {len(str(parsed))} 字符")

            accepted = parsed.get('retStatus') == '1' or parsed.get('code') == 200
            if not accepted:
                continue

            print("  ✅ 成功!")
            found = parse_api_response(parsed)
            if found:
                collected.extend(found)
                print(f"  提取到 {len(found)} 条数据")

        except Exception as err:
            print(f"  ❌ 失败: {str(err)[:80]}")

    return collected

def _first_present(item, *keys):
    """Return the first value among *keys* in *item* that is neither None nor ''."""
    for key in keys:
        value = item.get(key)
        # Compare against None/'' explicitly so legitimate falsy values
        # such as a price of 0 are kept.
        if value is not None and value != '':
            return value
    return ''


def parse_api_response(data):
    """Extract product records from a decoded API response.

    The item list is taken from ``data['retData']``: either the value itself
    when it is a list, or the first of its ``results`` / ``list`` / ``data``
    entries that holds a list. Each item is mapped to a dict with the keys
    ``name``, ``price``, ``sales``, ``supplier``, ``contact`` and ``url``,
    trying several alternative field names per key.

    Compared with a plain nested ``dict.get`` chain, alternative field names
    are also consulted when the preferred field is present but ``None`` or
    empty, and a malformed entry is skipped instead of discarding every item
    that follows it.

    Args:
        data: decoded JSON body of an API reply (expected to be a dict).

    Returns:
        list[dict]: one dict per item that has a non-empty name; an empty
        list when nothing usable is found.
    """
    if not isinstance(data, dict):
        print(f"解析错误: 响应不是JSON对象 ({type(data).__name__})")
        return []

    ret_data = data.get('retData')

    # The payload has been seen in two shapes: a bare list, or a dict that
    # wraps the list under one of several keys.
    if isinstance(ret_data, list):
        items = ret_data
    elif isinstance(ret_data, dict):
        items = next(
            (
                ret_data[key]
                for key in ('results', 'list', 'data')
                if isinstance(ret_data.get(key), list)
            ),
            [],
        )
    else:
        items = []

    products = []
    for item in items:
        if not isinstance(item, dict):
            # Skip junk entries rather than aborting the whole page.
            continue

        name = _first_present(item, 'productFullName', 'productName', 'name')
        if not name:
            continue

        product_id = _first_present(item, 'productId', 'id')
        products.append({
            'name': name,
            'price': _first_present(item, 'productPrice', 'price'),
            'sales': _first_present(item, 'salesVolume', 'sales', 'sold'),
            'supplier': _first_present(item, 'companyName', 'supplierName', 'shopName'),
            'contact': _first_present(item, 'contactPhone', 'phone', 'tel'),
            'url': f"https://ys.fupin832.com/product/detail/{product_id}",
        })

    return products

def fetch_all_pages():
    """Page through the main search endpoint and gather every product.

    Pages 1 .. ``CONFIG['max_pages']`` are requested in order. Paging stops
    at the first page that returns a non-200 status, a ``retStatus`` other
    than ``'1'``, no parseable products, or raises any exception.

    Returns:
        list: products from all pages fetched before paging stopped.
    """
    # Main API endpoint.
    endpoint = "https://ys.fupin832.com/frontweb/decorationcenter/searchEsRecommendData"
    total_pages = CONFIG['max_pages']
    collected = []

    def _load_page(page_no):
        """Return the products of one page, or None when paging should stop."""
        print(f"\n📄 第 {page_no}/{total_pages} 页")

        body = {
            "pageShow": CONFIG['page_size'],
            "nowPage": page_no,
            "areaCode": CONFIG['area_code'],
        }
        reply = requests.post(endpoint, headers=HEADERS, json=body, timeout=15)

        if reply.status_code != 200:
            print(f"  请求失败: {reply.status_code}")
            return None

        parsed = reply.json()
        if parsed.get('retStatus') != '1':
            print(f"  API返回错误: {parsed.get('retMessage', '未知错误')}")
            return None

        page_items = parse_api_response(parsed)
        if not page_items:
            print("  无数据")
            return None

        return page_items

    for page_no in range(1, total_pages + 1):
        try:
            page_items = _load_page(page_no)
            if page_items is None:
                break

            collected.extend(page_items)
            print(f"  本页: {len(page_items)} 条 | 累计: {len(collected)} 条")
        except Exception as err:
            print(f"  错误: {str(err)[:80]}")
            break

    return collected

def get_product_detail(product_id):
    """Fetch supplier information for one product from the detail endpoint.

    Args:
        product_id: identifier sent as ``productId`` in the POST body.

    Returns:
        dict: ``supplier``, ``contact`` and ``address`` read from the reply's
        ``retData`` when the HTTP status is 200 and ``retStatus == '1'``;
        otherwise an empty dict. Any exception is printed and also yields an
        empty dict.
    """
    endpoint = "https://ys.fupin832.com/frontweb/productfrontproduct/getProductDetailByProductId"

    try:
        reply = requests.post(
            endpoint,
            headers=HEADERS,
            json={"productId": product_id},
            timeout=10,
        )
        if reply.status_code != 200:
            return {}

        parsed = reply.json()
        if parsed.get('retStatus') != '1':
            return {}

        info = parsed.get('retData', {})
        return {
            'supplier': info.get('companyName', ''),
            # Fall back to the company phone when no contact phone key exists.
            'contact': info.get('contactPhone', info.get('companyTel', '')),
            'address': info.get('companyAddress', ''),
        }
    except Exception as err:
        print(f"获取详情失败: {err}")
        return {}

def _dedupe_products(products):
    """Drop repeats, keeping the first product per (30-char name prefix, price)."""
    seen = set()
    unique = []
    for p in products:
        # A tuple key keeps name and price apart; plain concatenation would
        # make ("苹果1", "2") collide with ("苹果", "12"). `or ''` and str()
        # guard against a None or non-string name.
        key = (str(p.get('name') or '')[:30], str(p.get('price', '')))
        if key not in seen:
            seen.add(key)
            unique.append(p)
    return unique


def save_to_csv(products):
    """Deduplicate *products*, write them to the CSV file and print a summary.

    The output path is ``CONFIG['output_file']`` and the columns are
    ``CSV_HEADERS``. The file is written as UTF-8 with a BOM. After writing,
    counts and a preview of the first ten rows are printed.

    Args:
        products: list of product dicts with the keys ``name``, ``price``,
            ``sales``, ``supplier``, ``contact`` and ``url`` (missing keys
            are written as empty cells).

    Returns:
        None. When *products* is empty a warning is printed and no file is
        written.
    """
    if not products:
        print("⚠️ 无数据可保存")
        return

    unique = _dedupe_products(products)
    print(f"\n📊 去重后: {len(unique)} 条")

    # One timestamp for the whole export so every row carries the same value.
    captured_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    with open(CONFIG['output_file'], 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=CSV_HEADERS)
        writer.writeheader()
        writer.writerows(
            {
                "序号": idx,
                "商品名称": p.get('name', ''),
                "单价(元)": p.get('price', ''),
                "销量": p.get('sales', ''),
                "供应商名称": p.get('supplier', ''),
                "联系电话": p.get('contact', ''),
                "商品链接": p.get('url', ''),
                "抓取时间": captured_at,
            }
            for idx, p in enumerate(unique, 1)
        )

    print(f"✅ 已保存: {CONFIG['output_file']}")

    # Summary counts.
    with_supplier = sum(1 for p in unique if p.get('supplier'))
    with_contact = sum(1 for p in unique if p.get('contact'))

    print("\n📈 统计:")
    print(f"  总数: {len(unique)}")
    print(f"  有供应商: {with_supplier}")
    print(f"  有联系方式: {with_contact}")

    # Preview of the first ten rows; str() keeps slicing safe for
    # non-string values.
    print("\n📋 预览 (前10条):")
    for i, p in enumerate(unique[:10], 1):
        print(f"  {i}. {str(p.get('name') or '')[:35]}")
        print(f"     💰 {p.get('price', '')}元 | 销量 {p.get('sales', '')}")
        if p.get('supplier'):
            print(f"     🏪 {str(p['supplier'])[:30]}")
        if p.get('contact'):
            print(f"     📞 {p['contact']}")

def main():
    """Run the scraper end to end: probe, page, enrich, save.

    1. Probe the candidate endpoints with ``try_api_endpoints``.
    2. If the probe returned anything, fetch all pages with
       ``fetch_all_pages`` and look up supplier details for the first 20
       products via ``get_product_detail``.
    3. Write whatever was collected with ``save_to_csv``.

    Values already present on a product are kept when the detail lookup
    fails or returns empty fields, instead of being overwritten with ''.
    """
    print("="*60)
    print("  832平台 河南商品爬虫 (API版)")
    print("="*60)

    # Probe all endpoints first; the result is only used as a go/no-go signal.
    print("\n🔍 测试API端点...")
    probe_results = try_api_endpoints()

    all_products = []
    if probe_results:
        print("\n📥 开始批量获取...")
        all_products = fetch_all_pages()

        # Enrich only the first 20 products with supplier details.
        print("\n📞 获取供应商详情...")
        # The product id is recovered from the numeric tail of the detail URL.
        detail_id_pattern = re.compile(r'/detail/(\d+)')
        for i, p in enumerate(all_products[:20], 1):
            match = detail_id_pattern.search(p.get('url') or '')
            if not match:
                continue

            detail = get_product_detail(match.group(1))
            # `or` rather than a .get default: the detail dict may be empty
            # (lookup failed) or carry '' values, and neither should erase
            # what the list API already provided.
            p['supplier'] = detail.get('supplier') or p.get('supplier') or ''
            p['contact'] = detail.get('contact') or p.get('contact') or ''
            supplier_label = str(p['supplier'] or 'N/A')[:20]
            print(f"  [{i}] {str(p.get('name', ''))[:25]}... → {supplier_label}")

    # Save (prints a warning and writes nothing when the list is empty).
    save_to_csv(all_products)

    print("\n🎉 完成!")

if __name__ == "__main__":
    main()