🛒 淘宝商品爬虫

📋 功能说明
  • 搜索淘宝商品信息
  • 获取商品价格、销量
  • 批量采集商品数据
💻 源代码
import requests
import json
import re

def search_taobao(keyword, page=1, timeout=10):
    """Search Taobao for products matching a keyword.

    Sends a GET request to the Taobao search endpoint and extracts
    ``nid``/``title``/``price`` triples from the response text with a
    regular expression.

    Args:
        keyword: Search term, sent as the ``q`` query parameter.
        page: Value sent as the ``page`` query parameter. Defaults to 1.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        A list of dicts, one per regex match in the response body, each
        with the string-valued keys ``'nid'``, ``'title'`` and ``'price'``.
        The list is empty when the response contains no match.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    url = 'https://s.taobao.com/search'
    params = {
        'q': keyword,
        'imgfile': '',
        'initiative_id': 'staobaoz',
        'ie': 'utf8',
        'page': page,
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 Chrome/91.0.4472.124',
        # Placeholder value: must be replaced with a real cookie string.
        'Cookie': 'your_cookie_here',
    }

    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    res = requests.get(url, params=params, headers=headers, timeout=timeout)

    # Extract product data. The pattern only matches when the three fields
    # appear adjacent and in exactly this order in the response text.
    pattern = r'"nid":"(\d+)","title":"([^"]+)","price":"([\d.]+)",'
    return [
        {'nid': nid, 'title': title, 'price': price}
        for nid, title, price in re.findall(pattern, res.text)
    ]

def get_product_detail(nid, timeout=10):
    """Fetch the raw HTML of a Taobao product detail page.

    Args:
        nid: Product id, sent as the ``id`` query parameter of the item
            page URL.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        The response body as text, exactly as returned by the server (no
        parsing is performed here).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    url = 'https://item.taobao.com/item.htm'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Passing the id through ``params`` lets requests URL-encode it instead
    # of splicing it into the URL verbatim; the explicit timeout keeps a
    # stalled server from blocking the caller forever.
    res = requests.get(url, params={'id': nid}, headers=headers, timeout=timeout)
    return res.text

# Example usage: search for the keyword 'Python书籍' and print the title and
# price of at most the first ten products returned.
# NOTE(review): this runs at module top level, so it issues a network request
# whenever the file is executed or imported.
products = search_taobao('Python书籍')
for p in products[:10]:
    print(f"商品: {p['title']}, 价格: {p['price']}")
📦 运行环境
pip install requests
注意事项
  • ⚠️ 需要登录后的 Cookie:请将代码中的 your_cookie_here 替换为登录淘宝后从浏览器中复制的 Cookie
  • ⚠️ 建议在连续请求之间添加延时(例如 time.sleep),避免请求过于频繁
  • ⚠️ 遵守robots协议