# Source code
import requests
from lxml import etree
import os
from concurrent.futures import ThreadPoolExecutor
def get_girl_images(page_count=10, *, out_dir='meizi', timeout=10):
    """Download the images listed on the mzitu.com index pages.

    Walks index pages ``1..page_count``, collects the ``data-original``
    attribute of every ``<img>`` below ``<div class="postlist">`` and writes
    each image to ``<out_dir>/<page>_<index>.jpg``.

    A page or image that cannot be fetched (connection error, timeout,
    invalid URL or non-2xx status) is reported on stdout and skipped, so a
    single bad request does not abort the rest of the run and an HTTP error
    body is never saved as if it were an image.

    Args:
        page_count: Number of index pages to walk, starting at page 1.
        out_dir: Directory the images are written to; created if missing.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 Chrome/91.0.4472.124'
    }
    base_url = 'https://www.mzitu.com/page/'
    os.makedirs(out_dir, exist_ok=True)

    for page in range(1, page_count + 1):
        url = base_url + str(page)
        try:
            # Without a timeout a stalled connection would block forever.
            res = requests.get(url, headers=headers, timeout=timeout)
            res.raise_for_status()
        except requests.RequestException as err:
            print(f'Skipping page {url}: {err}')
            continue

        html = etree.HTML(res.text)
        if html is None:
            # NOTE(review): presumably the parser yields no root element for
            # an empty/unparseable body; nothing to extract in that case.
            continue

        # Collect the image links of this index page.
        imgs = html.xpath('//div[@class="postlist"]//img/@data-original')
        for i, img_url in enumerate(imgs):
            try:
                img_res = requests.get(img_url, headers=headers, timeout=timeout)
                # Reject error responses so their body is not stored as .jpg.
                img_res.raise_for_status()
            except requests.RequestException as err:
                print(f'Skipping image {img_url}: {err}')
                continue

            filename = f'{out_dir}/{page}_{i}.jpg'
            with open(filename, 'wb') as f:
                f.write(img_res.content)
            print(f'保存: {filename}')
# Run the download on a worker thread. Only a single task is submitted, so
# the pages themselves are still processed one after another inside
# get_girl_images.
with ThreadPoolExecutor(max_workers=10) as executor:
    future = executor.submit(get_girl_images, 10)
    # An exception raised in the worker is stored on the future and would
    # otherwise be lost silently; result() waits and re-raises it here.
    future.result()