💻 源代码
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
def _select_text(node, selector):
    """Return the stripped text of the first match for *selector*, or ''."""
    match = node.select_one(selector)
    return match.text.strip() if match is not None else ''


def crawl_51job(keyword, city=''):
    """Crawl one page of 51job search results for a keyword.

    Args:
        keyword: Search term placed in the URL path. Reserved characters
            (``#``, ``/``, ``?``, ``,`` ...) are percent-encoded so that a
            keyword such as ``C#`` does not truncate the URL at the fragment
            marker. An existing ``%`` is left untouched so already-encoded
            keywords are passed through unchanged.
        city: Value inserted verbatim as the first path field of the search
            URL. Defaults to an empty string.

    Returns:
        A list of dicts, one per listing matched by ``.j_joblist .e``, with
        the keys ``'title'``, ``'company'``, ``'salary'``, ``'location'`` and
        ``'experience'``. A field whose element is missing from the page is
        returned as an empty string instead of aborting the crawl.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Keep '%' safe so callers that pre-encode the keyword are not
    # double-encoded; everything else that could break the path is escaped.
    encoded_keyword = quote(keyword, safe='%')
    url = (
        f'https://search.51job.com/list/{city},000000,0000,00,9,99,'
        f'{encoded_keyword},2,1.html'
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 Chrome/91.0.4472.124'
    }
    # Without a timeout requests may block forever on a stalled connection.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page as "no jobs".
    res.raise_for_status()
    res.encoding = 'gbk'
    soup = BeautifulSoup(res.text, 'html.parser')

    jobs = []
    for item in soup.select('.j_joblist .e'):
        # Location and experience share one '|'-separated element, so it is
        # looked up and split once per listing.
        details = [
            part.strip()
            for part in _select_text(item, '.inbox .d').split('|')
        ]
        jobs.append({
            'title': _select_text(item, '.jname'),
            'company': _select_text(item, '.cname'),
            'salary': _select_text(item, '.sal'),
            'location': details[0],
            'experience': details[1] if len(details) > 1 else '',
        })
    return jobs
# Example: crawl Python job postings, report how many were collected,
# and preview the first rows as a DataFrame.
jobs = crawl_51job(keyword='Python')
df = pd.DataFrame(data=jobs)
print(f"共爬取 {len(jobs)} 条数据")
print(df.head(n=5))