# 💻 源代码 (Source code)
import requests
import re
from lxml import etree
def get_jianshu_user_id(username, timeout=10):
    """Look up a Jianshu user's ID from their profile page.

    Fetches ``https://www.jianshu.com/u/<username>`` and reads the
    ``data-user-id`` attribute of ``div`` elements whose class is
    ``_1u3F7``.

    Args:
        username: Path segment placed after ``/u/`` in the profile URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first matching ``data-user-id`` value, or ``None`` when
        ``username`` is empty, the response body is blank or yields no
        parsed root, or no element matches.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # An empty username would request the bare "/u/" URL; bail out early.
    if not username:
        return None

    url = f'https://www.jianshu.com/u/{username}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Without a timeout, requests may block indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=timeout)

    body = res.text
    # Skip parsing a blank body, and tolerate the parser yielding no root
    # element; either way there is nothing to query.
    html = etree.HTML(body) if body.strip() else None
    if html is None:
        return None

    user_id = html.xpath('//div[@class="_1u3F7"]/@data-user-id')
    return user_id[0] if user_id else None
def get_user_articles(user_id, page=1, timeout=10):
    """Fetch one page of a user's timeline and extract article summaries.

    Requests ``https://www.jianshu.com/users/<user_id>/timeline`` with a
    ``page`` query parameter and decodes the response as JSON. Each element
    of the decoded payload is expected to expose its fields under
    ``item['object']['data']``.

    Args:
        user_id: Identifier placed in the timeline URL. A falsy value
            (e.g. the ``None`` returned by a failed lookup) short-circuits
            to an empty list without making a request.
        page: Page number sent as the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts, each with the keys ``title``, ``slug``,
        ``publish_time`` and ``likes_count``.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If an item lacks one of the expected fields.
    """
    # Avoid requesting ".../users/None/timeline" when no ID was resolved.
    if not user_id:
        return []

    url = f'https://www.jianshu.com/users/{user_id}/timeline'
    params = {'page': page}
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Without a timeout, requests may block indefinitely on a stalled server.
    res = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    res.raise_for_status()

    articles = []
    for item in res.json():
        # Resolve the nested payload once instead of once per field.
        data = item['object']['data']
        articles.append({
            'title': data['title'],
            'slug': data['slug'],
            'publish_time': data['publish_time'],
            'likes_count': data['likes_count'],
        })
    return articles
def get_article_content(slug, timeout=10):
    """Download an article page and return its text content.

    Fetches ``https://www.jianshu.com/p/<slug>`` and concatenates every
    text node found beneath ``div`` elements whose class is ``article``.

    Args:
        slug: Article slug placed after ``/p/`` in the URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The joined text nodes as a single string. An empty string is
        returned when ``slug`` is empty, the response body is blank or
        yields no parsed root, or no matching element exists.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # An empty slug would request the bare "/p/" URL; bail out early.
    if not slug:
        return ''

    url = f'https://www.jianshu.com/p/{slug}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Without a timeout, requests may block indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=timeout)

    body = res.text
    # Skip parsing a blank body, and tolerate the parser yielding no root
    # element; either way there is no text to extract.
    html = etree.HTML(body) if body.strip() else None
    if html is None:
        return ''

    content = html.xpath('//div[@class="article"]//text()')
    return ''.join(content)
# Example usage: resolve a user ID, fetch the first timeline page, and print
# each article title. Guarded so that importing this module performs no
# network requests.
if __name__ == "__main__":
    user_id = get_jianshu_user_id('username')
    # get_jianshu_user_id returns None when no ID is found; skip the
    # timeline request in that case rather than querying ".../users/None/...".
    articles = get_user_articles(user_id) if user_id else []
    for article in articles:
        print(f"标题: {article['title']}")