# 💻 源代码 (source code)
import requests
from bs4 import BeautifulSoup
import os
import time
class NovelCrawler:
    """Small crawler that searches a novel site and downloads chapter text.

    Attributes are plain instance state:
      headers  -- HTTP headers sent with every request.
      novels   -- list returned by ``search``.
      timeout  -- per-request timeout in seconds.
      delay    -- pause in seconds between consecutive chapter requests.
    """

    # Search endpoint; the keyword is sent as the ``keyword`` query parameter.
    SEARCH_URL = 'https://www.xs84.com/search.php'

    def __init__(self, timeout=10, delay=1):
        """Create a crawler.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
            delay: Seconds to sleep between consecutive chapter downloads.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        self.novels = []
        self.timeout = timeout
        self.delay = delay

    def _get(self, url, params=None):
        """Issue a GET request with the crawler's headers and timeout.

        When the server does not declare a charset, requests decodes text
        responses as ISO-8859-1, which garbles non-Latin pages; in that case
        the encoding detected from the body is used instead.
        """
        resp = requests.get(
            url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        content_type = resp.headers.get('Content-Type', '')
        if 'charset' not in content_type.lower():
            resp.encoding = resp.apparent_encoding
        return resp

    def search(self, keyword):
        """Search for novels matching ``keyword``.

        The keyword is passed as a query parameter so that non-ASCII text and
        reserved characters are URL-encoded correctly.

        Returns:
            ``self.novels``. This method performs the request and parses the
            page but does not extract any results from it, so the list is
            returned exactly as it already was.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        resp = self._get(self.SEARCH_URL, params={'keyword': keyword})
        # TODO(review): result extraction is not implemented; the markup of
        # the search page is not known here, so nothing is appended to
        # self.novels. The script entry point expects dicts with 'title' and
        # 'url' keys.
        soup = BeautifulSoup(resp.text, 'html.parser')
        return self.novels

    def download_chapters(self, novel_url, start=1, end=None):
        """Download chapter texts from ``{novel_url}/{page}.html``.

        Pages are fetched in increasing order beginning at ``start``. The
        loop stops at the first response whose status is not 200, or after
        page ``end`` when ``end`` is given.

        Args:
            novel_url: Base URL of the novel; a trailing slash is ignored.
            start: First page number to fetch.
            end: Last page number to fetch (inclusive), or None for no limit.
                If ``end`` is smaller than ``start`` nothing is fetched.

        Returns:
            List of chapter texts, one per page that contained a
            ``<div id="content">`` element.

        Raises:
            requests.RequestException: if a request fails or times out.
        """
        base_url = novel_url.rstrip('/')
        chapters = []
        page = start
        while end is None or page <= end:
            if page != start:
                # Throttle between requests; never before the first one and
                # never after the last one.
                time.sleep(self.delay)
            resp = self._get(f'{base_url}/{page}.html')
            if resp.status_code != 200:
                break
            soup = BeautifulSoup(resp.text, 'html.parser')
            content = soup.find('div', {'id': 'content'})
            # Pages without the content container are skipped, not fatal.
            if content is not None:
                chapters.append(content.get_text())
            page += 1
        return chapters

    def save_to_file(self, chapters, filename):
        """Write the chapters to ``filename`` as UTF-8 text.

        Each chapter is followed by a blank line. An existing file is
        overwritten.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            f.writelines(f'{chapter}\n\n' for chapter in chapters)
if __name__ == '__main__':
    # Demo entry point: run one search and print every result returned.
    crawler = NovelCrawler()
    results = crawler.search("斗破苍穹")
    for novel in results:
        # Each result is indexed by 'title' and 'url'.
        print(novel['title'], novel['url'])