# 💻 Source code
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
class MingRiSpider:
    """Scraper for the novel site rooted at ``base_url``.

    Pages are downloaded with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. The public methods list books, list a book's chapters,
    fetch one chapter's text, and save a whole book to a text file.
    """

    # Seconds to wait for the server before giving up. Without an explicit
    # timeout ``requests.get`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 Chrome/91.0.4472.124'
        }
        self.base_url = 'https://www.mingriwx.com'

    @staticmethod
    def _absolute_url(page_url, href):
        """Resolve *href* against the URL of the page it was found on.

        Handles root-relative (``/book/1/``), document-relative (``2.html``)
        and already-absolute hrefs, which plain string concatenation with
        the site root does not.
        """
        return urljoin(page_url, href)

    def _fetch_soup(self, url):
        """Download *url* and return its parsed HTML tree.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx response.
        """
        res = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail loudly on error pages instead of scraping them as content.
        res.raise_for_status()
        return BeautifulSoup(res.text, 'html.parser')

    def get_book_list(self):
        """Return the books listed on the site's ``/sort/`` page.

        Returns:
            A list of ``{'title': ..., 'url': ...}`` dicts, one for each
            ``.book-list li`` entry that contains a link with an ``href``.
            Entries without a usable link are skipped.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
        """
        url = f'{self.base_url}/sort/'
        soup = self._fetch_soup(url)
        books = []
        for item in soup.select('.book-list li'):
            link = item.select_one('a')
            href = link.get('href') if link is not None else None
            if not href:
                continue
            books.append({
                'title': link.text,
                'url': self._absolute_url(url, href)
            })
        return books

    def get_chapters(self, book_url):
        """Return the chapters linked from a book's page.

        Args:
            book_url: URL of the book's chapter-index page.

        Returns:
            A list of ``{'title': ..., 'url': ...}`` dicts, one for each
            ``.chapter-list a`` anchor that has an ``href``, in page order.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
        """
        soup = self._fetch_soup(book_url)
        chapters = []
        for item in soup.select('.chapter-list a'):
            href = item.get('href')
            if not href:
                continue
            chapters.append({
                'title': item.text,
                'url': self._absolute_url(book_url, href)
            })
        return chapters

    def get_content(self, chapter_url):
        """Return the text of the ``#content`` element of a chapter page.

        Args:
            chapter_url: URL of the chapter page.

        Raises:
            requests.RequestException: if the page cannot be downloaded.
            ValueError: if the page has no ``#content`` element.
        """
        soup = self._fetch_soup(chapter_url)
        node = soup.select_one('#content')
        if node is None:
            raise ValueError(
                f'no #content element found at {chapter_url}'
            )
        return node.get_text()

    def download_book(self, book_url, save_path):
        """Download every chapter of a book into one UTF-8 text file.

        Each chapter is written as a blank-line-separated title followed by
        its text. Progress is printed per chapter.

        Args:
            book_url: URL of the book's chapter-index page.
            save_path: Destination file; it is overwritten if it exists and
                its parent directory is created if missing.

        Raises:
            requests.RequestException: if any page cannot be downloaded.
            ValueError: if a chapter page has no ``#content`` element.
        """
        # Fetch the chapter list first so a failure here does not leave an
        # empty output file behind.
        chapters = self.get_chapters(book_url)
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(save_path, 'w', encoding='utf-8') as f:
            for ch in chapters:
                print(f'下载: {ch["title"]}')
                content = self.get_content(ch['url'])
                f.write(f'\n\n{ch["title"]}\n\n')
                f.write(content)
                f.write('\n')
# Usage: build a spider, fetch the catalogue and report how many books it lists.
spider = MingRiSpider()
books = spider.get_book_list()
book_count = len(books)
print(f"共 {book_count} 本小说")