📖 明日小说爬虫

📋 功能说明
  • 爬取明日小说网的小说列表、章节目录和正文
  • 批量下载小说章节
  • 保存为TXT格式
💻 源代码
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

class MingRiSpider:
    """Scraper for the mingriwx.com novel site.

    Fetches the book index, a book's chapter list and individual chapter
    text, and can write a whole book to a UTF-8 text file.
    """

    def __init__(self, timeout=10):
        """Set up request headers, the site root and the request timeout.

        Args:
            timeout: Seconds to wait on each HTTP request before giving up.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 Chrome/91.0.4472.124'
        }
        self.base_url = 'https://www.mingriwx.com'
        self.timeout = timeout

    def _get_soup(self, url):
        """Download *url* and return it parsed as a BeautifulSoup tree.

        Raises:
            requests.RequestException: On a network failure, a timeout or a
                non-success HTTP status.
        """
        res = requests.get(url, headers=self.headers, timeout=self.timeout)
        # Fail loudly instead of parsing an error page as if it were content.
        res.raise_for_status()
        # Only trust the response encoding when the server actually sent a
        # charset; otherwise hand BeautifulSoup the raw bytes so it can use
        # the charset declared inside the HTML itself.
        content_type = res.headers.get('Content-Type', '')
        declared = res.encoding if 'charset' in content_type.lower() else None
        return BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

    def get_book_list(self):
        """Return the books listed on the site's ``/sort/`` page.

        Returns:
            A list of ``{'title': str, 'url': str}`` dicts, one per
            ``.book-list li`` entry that contains a link with an ``href``.

        Raises:
            requests.RequestException: If the page cannot be fetched.
        """
        url = f'{self.base_url}/sort/'
        soup = self._get_soup(url)

        books = []
        for item in soup.select('.book-list li'):
            link = item.select_one('a')
            href = link.get('href') if link is not None else None
            if not href:
                # Entry without a usable link; nothing to download from it.
                continue
            books.append({
                'title': link.text,
                # urljoin copes with absolute, root-relative and
                # page-relative hrefs alike.
                'url': urljoin(url, href)
            })
        return books

    def get_chapters(self, book_url):
        """Return the chapters linked from a book's page.

        Args:
            book_url: URL of the book's chapter index page.

        Returns:
            A list of ``{'title': str, 'url': str}`` dicts in page order,
            one per ``.chapter-list a`` anchor that has an ``href``.

        Raises:
            requests.RequestException: If the page cannot be fetched.
        """
        soup = self._get_soup(book_url)

        chapters = []
        for item in soup.select('.chapter-list a'):
            href = item.get('href')
            if not href:
                continue
            chapters.append({
                'title': item.text,
                'url': urljoin(book_url, href)
            })
        return chapters

    def get_content(self, chapter_url):
        """Return the text of the ``#content`` element of a chapter page.

        Args:
            chapter_url: URL of the chapter page.

        Raises:
            requests.RequestException: If the page cannot be fetched.
            ValueError: If the page has no ``#content`` element.
        """
        node = self._get_soup(chapter_url).select_one('#content')
        if node is None:
            raise ValueError(f'no #content element found at {chapter_url}')
        return node.get_text()

    def download_book(self, book_url, save_path):
        """Download every chapter of a book into one UTF-8 text file.

        Each chapter is written as two blank lines, the title, a blank line,
        the chapter text and a trailing newline. Progress is printed per
        chapter.

        Args:
            book_url: URL of the book's chapter index page.
            save_path: Destination file path; missing parent directories are
                created.

        Raises:
            requests.RequestException: If any page cannot be fetched.
            ValueError: If a chapter page has no ``#content`` element.
        """
        chapters = self.get_chapters(book_url)

        parent = os.path.dirname(save_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(save_path, 'w', encoding='utf-8') as f:
            for ch in chapters:
                print(f'下载: {ch["title"]}')
                content = self.get_content(ch['url'])
                f.write(f'\n\n{ch["title"]}\n\n')
                f.write(content)
                f.write('\n')
                
# Usage: fetch the book index and report how many books were found.
# Guarded so that importing this module does not trigger a network request.
if __name__ == '__main__':
    spider = MingRiSpider()
    books = spider.get_book_list()
    print(f"共 {len(books)} 本小说")
📦 运行环境
pip install requests beautifulsoup4
功能特点
  • ✅ 批量下载
  • ✅ UTF-8编码
  • ✅ 进度显示