Python爬虫实践：漫客栈数据采集与对接漫城API

import requests

import re

from urllib.parse import urlparse, urlunparse

# Listing page of free comics on mkzhan.com (first page only).
url = "https://www.mkzhan.com/category/?is_free=1&page=1"

# Site root that the links captured from the listing page are appended to.
BASE_URL = "https://www.mkzhan.com"

# Seconds to wait on a request before giving up, so a stalled server cannot
# hang the crawl forever (requests applies no timeout by default).
REQUEST_TIMEOUT = 10

# Captures (link, title) for each comic title entry in the listing markup.
# The href group must match the link exactly as it appears in the page; an
# earlier revision had a blog URL prefix embedded in the href part, which can
# never occur on the target site, so the pattern matched nothing.
pattern = r'<p class="comic__title"><a href="([^"]+)"[^>]*>(.*?)</a></p>'


def extract_comics(html):
    """Return the comics listed in *html* as ``(address, name)`` pairs.

    Args:
        html: Markup of a category/listing page.

    Returns:
        A list of tuples, in document order. ``address`` is ``BASE_URL``
        followed by the captured ``href`` value; ``name`` is the anchor's
        inner text exactly as matched. The list is empty when nothing
        matches.
    """
    return [(BASE_URL + link, name) for link, name in re.findall(pattern, html)]


def fetch_html(page_url):
    """Download *page_url* and return the response body as text.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``), so an error page
            is never parsed as if it were real content.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def main():
    """Print every comic on the listing page and fetch each comic's page."""
    for address, name in extract_comics(fetch_html(url)):
        print("地址:", address)
        print("名称:", name)

        # Fetch the comic's own page, which is where its chapters are listed.
        chapter_content = fetch_html(address)

        # TODO: match chapter addresses and ids in chapter_content with a
        # regular expression; that step is not part of the available source.


if __name__ == "__main__":
    main()

特别提示：本信息由相关用户自行提供，真实性未证实，仅供参考。请谨慎采用，风险自负。

点赞 0举报收藏 0评论 0

0 条相关评论

相关最新动态

推荐最新动态

点击排行