import requests
import re
from urllib.parse import urlparse, urlunparse
# Category listing URL on mkzhan.com (query: is_free=1, page=1).
url = "https://www.mkzhan.com/category/?is_free=1&page=1"
# Send the HTTPS request. A timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops early on an HTTP error
# instead of silently regex-scanning an error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
content = response.text
# Use a regular expression to capture each title link's href (group 1) and
# its link text (group 2).
# NOTE(review): the pattern previously hard-coded a blog.csdn.net prefix in
# the href, which conflicts with the code below that prepends the mkzhan.com
# origin to group 1. The whole href is captured instead; this assumes the
# page uses site-relative hrefs -- confirm against the live markup.
pattern = r'<p class="comic__title"><a href="([^"]+)"[^>]*>(.*?)</a></p>'
matches = re.findall(pattern, content)
# Print the address and name of every match, then download that entry's page.
# The loop body is indented here; without indentation the file fails to
# parse with an IndentationError.
for match in matches:
    # Group 1 is appended to the site origin to form the absolute address.
    address = "https://www.mkzhan.com" + match[0]
    name = match[1]
    print("地址:", address)
    print("名称:", name)
    # Send the GET request for the chapter page. It uses the per-iteration
    # address, so it belongs inside the loop; the timeout prevents one
    # stalled request from blocking the whole run.
    chapter_response = requests.get(address, timeout=10)
    chapter_content = chapter_response.text
    # Use a regular expression to match chapter addresses and ids