import re
import requests
def get_article_info(url, timeout=10):
    """Fetch an article page and extract its title and sidebar menu links.

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A ``(title, menu_info)`` tuple. ``title`` is the text of the first
        ``<title>`` element that sits on a single line. ``menu_info`` is a
        list of ``(href, text)`` tuples, one for each ``text-secondary``
        link found inside the first ``list-group list-group-flush`` list.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with an error status.
        IndexError: The page contains no ``<title>`` element or no menu
            list. (IndexError is kept so existing callers that catch it
            keep working.)
    """
    # Without a timeout a stalled server would block this call forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx responses instead of scraping an error page.
    response.raise_for_status()
    html = response.text

    # Page title. The group is non-greedy so that a later "</title>" on the
    # same line (e.g. in minified HTML) is not swallowed into the title.
    title_match = re.search(r'<title>(.*?)</title>', html)
    if title_match is None:
        raise IndexError('no <title> element found in {}'.format(url))
    title = title_match.group(1)

    # HTML of the left-hand menu. DOTALL lets the list span several lines.
    menu_match = re.search(
        r'<ul class="list-group list-group-flush">(.*?)</ul>',
        html,
        re.DOTALL,
    )
    if menu_match is None:
        raise IndexError('no menu list found in {}'.format(url))
    menu_html = menu_match.group(1)

    # Every (href, text) pair in the menu. [^"]* keeps the href inside its
    # own quotes and (.*?) stops at the first "</a>", so several links on
    # one line are returned as separate entries instead of one merged match.
    menu_info = re.findall(
        r'<a href="([^"]*)" class="text-secondary">(.*?)</a>',
        menu_html,
    )
    return title, menu_info
if __name__ == '__main__':
    # Demo run: scrape one article page and print what was extracted.
    article_url = 'https://python-abc.xyz/basic/5995/'
    page_title, menu_entries = get_article_info(article_url)

    print('文章标题为:', page_title)
    print('菜单列表如下:')

    # Menu hrefs are printed after the site root, followed by the link text.
    line_template = '链接:https://python-abc.xyz{};标题:{}'
    for href, label in menu_entries:
        print(line_template.format(href, label))