# Posted by the author on 2018-08-25 19:24:45:
# _*_ coding:utf-8_*_
import requests
from bs4 import BeautifulSoup
import re
def download_page(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` may wait indefinitely on a
            stalled connection.

    Returns:
        The undecoded response body (``Response.content``) as ``bytes``.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with a 4xx/5xx status.
    """
    # A desktop Firefox User-Agent is sent with the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the parser.
    response.raise_for_status()
    return response.content
def get_content(html_text):
    """Extract song titles and their absolute links from a page.

    Every ``<a>`` element whose ``href`` starts with ``/song/`` is treated
    as a song link. Each title/link pair is printed as it is found, and the
    complete mapping is printed once at the end.

    Args:
        html_text: HTML markup of the page (``str`` or ``bytes``).

    Returns:
        A dict mapping the anchor text to the link, where the link is
        ``http://music.taihe.com`` followed by the anchor's ``href``. When
        two anchors share the same text, the later one wins.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    song_links = soup.find_all('a', href=re.compile(r'^/song/'))
    sname = {}
    for link in song_links:
        # ``Tag.string`` is None when the anchor holds nested markup or no
        # text at all, which used to create a bogus ``None`` entry.
        # ``get_text`` always returns a string.
        a_name = link.get_text(strip=True)
        if not a_name:
            # Anchors without visible text carry no usable title.
            continue
        ass_href = 'http://music.taihe.com' + link['href']
        sname[a_name] = ass_href
        print('歌名:{} =>链接:{}\n'.format(a_name, ass_href))
    print(sname)
    return sname
def output_html(sname):
    """Write the songs in *sname* to ``music.html`` as an HTML table.

    Each row has three cells: the song name, the link as text, and a
    clickable anchor pointing at the link. The file is created in the
    current working directory and overwritten if it already exists.

    Args:
        sname: Mapping of song name to song URL.
    """
    # Imported locally so this function stays self-contained.
    from html import escape

    # Explicit UTF-8 keeps the Chinese text intact regardless of the platform
    # default encoding; ``with`` closes the file even if a write fails.
    with open('music.html', 'w', encoding='utf-8') as fout:
        fout.write("<html>")
        # Tell the browser which encoding the file was written in.
        fout.write("<head><meta charset='utf-8'></head>")
        fout.write("<body>")
        fout.write("<table border='1'>")
        for name, link in sname.items():
            # Names and links come from a scraped page, so escape them before
            # embedding them in markup.
            safe_name = escape(str(name))
            safe_link = escape(str(link), quote=True)
            fout.write("<tr>")
            fout.write("<td>{}</td>".format(safe_name))
            fout.write("<td>{}</td>".format(safe_link))
            # The attribute value is quoted so URLs with special characters
            # do not break the tag.
            fout.write('<td><a href="{}">去听一下</a></td>'.format(safe_link))
            fout.write("</tr>")
        fout.write("</table>")
        fout.write("</body>")
        fout.write("</html>")
def main():
    """Scrape every configured page and write all songs to one HTML file.

    Each URL is downloaded and parsed in turn; the songs found on all pages
    are merged into a single mapping that is written out once at the end.
    """
    urls = ['http://music.taihe.com/']  # more page URLs can be added here
    all_songs = {}
    for url in urls:
        html_text = download_page(url)
        # output_html always rewrites the same file, so writing once per URL
        # would keep only the last page's songs. Merge first instead.
        all_songs.update(get_content(html_text))
    output_html(all_songs)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()