python爬取網易雲音樂歌單音樂
阿新 • • 發佈:2017-05-14
string attrs default textarea bsp color read contents dom
在網易雲音樂中第一頁歌單的url:http://music.163.com/#/discover/playlist/
依次第二頁:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=35
依次第三頁:http://music.163.com/#/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=70
然後從歌單的查看框架的源代碼:
從圖中的源代碼可以得到每個歌單的url:eg:http://music.163.com/#/playlist?id=696806036
然後相應的歌單頁面中可以得到歌單中每首歌的名字和歌手:
然後用歌名和歌手,在百度音樂搜索API接口獲得songid,API的url = "http://sug.music.baidu.com/info/suggestion"
截圖來自:貼吧
然後用獲取到的songid,通過百度音樂免費API接口:http://music.baidu.com/data/music/fmlink 獲取songLink進行下載,並且將songLink對應的歌曲保存為本地的.flac文件
eg:http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=flac
下載結果:
代碼:
# -*- coding: utf-8 -*-
"""Crawl NetEase Cloud Music hot playlists and fetch their tracks from Baidu Music.

Workflow (Python 2 script):

1. Download the first pages of the NetEase "hot playlists" index.
2. Collect the relative URL of every playlist on each page.
3. For every playlist, read the song titles and the matching singer names.
4. Look each "title + singer" up in the Baidu Music suggestion API to get a
   song id, resolve that id to a download link through the Baidu ``fmlink``
   API, and save the audio under ``songs/``.
"""
import csv
import itertools
import json
import os
import random
import re
import socket
import stat
import sys
import time
import urllib
import urllib2
import urlparse
from datetime import datetime, timedelta
from HTMLParser import HTMLParser
from itertools import product
from StringIO import StringIO
from zipfile import ZipFile

import lxml.html
import requests
from bs4 import BeautifulSoup

from downloader import Downloader

# Python 2 only: make implicit str <-> unicode conversions use UTF-8 so that
# non-ASCII song and singer names can be concatenated and printed.
reload(sys)
sys.setdefaultencoding('utf8')

URL = 'http://music.163.com'
NUM = 5  # NOTE(review): not referenced anywhere in this script; kept as-is.

# Playlist index paging: the first page has no query string, later pages
# advance ``offset`` in steps of PAGE_SIZE.
PLAYLIST_INDEX_URL = 'http://music.163.com/discover/playlist'
PAGE_SIZE = 35
PAGE_COUNT = 4

# Baidu Music endpoints used to turn "title + singer" into a download link.
SUGGESTION_API = "http://sug.music.baidu.com/info/suggestion"
FMLINK_API = "http://music.baidu.com/data/music/fmlink"
# Seconds to wait for the Baidu APIs before giving up on a single track.
REQUEST_TIMEOUT = 10

_HIDDEN_LIST_RE = re.compile(r'<ul class="f-hide">(.*?)</ul>', re.S | re.M)
_SONG_ITEM_RE = re.compile(r'<li><a .*?>(.*?)</a></li>', re.S | re.M)


def download(url, user_agent='wswp', num_try=2):
    """Fetch *url* and return the raw response body, or None on failure.

    Args:
        url: Absolute URL to fetch.
        user_agent: Value sent in the ``User-agent`` request header.
        num_try: Number of extra attempts allowed when the server answers
            with a 5xx status code.

    Returns:
        The response body as a byte string, or None if the request failed.
    """
    # The header name must use a hyphen; "User_agent" is not the user-agent
    # header, so the custom agent was never actually applied.
    headers = {'User-agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    try:
        return urllib2.urlopen(request).read()
    except urllib2.URLError as e:
        print('Download error %s' % (e.reason,))
        # Only server-side (5xx) errors are worth retrying.
        if num_try > 0 and 500 <= getattr(e, 'code', 0) < 600:
            return download(url, user_agent, num_try - 1)
        return None


def get_song_list(url):
    """Return the song titles listed on the playlist page at *url*.

    Titles are taken from the ``<ul class="f-hide">`` list of the page. If
    that list cannot be found, the whole page is scanned instead.

    Returns:
        A list of title strings; empty when the page could not be downloaded.
    """
    html = download(url)
    if not html:
        return []
    hidden_lists = _HIDDEN_LIST_RE.findall(html)
    # Restrict the search to the hidden list so unrelated <li><a> entries
    # elsewhere on the page do not end up in the result.
    scope = '\n'.join(hidden_lists) if hidden_lists else html
    return _SONG_ITEM_RE.findall(scope)


# Collect NetEase playlist links, e.g. /playlist?id=706469943
def get_play_list(html):
    """Return the relative playlist URLs found in a playlist index page.

    Args:
        html: Markup of a playlist index page, or None.

    Returns:
        A list of ``href`` values; empty when *html* is empty or None.
    """
    if not html:
        return []
    soup = BeautifulSoup(html, "html.parser")
    anchors = soup.find_all(name='a', attrs={'class': 'tit f-thide s-fc0'})
    # Anchors without an href cannot be turned into a URL later on.
    return [anchor.get('href') for anchor in anchors if anchor.get('href')]


def download_music(url, song_name):
    """Download the audio at *url* into ``songs/<song_name>.flac``.

    Args:
        url: Direct link to the audio file.
        song_name: Name used for the local file.

    Returns:
        True when the file was written, False when the download failed.
    """
    print("Downloading song_name:" + song_name)
    data = download(url)
    if data is None:
        # Nothing to write; avoid leaving an empty file behind.
        return False
    path = "songs"
    if not os.path.isdir(path):
        os.mkdir(path)
    # Path separators inside a title would otherwise point into a
    # non-existent sub-directory.
    safe_name = re.sub(r'[\\/]', '_', song_name)
    # NOTE(review): the link is requested with type "mp3" but the file is
    # stored with a ".flac" extension -- confirm which one is intended.
    with open(os.path.join(path, safe_name + '.flac'), 'wb') as f:
        f.write(data)
    return True


def _get_json(url, params):
    """Return the decoded JSON body of a GET request, or None on any failure."""
    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
        return json.loads(response.text)
    except (requests.RequestException, ValueError):
        return None


def _find_song_id(query):
    """Return the Baidu song id of the first suggestion for *query*, or None."""
    result = _get_json(SUGGESTION_API,
                       {'word': query, 'version': '2.1.1', 'from': '0'})
    try:
        return result['data']['song'][0]['songid']
    except (KeyError, IndexError, TypeError):
        return None


def _find_song_link(song_id):
    """Return the download link Baidu reports for *song_id*, or None."""
    # Example request:
    # http://music.baidu.com/data/music/fmlink?rate=320&songIds=242078437&type=&callback=cb_download&_t=1468380564513&format=json
    result = _get_json(FMLINK_API, {'songIds': song_id, 'type': 'mp3'})
    try:
        return result['data']['songList'][0]['songLink']
    except (KeyError, IndexError, TypeError):
        return None


def download_song(song_name, singer):
    """Look a track up on Baidu Music and download it.

    Args:
        song_name: Title of the track.
        singer: Name of the performing artist.

    Returns:
        0 when the track could not be found or downloaded, None on success.
    """
    mess = song_name + singer

    # Baidu Music search: resolve "title + singer" to a song id.
    song_id = _find_song_id(mess)
    if song_id is None:
        print("do not have flac")
        return 0
    print("song_id:%s" % song_id)

    # Baidu Music free API: resolve the song id to a download link.
    songlink = _find_song_link(song_id)
    if songlink is None:
        return 0
    if len(songlink) < 10:
        # A very short value is not a usable URL.
        print("do not have flac")
        return 0
    print("Song Source: " + songlink)

    if not download_music(songlink, mess):
        return 0


def get_song_singer(url):
    """Return the first artist name of every track on a playlist page.

    The page embeds the track data as JSON inside a hidden ``<textarea>``.
    The result has one entry per track, in page order; a track without
    artist data yields an empty string so positions stay aligned with the
    track list.

    Returns:
        A list of artist names; empty when the page or its embedded data
        could not be read.
    """
    html = download(url)
    if not html:
        return []
    soup = BeautifulSoup(html, "html.parser")
    results = soup.find_all(name='textarea', attrs={'style': 'display:none;'})
    if not results:
        return []
    try:
        # Read the text of the tag directly instead of slicing the serialized
        # markup, which only works for one exact spelling of the opening tag.
        tracks = json.loads(results[0].get_text())
    except ValueError:
        return []
    if not isinstance(tracks, list):
        return []

    singers = []
    for track in tracks:
        try:
            singers.append(track["artists"][0]["name"])
        except (KeyError, IndexError, TypeError):
            singers.append('')
    return singers


def main():
    """Walk the hot-playlist index pages and download every listed track."""
    downloaded = 0
    for page_no in range(PAGE_COUNT):
        offset = page_no * PAGE_SIZE
        if offset:
            url = 'http://music.163.com/discover/playlist/?order=hot&cat=%E5%85%A8%E9%83%A8&limit=35&offset=' + str(offset)
        else:
            url = PLAYLIST_INDEX_URL
        print(url)

        for playlist_path in get_play_list(download(url)):
            song_list_url = URL + playlist_path
            print(song_list_url)
            song_names = get_song_list(song_list_url)
            singers = get_song_singer(song_list_url)
            # zip() stops at the shorter list, so only complete
            # (title, singer) pairs are processed.
            for song_name, singer in zip(song_names, singers):
                print(singer)
                print(song_name)
                # download_song() returns 0 on failure and None on success;
                # count only the tracks that were actually saved.
                if download_song(song_name, singer) != 0:
                    downloaded += 1
                print("\n")
    print("Download " + str(downloaded) + " music\n")


if __name__ == '__main__':
    main()
python爬取網易雲音樂歌單音樂