1. 程式人生 > >python 爬蟲(爬取網頁的img並下載)

python 爬蟲(爬取網頁的img並下載)

import os  # standard library: create the output directory and build file paths
import re  # standard library: sanitise scraped titles before using them as file names
from urllib.parse import urljoin  # standard library: resolve relative image URLs
from urllib.request import urlopen  # standard library (used only by the commented-out example below)

import requests  # third-party HTTP client (install it with pip if it is missing)
from pyquery import PyQuery as pq  # third-party HTML parser with jQuery-style selectors
# def get_url():
# #     url = 'https://s2.app1108017098.qqopenapp.com/?from=singlemessage&isappinstalled=0'
#     url = 'https://hm.baidu.com/hm.gif?cc=0&ck=1&cl=24-bit&ds=1920x1080&vl=328&ep=%7B%22netAll%22%3A168%2C%22netDns%22%3A23%2C%22netTcp%22%3A137%2C%22srv%22%3A49%2C%22dom%22%3A497%2C%22loadEvent%22%3A910%7D&et=87&ja=0&ln=zh-cn&lo=0&lt=1545373503&rnd=931647730&si=85ae31d909724b3fa587d26d1a21ab94&v=1.2.35&lv=2'
# def get(): # ret = urlopen(url).read()# 獲取網頁的原始碼 # print(ret) # f = open('te.txt', 'wb') # 若是'wb'就表示寫二進位制檔案 # f.write(ret) # f.close() # return get#返回函式 # # get_func = get_url()#函式接收 # get_func()#輸出 呼叫 # import requests # from pyquery import PyQuery as pq
# Page whose game thumbnails are downloaded.
PAGE_URL = 'http://www.4399.com/flash/gamehw.htm'
# Folder the images are written to (same location the script has always used).
OUTPUT_DIR = "./1/"
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10


def resolve_image_url(lazy_src, src, base_url=PAGE_URL):
    """Pick the image address of one list entry and make it absolute.

    Args:
        lazy_src: Value of the ``lz_src`` attribute of the ``<img>`` tag,
            or ``None``/empty when the attribute is absent.
        src: Value of the ``src`` attribute, used when ``lazy_src`` is missing.
        base_url: URL of the page the attributes were read from; relative and
            protocol-relative addresses are resolved against it.

    Returns:
        The absolute image URL, or ``None`` when neither attribute is set.
    """
    candidate = lazy_src or src
    if not candidate:
        return None
    # urljoin leaves absolute URLs untouched and completes "//host/x" or "/x".
    return urljoin(base_url, candidate)


def safe_filename(name, fallback="image"):
    """Turn a scraped title into a string that is safe to use as a file name.

    Path separators, characters reserved on common file systems and control
    characters are replaced by ``_``; surrounding spaces and dots are removed.

    Args:
        name: Title text taken from the page (may be ``None`` or empty).
        fallback: Name returned when nothing usable is left.

    Returns:
        The cleaned name, or ``fallback`` if the cleaned name is empty.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", name or "").strip(" .")
    return cleaned or fallback


def main():
    """Download the thumbnail of every entry in the page's ``.tm_list`` list.

    Each image is stored as ``<title>.jpg`` inside ``OUTPUT_DIR``. Entries
    without an image address are skipped, and an image that cannot be fetched
    is reported and skipped so that one bad link does not abort the run.

    Raises:
        requests.RequestException: If the listing page itself cannot be fetched.
    """
    # Fetch the listing page. errors="replace" keeps a single byte outside
    # gb2312 from aborting the whole run with UnicodeDecodeError.
    page = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    html = page.content.decode("gb2312", errors="replace")

    # Parse the markup and make sure the target folder exists before writing.
    doc = pq(html)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Every <a> directly inside an <li> of the .tm_list element is one game.
    for anchor in doc('.tm_list > li > a').items():
        img = anchor.find('img')
        # Prefer the "lz_src" attribute and fall back to the plain "src".
        url_q = resolve_image_url(img.attr('lz_src'), img.attr('src'))
        if url_q is None:
            continue

        try:
            image = requests.get(url=url_q, timeout=REQUEST_TIMEOUT)
            # Do not store an HTTP error page under a .jpg name.
            image.raise_for_status()
        except requests.RequestException as err:
            print("下載失敗:%s (%s)" % (url_q, err))
            continue

        # The text of the <b> tag is used as the image's name.
        name = anchor.find('b').text()
        print("下載的影象:%s,地址為 %s" % (name, url_q))

        target = os.path.join(OUTPUT_DIR, safe_filename(name) + ".jpg")
        with open(target, 'wb') as file:
            file.write(image.content)

    print("圖片下載完畢")


if __name__ == "__main__":
    main()