1. 程式人生 > 實用技巧 > 爬取百度圖片

爬取百度圖片

import User_Agent  # self-written helper module; its ua() supplies the User-Agent request header
import requests
import os
from urllib import parse
from time import time, sleep
import json

# Search endpoint template. The three placeholders receive, in order: the
# URL-quoted keyword, the result offset (pn) and the current timestamp.
url = 'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word={}&pn={}&{}'

# Root folder; every keyword gets its own sub-folder below it. A raw string is
# used so the backslashes are taken literally (the previous literals ended in
# a backslash, which escaped the closing quote and broke the syntax).
IMAGE_ROOT = r'D:\IDEAXiangMu\python\images'

# Offsets are requested in steps of 30 and progress is reported every 30 images.
PAGE_SIZE = 30

# Thumbnail URLs collected by the search phase, in download order.
img_url_list = []


def extract_thumb_urls(payload):
    """Return the thumbnail URLs contained in a decoded search response.

    Args:
        payload: The decoded JSON object of one result page; its ``data``
            entry is expected to be a list of per-image dicts.

    Returns:
        A list with the ``thumbURL`` value of every entry that has a
        non-empty one. Entries without it (for example empty dicts) are
        skipped instead of raising ``KeyError``.
    """
    entries = payload.get('data') or []
    return [
        entry['thumbURL']
        for entry in entries
        if isinstance(entry, dict) and entry.get('thumbURL')
    ]


def image_extension(img_url, default='jpg'):
    """Return the file extension of ``img_url`` without the leading dot.

    Only the path component of the URL is inspected, so query strings and
    dots in the host name cannot leak into the result. ``default`` is
    returned when the path has no purely alphanumeric extension.
    """
    url_path = parse.urlparse(img_url).path
    suffix = os.path.splitext(url_path)[1].lstrip('.')
    return suffix if suffix.isalnum() else default


def build_image_path(save_dir, name, index, img_url):
    """Return the target path ``<save_dir>/<name><index>.<ext>`` for an image."""
    file_name = name + str(index) + '.' + image_extension(img_url)
    return os.path.join(save_dir, file_name)


def fetch_image_urls(keyword, pages, headers):
    """Query ``pages`` result pages for ``keyword`` and return all thumbnail URLs.

    Args:
        keyword: Search term as typed by the user (quoted here for the URL).
        pages: Number of result pages to request.
        headers: HTTP headers sent with every request.

    Returns:
        The thumbnail URLs of all requested pages, in response order.

    Raises:
        ValueError: If a response body is not valid JSON (from ``json.loads``).
    """
    quoted_keyword = parse.quote(keyword)
    found = []
    for page_no in range(1, pages + 1):
        # Same offsets as before: 30, 60, ... (one step per requested page).
        offset = page_no * PAGE_SIZE
        response = requests.get(
            url=url.format(quoted_keyword, offset, time()),
            headers=headers,
        )
        found.extend(extract_thumb_urls(json.loads(response.text)))
    return found


def download_images(name, image_urls, save_dir, headers):
    """Download every URL in ``image_urls`` into ``save_dir``.

    Files are named ``<name><n>.<ext>`` with ``n`` counting from 1. A failed
    HTTP response is reported and skipped rather than written to disk. After
    every ``PAGE_SIZE`` images a page message is printed and the loop pauses
    for one second.
    """
    page = 0
    for cnt, img_url in enumerate(image_urls, start=1):
        response = requests.get(url=img_url, headers=headers)
        if response.ok:
            with open(build_image_path(save_dir, name, cnt, img_url), 'wb') as f:
                f.write(response.content)
            print('正在獲取' + str(cnt) + '資料')
        else:
            # Do not store an error page under an image file name.
            print('第%d張圖片下載失敗,已跳過' % cnt)

        if cnt % PAGE_SIZE == 0:
            # Announce the page only once its last image has been handled.
            page += 1
            print('%s第%d頁下載成功' % (name, page))
            sleep(1)


def main():
    """Prompt for a keyword and page count, then fetch and save the images."""
    headers = {'User-Agent': User_Agent.ua()}

    name = input('請輸入您要下載的圖片:')
    pn = int(input('要下載幾頁?:'))

    # The folder that is created is the same folder the images are written to.
    save_dir = os.path.join(IMAGE_ROOT, name)
    os.makedirs(save_dir, exist_ok=True)

    img_url_list.extend(fetch_image_urls(name, pn, headers))
    download_images(name, img_url_list, save_dir, headers)


if __name__ == '__main__':
    main()