爬取圖片
阿新 • • 發佈:2018-01-20
http new XML re.sub ring pri 劃線 src img
# coding=utf-8
"""Crawl the galleries under http://www.7160.com/meinv/ and save one image per page.

For every gallery id and page number the script fetches the page, reads the
title from the element with class ``picmainer`` and stores the first ``<img>``
found on the page as ``<sanitized title>.jpg`` in the current working
directory.
"""
import re
import urllib.request
from urllib import error
from urllib.parse import urljoin

# Root of the gallery listing; a gallery lives at BASE_URL + <gallery id>.
BASE_URL = "http://www.7160.com/meinv/"
# Gallery ids and page numbers that are probed (same ranges as the original script).
GALLERY_IDS = range(1, 151637)
PAGE_NUMBERS = range(1, 30)


def validateTitle(title):
    """Return *title* made safe for use as a file name.

    Args:
        title: Page title that will become the stem of the saved file.

    Returns:
        The title with every slash, backslash, colon, asterisk, question
        mark, double quote, angle bracket and pipe replaced by ``_``.
    """
    rstr = r"[\/\\\:\*\?\"\<\>\|]"  # / \ : * ? " < > |
    new_title = re.sub(rstr, "_", title)  # replace with underscore
    return new_title


def _page_url(gallery_id, page):
    """Return the URL of page number *page* of gallery *gallery_id*."""
    url_origin = BASE_URL + str(gallery_id)
    if page == 1:
        # The first page has no numeric suffix.
        return url_origin + "/index.html"
    return url_origin + "/index_" + str(page) + ".html"


def _save_page_image(url):
    """Fetch *url* and save the first image of the page under the page title.

    Pages without a ``picmainer`` element are skipped silently. Pages that
    have one but lack a title or an image source are reported and skipped
    instead of crashing the whole crawl.

    Raises:
        urllib.error.URLError: if the page or the image cannot be fetched.
    """
    # Third-party parser, imported where it is needed so that the file-name
    # helper above stays importable on machines without bs4 installed.
    from bs4 import BeautifulSoup

    request = urllib.request.Request(url)
    # The context manager closes the HTTP response once it has been parsed.
    with urllib.request.urlopen(request) as res:
        soup = BeautifulSoup(res, "lxml")

    title_obj = soup.find(attrs={"class": "picmainer"})
    if title_obj is None:
        return
    print(url)

    heading = title_obj.h1
    title = heading.string if heading is not None else None
    content = soup.find("img")
    src = content.get("src") if content is not None else None
    if not title or not src:
        # Without these guards a single malformed page raised
        # AttributeError/TypeError and aborted the entire run.
        print(url + " skipped: missing title or image source")
        return

    file_name = validateTitle(title) + ".jpg"
    # urljoin leaves absolute URLs untouched and resolves relative or
    # protocol-relative ones against the page URL.
    urllib.request.urlretrieve(urljoin(url, src), file_name)
    print(file_name + "保存成功")


def main():
    """Walk every gallery and page, saving one image per page."""
    for j in GALLERY_IDS:
        for i in PAGE_NUMBERS:
            try:
                _save_page_image(_page_url(j, i))
            except error.URLError as e:
                # A failed page must not stop the crawl; report and move on.
                print(e.reason)


if __name__ == "__main__":
    main()
爬取圖片