Python批量爬取堆糖網圖片

阿新 • • 發佈：2018-12-20

import urllib.parse
import requests   #第三方請求庫
import json      
import jsonpath  #處理json檔案的提取庫
from bs4 import BeautifulSoup
import os
import urllib
import re

# Scrape the Duitang search result pages for a keyword and print, for every
# thumbnail found, its alt text, its URL and the image name parsed from the URL.
# NOTE: only the parsing step is implemented; nothing is downloaded or written
# to disk yet.

# Search keyword, percent-encoded so it can be embedded in the query string.
label = 'AI'
label = urllib.parse.quote(label)
# JSON API request observed for the same kind of search (reference only, unused):
#https://www.duitang.com/napi/blog/list/by_search/?kw=%E6%A0%A1%E8%8A%B1&type=feed&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum%2Creply_count%2Cfavorite_blog_id&_type=&start=24&_=1541772636388
# Search page template: first placeholder is the encoded keyword, second is the
# result offset. The original template lacked the '=' after 'start', so the
# offset was never sent as a real query parameter.
url = 'https://www.duitang.com/search/?kw={}&start={}'

# Directory intended for saved images.
# TODO: the download step is not implemented; this path is not used yet.
file_path = 'D:/Python/AI'

# Thumbnail URLs have the form
#   https://b-ssl.duitang.com/uploads/item/<name>.thumb.224_0.<suffix>
# Dots are escaped so they only match literal dots, and the suffix group is
# greedy so it actually captures the extension instead of an empty string.
thumb_pattern = re.compile(
    r'https://b-ssl\.duitang\.com/uploads/item/(.*?)\.thumb\.224_0\.(.*)')

# Walk the result pages. The offset advances by 24 per page (the API URL above
# uses start=24 for the second page -- presumably the page size; verify against
# the site). The original range(0, 24, 240) had stop and step swapped and
# therefore only ever requested offset 0.
for i in range(0, 240, 24):
    u = url.format(label, i)
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get(u, timeout=30)
    print(len(r.text))
    print(r.text)
    print(r.encoding)
    soup = BeautifulSoup(r.text, 'html.parser')
    print(len(soup))
    # Anchors carrying the CSS class "a" are the ones inspected for thumbnails.
    se = soup.find_all('a', {'class': 'a'})
    print(se)

    for ii in se:
        # Some matching anchors may not wrap an <img>; skip them rather than
        # failing with AttributeError on None.
        if ii.img is None:
            continue
        src = ii.img.get('src')
        print(ii.img.get('alt'), src)
        if not src:
            continue
        img = thumb_pattern.findall(src)
        # findall returns [] when the src is not a 224px thumbnail URL.
        if not img:
            continue
        # img[0] is the (name, suffix) tuple of the first (only) match.
        file_name = img[0][0]
        print(file_name)
    
   
##import os,stat
##import urllib.request
## 
##img_url="https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1516371301&di=d99af0828bb301fea27c2149a7070" \
##        "d44&imgtype=jpg&er=1&src=http%3A%2F%2Fupload.qianhuaweb.com%2F2017%2F0718%2F1500369506683.jpg"
##file_path='D:/book/img'
##file_name ="pyt"
## 
##try:
##    #是否有這個路徑
##    if not os.path.exists(file_path):
##    #建立路徑
##        os.makedirs(file_path)
##        #獲得圖片字尾
##    file_suffix = os.path.splitext(img_url)[1]
##    print(file_suffix)
##        #拼接圖片名（包含路徑）
##    filename = '{}{}{}{}'.format(file_path,os.sep,file_name,file_suffix)
##    print(filename)
##       #下載圖片，並儲存到資料夾中
##    urllib.request.urlretrieve(img_url,filename=filename)
## 
##except IOError as e:
##    print("IOError")
##except Exception as e:
##    print("Exception")
##
##
##二：利用讀寫操作寫入檔案，具體程式碼：
##
##import os,stat
##import urllib.request
## 
##for i in range(1,3):
##    if not os.path.exists("./rym"):
##        print("不存在")
##        os.makedirs("./rym")
## 
##    else:
##        print("存在")
##        os.chmod("D:/imagss",777)
## 
## 
##        with urllib.request.urlopen("https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1516371301&di=d99af0828b"
##                                    "b301fea27c2149a7070d44&imgtype=jpg&er=1&src=http%3A%2F%2Fupload.qianhuaweb.com%2F2017%2F0718%"
##                                    "2F1500369506683.jpg", timeout=30) as response, open("./rym/lyj.png"
##                , 'wb') as f_save:
##            f_save.write(response.read())
##            f_save.flush()
##            f_save.close()
##            print("成功")

Python批量爬取堆糖網圖片

import urllib.parse import requests #第三方請求庫 import json import jsonpath #處理json檔案的的提取庫 from bs4 import BeautifulSoup import os im

零基礎爬取堆糖網圖片（一）

## 零基礎爬取堆糖網圖片（一） ### 全文介紹：首先**堆糖網**是一個美圖桌布興趣社群，有大量的~~美女~~圖片今天我們實現搜尋關鍵字爬取堆糖網上相關的美圖。當然我們還可以實現多執行緒爬蟲，加快爬蟲爬取速度 ![](https://img2020.cnblogs.com/blog/1579925/

利用Python批量爬取XKCD動漫圖片，並批量儲存

import requests, os, bs4 url = 'https://xkcd.com' os.makedirs('xkcd',exist_ok = True) while not url.endswith('#'): # download the page

python3網絡爬蟲（2.1）：爬取堆糖美女

pre 線程 span 需要 pic ring clas lin chrome 額，明明記得昨晚存了草稿箱，一覺醒來沒了，那就簡寫點（其實是具體怎麽解釋我也不太懂/xk，純屬個人理解，有錯誤還望指正）環境：　　版本：python3 　　IDE：pycharm201

用Python批量爬取妹紙圖片

通過Python編寫爬蟲，批量爬取妹紙圖片，本文的爬蟲實現爬取妹子圖網站（http://www.mzitu.com/zipai/）中妹子自拍欄目中所有妹子的圖片。開啟自拍欄目地址http://www.mzitu.com/zipai/後，我們發現當前頁面預

Python資料爬蟲學習筆記（11）爬取千圖網圖片資料

需求：在千圖網http://www.58pic.com中的某一板塊中，將一定頁數的高清圖片素材爬取到一個指定的資料夾中。分析：以數碼電器板塊為例 1.檢視該板塊的每一頁的URL：注意到第一頁是“0-1.html”，第二頁是“0-2.html”，由

爬蟲03 爬取堆糖圖片並儲存到本地

# -*- coding: utf-8 -*- import urllib import urllib2 import re i=0 page = 1 url = 'http://www.duitan

爬蟲練習3 爬取堆糖網校花照片

ring http 正在 usr sts 多線程 src 技術 strings 知識點：多線程的實現圖片的下載及寫入字符串高級查找了解動態加載和jsonrequest 的用法獲取數據的api‘https://www.duitang.com/napi/blog/lis

scrapy爬取校花網圖片

xiaohua.py # -*- coding: utf-8 -*- import scrapy from pyquery import PyQuery from scrapy.http import Request from ..items import XiaohuarItem class

python學習爬取中華英才網工作職位

一、技能：（1）scrapy爬蟲的原理；（2）xpath獲取網頁資訊以及正則表示式的使用；二、程式碼：（1）建立工程：　　scrapy startproject wuyoujob1 　　在spi

利用C#爬取煎蛋網圖片

本程式還有待優化，我只爬取了每個頁面的第一張圖片，你們可以自己更新優化程式碼以實現全站爬取的功能。主要用到的名稱空間有： using System; using System.Collections.Generic; using System.ComponentModel; usi

用python來爬取中國天氣網北京，上海，成都8-15天的天氣

2 爬取北京，上海，成都的天氣 from bs4 import BeautifulSoup import random import requests import socket impo

【轉】寫一個簡單的爬蟲來批量爬取新浪網的新聞

工具：Anaconda 先進入該頁，新浪新聞：http://news.sina.com.cn/china/ 往下翻，找到這樣的最新訊息先爬取單個頁面的資訊：（隨便點一個進去），該新聞網址：http://news.sina.com.cn/c/nd/2018-06-08/doc-ihcscwxa1

python爬蟲爬取詩詞名句網

使用requests庫，xpath庫 import requests import time from lxml import etree # 去請求頁面的函式 def request_Header(url): headers = { 'User

Python批量爬取小說

利用BeautifulSoup批量爬取筆趣閣小說。 from bs4 import BeautifulSoup import urllib.request import re import os import threading import time # 通過

python爬蟲爬取鬥圖網最新表情包（第二篇）

上一篇文章爬的表情包是套圖，發現還有一千多頁的最新表情包。兩者的網頁結構有點區別，程式碼需要整改下，看下頁面，規律也比較好找。非常氣憤，上一個部落格被其他爬走了，還是一個培訓機構，插了自己的廣告！所有的表情圖片都是在標籤下，數了一下每一頁都是17行，

Python爬蟲爬取網站上的圖片

Python scrapy 爬取拉勾網招聘資訊

週末折騰了好久，終於成功把拉鉤網的招聘資訊爬取下來了。現在總結一下！環境： windows 8.1 + python 3.5.0 首先使用 scrapy 建立一個專案： E:\mypy> scrapy startproject lgjob 建立後目錄結構：

Python爬蟲-爬取慕課網課程

Python爬取網路圖片使用正則表示式解析Html格式的檔案(其他更好的方法以後會繼續更新) 獲取慕課網課程圖片從網站上獲取課程圖片首先檢視頁面html程式碼圖2 html程式

Python批量爬取堆糖網圖片

相關推薦