1. 程式人生 > 實用技巧 >scrapy的中介軟體(下載中介軟體)

scrapy的中介軟體(下載中介軟體)

# 下載中介軟體
- process_request:返回的物件不同,後續的處理流程也不同(可在此更換請求頭、加 cookie、加代理等)


class CnblogsDownloaderMiddleware:
    @classmethod
    def from_crawler(cls, crawler):
        pass
    def process_request(self, request, spider):
        # Called for each request that goes through the downloader
        # middleware.

        # Must either:
        # - return None: continue processing this request
        # - or return a Response object
        # - or return a Request object
        # - or raise IgnoreRequest: process_exception() methods of
        #   installed downloader middleware will be called

        # 1. Replace the request headers
        # from scrapy.http.headers import Headers
        # Option 1: set a fixed User-Agent
        # request.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
        # Option 2: use fake-useragent
        # pip3 install fake-useragent
        # from fake_useragent import UserAgent
        # request.headers['User-Agent'] = UserAgent().random
        # print(request.headers)

        # 2. Add cookies --- cookie pool
        # Assume you have already set up a cookie pool
        # print('00000--',request.cookies)
        # request.cookies={'username':'asdfasdf'}

        # 3. Add a proxy
        # print(request.meta)
        # request.meta['download_timeout'] = 20
        # request.meta["proxy"] = 'http://218.22.7.62:53281'