1. 程式人生 > python爬蟲練習1:豆瓣電影TOP250

python爬蟲練習1:豆瓣電影TOP250

import ria fff python top font beautiful code pen

項目1:實現豆瓣電影TOP250標題爬取:
 1 from urllib.request import urlopen
 2 from bs4 import BeautifulSoup
 3 import re
 4 
 5 class doubanSpider():
 6     def __init__(self):
 7         """
 8         初始化,
 9         頁碼,URL,存儲數據,
10         """
11         self.page = 0;
12         # "http://movie.douban.com/top250?start=25&filter=&type=" 第二頁
13 # 第一頁 14 self.cur_url = "http://movie.douban.com/top250?start=0&filter=&type=" 15 self.datas = [] 16 17 def claw(self): 18 while self.page<10: 19 self.downloadURL() 20 self.updateURL() 21 self.output() 22 23 def updateURL(self):
24 self.page+=1 25 self.cur_url.replace("start=0","start="+str(self.page*25)) 26 27 def downloadURL(self): 28 html = urlopen(self.cur_url) 29 bsObj = BeautifulSoup(html,"html.parser") 30 datas = bsObj.findAll("span", {"class": "title"}) 31 for data in
datas: 32 str = data.get_text() 33 if "\xa0/\xa0" not in str: 34 self.datas.append(str) 35 36 def output(self): 37 num = 1 38 for data in self.datas: 39 print("TOP"+str(num)+": " +data) 40 num+=1 41 42 if __name__ == "__main__": 43 print("豆瓣電影TOP250:python抓取") 44 myspider = doubanSpider() 45 myspider.claw()

技術分享

python爬蟲練習1:豆瓣電影TOP250