Python爬取彩票雙色球數據並存到excel表格裏
阿新 • • 發佈:2018-04-19
python 爬蟲 環境:python3
目的:爬取15年的雙色球開獎號碼以及期數和開獎日期。
上代碼
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Author : 杜文濤
# @Time : 2018/4/19 16:01
# @File : cpssq.py
"""Scrape double-color-ball lottery draws from kaijiang.zhcw.com into an .xls file.

Each listing page is fetched over HTTP, the draw rows are extracted with a
regular expression, and every draw (date, issue number, six red balls, one
blue ball) is written as one row of an xlwt worksheet.
"""
import re
import time

import requests
import xlwt

# Listing page URL; ``%s`` is replaced by the 1-based page number.
LIST_URL = "http://kaijiang.zhcw.com/zhcw/html/ssq/list_%s.html"

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Column titles of the worksheet, in column order.
HEADERS = (
    "日期",
    "期數",
    "第一個紅球",
    "第二個紅球",
    "第三個紅球",
    "第四個紅球",
    "第五個紅球",
    "第六個紅球",
    "藍球",
)

# The total page count is the first <strong> element after the pager marker.
_PAGE_COUNT_RE = re.compile(r"class=\"pg\".*?<strong>(.*?)</strong>")

# One draw row: date, issue number, six red balls ("rr") and the blue ball.
_ROW_RE = re.compile(
    r"<tr>.*?<td align=\"center\">(.*?)</td>"
    r".*?<td align=\"center\">(.*?)</td>"
    r".*?<td align=\"center\" style=\"padding-left:10px;\">"
    + r".*?<em class=\"rr\">(.*?)</em>" * 6
    + r".*?<em>(.*?)</em></td>",
    re.S | re.M,
)

# Total number of listing pages; set by get_all_page() and kept as a module
# global so that a bare get_num() call keeps working.
all_page = None


def _fetch_html(page_num):
    """Return the UTF-8 decoded HTML of listing page *page_num*.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url=LIST_URL % page_num, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page into zero rows.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def get_all_page():
    """Fetch the first listing page and return the total number of pages.

    The result is also stored in the module-level ``all_page``.

    Raises:
        IndexError: if the page-count marker is not found in the HTML.
        ValueError: if the extracted page count is not an integer.
        requests.RequestException: if the page cannot be fetched.
    """
    global all_page
    match = _PAGE_COUNT_RE.search(_fetch_html(1))
    if match is None:
        # IndexError is what the earlier ``findall(...)[0]`` lookup raised;
        # the type is kept and only a message is added.
        raise IndexError("page count marker not found in the listing page")
    all_page = int(match.group(1))
    return all_page


def get_num(total_pages=None, *, include_last_page=False,
            output_path="雙色球統計結果.xls", delay=5):
    """Scrape the listing pages and save every draw to an .xls workbook.

    Args:
        total_pages: Total number of listing pages. When omitted, the value
            stored by a previous get_all_page() call is used, and
            get_all_page() is called if there is none.
        include_last_page: The last page is skipped by default; pass True
            to scrape it as well.
        output_path: Destination of the workbook.
        delay: Seconds to pause between two consecutive page requests.

    Raises:
        requests.RequestException: if a page cannot be fetched.
    """
    if total_pages is None:
        total_pages = all_page if all_page is not None else get_all_page()
    last_page = total_pages if include_last_page else total_pages - 1

    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet(u'sheel1', cell_overwrite_ok=True)

    # The header row is the same for every page, so write it exactly once.
    for col, title in enumerate(HEADERS):
        sheet.write(0, col, title)

    next_row = 1  # row 0 holds the header
    for page_num in range(1, last_page + 1):
        draws = _ROW_RE.findall(_fetch_html(page_num))
        print("正在寫入第%s頁" % (page_num))
        for draw in draws:
            for col, value in enumerate(draw):
                sheet.write(next_row, col, value)
            next_row += 1
        # Pause between requests only; no need to wait after the last one.
        if page_num < last_page:
            time.sleep(delay)

    workbook.save(output_path)


if __name__ == '__main__':
    get_num(get_all_page())
運行後的結果:
說明:最後一頁數據較少,所以沒有爬取;如果想要最後一頁的數據,可以把循環修改成下面的代碼:
for page_num in range(1,all_page+1):
關註微信公眾號回復“彩票”獲取源代碼和數據
微信公眾號:
Python爬取彩票雙色球數據並存到excel表格裏