Python爬網常見方法:find_all與re的結合使用
阿新 • • 發佈:2020-07-16
# Demo: combining BeautifulSoup's find_all() with compiled regular
# expressions to select text nodes and tags from a small HTML document.
import re

from bs4 import BeautifulSoup

# The markup is assembled from adjacent literals with no added whitespace,
# so the parsed tree contains exactly the text nodes written here (extra
# newlines would show up as additional strings in the `\D` query below).
htmlDoc = (
    '<!DOCTYPE html><html><head>'
    '<meta charset="utf-8">'
    '<meta http-equiv="X-UA-Compatible" content="IE=edge">'
    '<title>標題</title>'
    '<link rel="stylesheet" href="">'
    '</head><body>'
    '<h2>航天大學</h2>'
    '<ol>'
    '<li>abc</li>'
    '<li id="myid">12344</li>'
    '<li>12abcd34</li>'
    '<li class="myred">55aaaa555</li>'
    '<li class="myred">6789eee</li>'
    '<li data-x="cs">fff</li>'
    '<li>ggg</li>'
    '<li>hhh</li>'
    '<li>6789ABCD</li>'
    '</ol></body></html>'
)

soup = BeautifulSoup(htmlDoc, "html.parser")

# Strings matching the literal text '航天'.
print(soup.find_all(string=re.compile('航天')))

# <meta> tags whose `charset` attribute matches 'utf'.
print(soup.find_all('meta', {'charset': re.compile('utf')}))

# Regex patterns below are raw strings: '\d', '\D' and '\w' are invalid
# escape sequences in ordinary string literals and trigger a
# DeprecationWarning / SyntaxWarning on modern Python versions.

# Strings matching a digit.
print(soup.find_all(string=re.compile(r'\d')))

# Strings matching a non-digit character.
print(soup.find_all(string=re.compile(r'\D')))

# Strings matching '^1' (a leading '1').
print(soup.find_all(string=re.compile('^1')))

# Strings matching '1', two word characters, then '4'.
print(soup.find_all(string=re.compile(r'1\w\w4')))
re