python內建模組之正則re模組&collections模組&time&datetime模組

阿新 • • 發佈：2021-11-25

1. re模組

在 Python 中想使用正則表示式必須藉助模組，而 re 就是其中之一。

import re

# 1. re.findall(pattern, text)
# Return every non-overlapping match as a list; the list is empty when
# nothing matches.
res = re.findall('a', 'eva jason joshua jack')
print(res)  # ['a', 'a', 'a', 'a']

# 2. re.search(pattern, text)
# Scan through the text and stop at the first match.
res1 = re.search('a', 'eva jason joshua jack')
print(res1)  # <re.Match object; span=(2, 3), match='a'>
# search() returns None when nothing matches; calling .group() on None
# raises AttributeError. Here a match exists, so .group() is safe.
print(res1.group())  # the matched text: a


# 3. re.match(pattern, text)
# Match only at the very beginning of the text.
res2 = re.match('a', 'badc')
print(res2)  # None -- 'badc' does not start with 'a'
# match() also returns None on failure, so always check the result before
# calling .group(); an unguarded res2.group() would crash here.
if res2:
    print(res2.group())
else:
    print('不好意思，沒找到')

re模組其他方法

import re

# 1. re.split: split the string on every character matched by [ab].
res = re.split('[ab]', 'abcd')
print(res)  # ['', '', 'cd']


# 2. re.sub: replace whatever the pattern matches; `count` limits how many
# matches are replaced. Similar to str.replace, but driven by a pattern.
# Patterns are raw strings so that \d reaches the regex engine untouched.
res1 = re.sub(r'\d', 'H', 'eva3jason4yuan4', count=1)
print(res1)  # evaHjason4yuan4

# 3. re.subn: like re.sub, but returns a tuple of
# (new_string, number_of_replacements).
res2 = re.subn(r'\d', 'H', 'eva3jason4yuan4')
print(res2)  # ('evaHjasonHyuanH', 3)

# 4. (commonly used) re.compile: pre-compile a pattern once so it can be
# reused many times afterwards.
obj = re.compile(r'\d{3}')
res3 = obj.search('ajcj23o8a933')
res4 = obj.findall('lo837k38274')
print(res3.group(), res4)  # 933 ['837', '382']


# 5. (commonly used) re.finditer: returns an iterator of match objects.
res5 = re.finditer(r'\d', 'ds3s4784a')
print(res5)  # an iterator object, not the matches themselves
# Loop over the iterator and call .group() on each match object.
# When nothing matches, the resulting list is simply empty.
digits = [m.group() for m in res5]
print(digits)  # ['3', '4', '7', '8', '4']
# 找不到時返回空列表

re擴充套件---分組優先機制

import re

# 1. Capturing groups with re.search
res = re.search(r'^[1-9](\d{14})(\d{2}[0-9x])?$', '110105199812067023')
print(res.group())   # 110105199812067023  (the whole match)
print(res.group(1))  # 10105199812067      (first group)
print(res.group(2))  # 023                 (second group)

# 2. (important) re.findall gives priority to groups: with one unnamed
# capturing group it returns only what that group captured.
res1 = re.findall(r'^[1-9]\d{14}(\d{2}[0-9x])?$', '110105199812067023')
print(res1)  # ['023']

# Putting ?: right after the opening parenthesis makes the group
# non-capturing, so findall returns the whole match again.
res2 = re.findall(r'^[1-9]\d{14}(?:\d{2}[0-9x])?$', '110105199812067023')
print(res2)  # ['110105199812067023']


# 3. (important) Named groups: (?P<name>...) gives a group an alias.
res3 = re.search(
    r'^[1-9](?P<othername>\d{14})(?P<kkk>\d{2}[0-9x])?$',
    '110105199812067023',
)
print(res3)
# <re.Match object; span=(0, 18), match='110105199812067023'>

print(res3.group())  # 110105199812067023

# A named group is reachable both by index and by name.
print(res3.group(1))            # 10105199812067
print(res3.group('othername'))  # 10105199812067
print(res3.group('kkk'))        # 023

# 4. With several groups, findall returns one tuple of group values per
# match.
res4 = re.findall(
    r'^[1-9](?P<xxx>\d{14})(?P<kkk>\d{2}[0-9x])?$',
    '110105199812067023',
)
print(res4)  # [('10105199812067', '023')]

re實戰之爬取紅牛分公司資料

import re


# Load the text to scan. redbull.txt holds the saved page source of the
# Red Bull branch-office web page.
with open(r'redbull.txt', 'r', encoding='utf8') as f:
    data = f.read()

# One non-greedy capture per field; each findall returns the captured
# texts in document order.
# Branch names
title_list = re.findall('<h2>(.*?)</h2>', data)
# Branch addresses
address_list = re.findall("<p class='mapIco'>(.*?)</p>", data)
# NOTE(review): the variable is named "email", but the report below labels
# this field as a postal code -- confirm which one 'mailIco' really holds.
email_list = re.findall("<p class='mailIco'>(.*?)</p>", data)
# Branch phone numbers
phone_list = re.findall("<p class='telIco'>(.*?)</p>", data)

# Report layout: name, address, mailIco field, phone -- in that order.
template = """
    公司名稱：{}，
    公司地址：{}，
    公司郵編：{}，
    公司電話：{}
    """

# zip pairs the i-th entry of every list into one 4-tuple per branch.
res = zip(title_list, address_list, email_list, phone_list)
for data_tuple in res:
    print(template.format(*data_tuple))

4. collections模組

該模組內部提供了一些高階的資料型別

namedtuple: 具名元組

from collections import namedtuple
# Usage -- the field names may be given as a list or as one
# whitespace-separated string:
#   namedtuple('TypeName', ['field1', 'field2', ...])
#   namedtuple('TypeName', 'field1 field2 ...')

# Basic usage
point = namedtuple('座標', ['東經', '北緯'])
res = point(118.88, 23.5)
print(res)
print(res.東經)
print(res.北緯)
# Output:
# 座標(東經=118.88, 北緯=23.5)
# 118.88
# 23.5

# Field names passed as a single space-separated string
point1 = namedtuple('座標', 'x y z')
res = point1(1, 2, 3)
print(res)
print(res.x)
print(res.y)
print(res.z)
# Output:
# 座標(x=1, y=2, z=3)
# 1
# 2
# 3

deque：佇列與雙端佇列

import queue
from collections import deque

# Create a FIFO queue with room for 5 items.
q = queue.Queue(5)
# Put items into the queue.
q.put('fir')
q.put('sec')
q.put('thr')

# Take items out of the queue, in insertion order.
print(q.get())
print(q.get())
print(q.get())
# The queue is now empty. A plain q.get() would block here forever waiting
# for a new item, and the rest of the script would never run. get_nowait()
# shows the "empty" condition without hanging.
try:
    print(q.get_nowait())
except queue.Empty:
    print('queue is empty')


# Double-ended queue
q = deque([11, 22, 33])
print(q)
q.append(44)        # add on the right
q.appendleft(55)    # add on the left

print(q.pop())      # take from the right -> 44
print(q.popleft())  # take from the left  -> 55

Counter：計數器，主要用來計數

from collections import Counter

# Count how many times each character occurs -- first by hand.
res = 'acbakjcabhacbakcbalcbacbaacdfha'
new_dict = {}
for i in res:
    if i not in new_dict:
        new_dict[i] = 1
    else:
        new_dict[i] += 1
print(new_dict)

# Counter does the same counting in a single call.
ret = Counter(res)
print(ret)
# Output:
# {'a': 10, 'c': 7, 'b': 6, 'k': 2, 'j': 1, 'h': 2, 'l': 1, 'd': 1, 'f': 1}
# Counter({'a': 10, 'c': 7, 'b': 6, 'k': 2, 'h': 2, 'j': 1, 'l': 1, 'd': 1, 'f': 1})

OrderedDict：有序字典

from collections import OrderedDict
# The same key/value pairs feed both a plain dict and an OrderedDict so
# their printed forms can be compared side by side.
pairs = [('name', 'joshua'), ('pwd', 123), ('hobby', 'study')]

normal_dict = dict(pairs)
print(normal_dict)

order_dict = OrderedDict(pairs)
print(order_dict)
OrderedDict 的 key 會保持插入時的順序，而不是按照 key 的大小排序。（自 Python 3.7 起，一般的 dict 也會保持插入順序。）

defaultdict：帶有預設值的字典

# Manual grouping: both keys must be created up front with empty lists.
l = [11, 22, 33, 44, 55, 66, 77, 88, 99]
li_dic = {'k1': [], 'k2': []}

for i in l:
    bucket = 'k1' if i > 66 else 'k2'
    li_dic[bucket].append(i)

# Dictionary with a default value: a missing key is created on first
# access with an empty list, so no up-front initialisation is needed.
# NOTE(review): this half splits at 60 while the manual version above
# splits at 66, so 66 lands in a different bucket -- confirm that the
# difference is intended.
from collections import defaultdict
values = [11, 22, 33, 44, 55, 66, 77, 88, 99]
my_dict = defaultdict(list)
for value in values:
    my_dict['k1' if value > 60 else 'k2'].append(value)
print(my_dict)

5. time模組

時間三種表現形式：

時間戳（秒數）：自 1970-01-01 00:00:00 UTC 起算所經過的秒數
結構化時間（一般給機器看的）
格式化時間（一般給人看的）
三種時間是可以相互轉換的！！！

import time

# Block right here for the given number of seconds.
time.sleep(3)
# Current timestamp as a float number of seconds (the value is not used
# here).
time.time()

# Formatted time. %X is the locale's time representation; in the default
# C locale it renders the same as %H:%M:%S.
for fmt in ('%Y-%m-%d', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %X'):
    print(time.strftime(fmt))

# Keep a list of the other format codes somewhere easy to look up.

# Structured time: localtime() is local time, gmtime() is UTC. Both can
# also convert a given timestamp into a structured time.
for to_struct in (time.localtime, time.gmtime):
    print(to_struct())

6. datetime模組

import time
from datetime import date, datetime, timezone, timedelta

print(date.today())
print(datetime.today())  # equivalent to print(datetime.now())
# date     -> year, month, day
# datetime -> year, month, day, hour, minute, second
# time     -> hour, minute, second
res = datetime.today()
print(res.year)
print(res.month)
print(res.day)
print(res.weekday())      # day of week as 0-6 (Monday is 0)
print(res.isoweekday())   # day of week as 1-7 (Monday is 1)

# Time differences: timedelta
ctime = datetime.today()
time_tel = timedelta(days=2)
print(ctime - time_tel)
# The operand must be the datetime object; adding the timedelta to an
# unrelated object raises TypeError.
print(time_tel + ctime)

# datetime  = datetime +/- timedelta
# timedelta = datetime - datetime   (two datetimes cannot be added)

print(datetime.now())
# datetime.utcnow() is deprecated; ask for an aware UTC datetime instead.
print(datetime.now(timezone.utc))


# Format conversions
# string -> datetime
text = '2021-11-11'
v1 = datetime.strptime(text, '%Y-%m-%d')
print(v1)

# datetime -> string
v1 = datetime.now()
val = v1.strftime('%Y-%m-%d %H:%M:%S')
print(val)

# timestamp -> datetime (needs the time module, imported at the top of
# this snippet)
ctime = time.time()
v1 = datetime.fromtimestamp(ctime)
print(v1)

# datetime -> timestamp
v1 = datetime.now()
val = v1.timestamp()
print(val)