python讀取word文件,插入mysql資料庫的示例程式碼
阿新 • • 發佈:2020-01-09
表格內容如下:
1、實現批量匯入word文件,取文件標題中的數字作為編號
2、除取上面打鉤的內容需要匹配出來入庫入庫,其他內容全部直接入庫mysql
# wuyanfeng # -*- coding:utf-8 -*- # 讀取docx中的文字程式碼示例 import docx import pymysql import re import os # 建立資料庫連結 conn = pymysql.connect( host='rm-bp1vu5d84dg12c6d59o.mysql.rds.aliyuncs.com',port=3306,user='root',passwd='wYf092415*',db='pays',charset='utf8',) # 建立遊標 cursor = conn.cursor() #切片函式 def section(info,key,len11): a = len(info) print(a,type(a)) d = [] e = 0 g = -1 i = 0 task_class=[] while i < len(info): # for i in range(len(info)): # i+=1 print("i::::",i) try: #c = info.index("a",e) #print("c:::::",c) c = info.index(key,c) print("c型別判斷",type(c)) except ValueError: print(ValueError) try: if (c != '') & (g < int(c)): d.append(c) g = c i = c + 1 print("illlldddd:",i) e = c + 1 continue elif (c == ''): break except UnboundLocalError: print(UnboundLocalError) return task_class break print("d",d,type(d)) print(d[0],type(d[0])) print("d的長度:",len(d)) #開始切片 if len(d) != 0: for j in range(len(d)): print("info11:::",info,type(info)) info = ''.join(info) print("info222:::",type(info)) print("d[%d]"%j,d[j]) #print("d[j]:5"%j,info[d[j]:5]) llll = info[d[j]+1:d[j]+5] print("d[%d]:5" % j,llll) task_class.append(llll) print("task_class::11",task_class) task_class=",".join(task_class) print("str1112222",task_class) return task_class def insettable(file): print("file::::::::::::::::::::",file) print("type::::::::::::::::::::",type(file)) # file1 = file # file1 = str(file1) ddd = re.findall("知識庫\\\(\d+)",file) print("ddd:::::::::::",ddd) print("ddd[0]:::",ddd[0]) ddd = int(ddd[0]) print("ddd::::",type(ddd)) file = docx.Document(file) # 讀取表格: t = file.tables[0] print(t) print("1:",t.cell(0,0).text) # 1 cell1 = t.cell(0,0).text print("tyep::::",type(t.cell(0,0).text)) print("2:",1).text) # 2 cell2 = t.cell(0,1).text print("2:",2).text) # 2 cell3 = t.cell(0,2).text print("2:",3).text) # 2 cell4 = t.cell(0,3).text print("cell4:::::::::",cell4) print("3:",t.cell(1,0).text) # 3 cell5 = t.cell(1,0).text print("4:",2).text) # 4 cell6 = t.cell(1,2).text print("5:",3).text) # 5 task_type = t.cell(1,3).text # task_type = re.findall('.*[☑√](.*)$',cell7) # task_type = ''.join(cell7) print("task_type111111:",task_type) # task_class = task_class[0:4] '''低階處理方式 a = int(task_type.count("☑")) print("a|||||||",a,type(a)) b = int(task_type.count("√")) print("b|||||||",b,type(a)) if (a == 1) | (b == 1): print("111111111111111111") # task_type = re.findall('.*[☑√](.*)$',task_type) task_type = re.findall('.*[☑√](.*)$',task_type) print("task_type1",task_type) task_type = ''.join(task_type) print("task_type2",task_type) task_type = task_type[0:4] print("task_type3:d:%s,b=%d" % (a,b),task_type) elif (a == 0) & (b == 0): print("2222222222222222222") task_type = '法定職責' print("a:%s,b=%s" % (a,task_type) elif (a == 2) | (b == 2): print("333333333333333333333") task_type = '法定職責,工作職責 ' print("a:%s,b=%s" % (a,task_type) ''' #呼叫切片函式 task_type1 = section(task_type,"√",4) task_type2 = section(task_type,"☑",4) task_type1 = "".join(task_type1) task_type2 = "".join(task_type2) print("task_type1:::",task_type1,type(task_type1)) print("task_type2:::",task_type2,type(task_type2)) if task_type1.strip()!="": task_type = task_type1 print("task_type111:::",task_type1) elif task_type2.strip()!="": task_type = task_type2 print("task_type222:::",task_type2) print("6:",4).text) # 6 cell8 = t.cell(1,4).text print("7:",t.cell(2,1).text) # 7 cell9 = t.cell(2,1).text # 獲取文件物件 # file = docx.Document("D:\\配置庫\\公案APP\\1.2 系統規格\\知識庫\\14人員死亡先期處置.docx") print("段落數:" + str(len(file.paragraphs))) # 段落數為13,每個回車隔離一段 lenn = len(file.paragraphs) print("len:",lenn) # 輸出每一段的內容 for para in file.paragraphs: print(para.text) # 輸出段落編號及段落內容 for i in range(len(file.paragraphs)): print("第" + str(i) + "段的內容是:" + file.paragraphs[i].text) list6 = [] for i in range(len(file.paragraphs)): if 0 == i: print("i:",i) lis0 = file.paragraphs[i].text print("list0:",lis0) print(type(lis0)) elif 1 == i: print("i:",i) task_class = file.paragraphs[i].text print("lis1",task_class,type(task_class)) '''低階處理方式 print("task_class111111:",task_class) c = int(task_class.count("☑")) task_class = ''.join(task_class) #print(task_class.index('☑')) print("c|||||||",c,type(c)) d = int(task_class.count("√")) print(task_class.index('√')) print("d|||||||",type(d)) task_class = re.findall(r'[☑√](?:.*)',task_class) task_class = ''.join(task_class) task_class = task_class[1:5] print("task_class",task_class) ''' #呼叫切片函式 task_class1 = section(task_class,4) task_class2 = section(task_class,4) task_class1 = "".join(task_class1) task_class2 = "".join(task_class2) print("task_class1:::",task_class1,type(task_class1)) print("task_class2:::",task_class2,type(task_class2)) if task_class1.strip()!="": task_class = task_class1 print("task_class11:::",task_class1) elif task_class2.strip()!="": task_class = task_class2 print("task_class22:::",task_class2) if 2 == i: print("i:",i) lis2 = file.paragraphs[i].text print("lis2",lis2) print(type(lis2)) preparer = re.findall('填表單位:(.*?)$',lis2) preparer = ''.join(preparer) print("preparer:%s" % preparer) # elif 3 == i: # print("i:",i) # lis3 = file.paragraphs[i].text elif 3 == i: print("i:",i) lis4 = file.paragraphs[i].text print("lis4",lis4) print(type(lis4)) elif 3 < i < lenn - 1: print("i:",i) print(file.paragraphs[i].text) print(type(file.paragraphs[i].text)) # list6[i-5] = list6.append(file.paragraphs[i].text) list6.append(str(file.paragraphs[i].text).strip('\xa0')) # list6.append("%s\n" % str(file.paragraphs[i].text).strip('\xa0')) print(list6) key_steps = "\n".join(list6) # print("key_steps:\n",key_steps.strip('\n')) cursor.execute( "insert into `t_knowledge_base` (`no`,`preparer`,`task_class`,`task_name`,`task_specification`,`task_type`,`task_desc`,`task_basis`,`key_steps`) values ('%d','%s',NULL,'%s')" % ( ddd,preparer,cell2,cell4,task_type,cell9,key_steps)) conn.commit() def traverse(f): fs = os.listdir(f) for f1 in fs: tmp_path = os.path.join(f,f1) if not os.path.isdir(tmp_path): print('檔案: %s' % tmp_path) insettable(tmp_path) else: print('資料夾:%s' % tmp_path) traverse(tmp_path) path = 'D:\\配置庫\公案APP\\1.2 系統規格\\知識庫' traverse(path) #單檔案調測 # path = 'D:\\配置庫\\公案APP\\1.2 系統規格\\知識庫\\14人員死亡先期處置.docx' # insettable(path) # 關閉遊標 cursor.close() # 關閉連線 conn.close()
以上這篇python讀取word文件,插入mysql資料庫的示例程式碼就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支援我們。