Python 通過下載連結下載 Excel 檔案,並直接將資料寫入資料庫
阿新 • • 發佈:2018-11-02
from urllib.request import urlopen
import calendar
import sys
import datetime
import os  # no longer used here; kept so the module namespace is unchanged

# NOTE: psycopg2 is imported lazily inside GPCommand.connectGp so that the
# pure helpers in this module can be imported on machines without the driver.

# Column order of the target table, excluding load_dt (filled by now()).
_INSERT_COLUMNS = (
    'date', 'name', 'email', 'institution', 'validatecode',
    'formhash', 'return_message', 'return_url', 'url_type',
)

# Placeholders are bound by the driver, so values never need manual quoting.
_INSERT_SQL = (
    "INSERT INTO dw_ana.whitepaper_application_note_leads_report "
    "(date,name,email,institution,validatecode,formhash,return_message,return_url,load_dt,url_type) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s,now(),%s) "
)

# Field order of a data row in each report. The white-paper report carries an
# extra "validatecode" column that the application-note report lacks.
_WHITE_PAPER_FIELDS = (
    'date', 'name', 'email', 'institution', 'validatecode',
    'formhash', 'return_message', 'return_url',
)
_APPLICATION_NOTE_FIELDS = (
    'date', 'name', 'email', 'institution',
    'formhash', 'return_message', 'return_url',
)


class GPCommand(object):
    """Thin wrapper around a psycopg2 connection used to load the leads report."""

    def __init__(self):
        """Store the (placeholder) connection settings; no connection is opened."""
        self.hostname = 'XXXXXXXXX'
        self.username = 'XXXXXX'
        self.password = 'XXXXXXXXX'
        self.database = 'XXXX'

    def connectGp(self):
        """Open the connection and a cursor.

        Returns:
            'con_successful' on success, otherwise a string starting with
            'con_error' followed by the formatted exception info.
        """
        import psycopg2  # lazy: see the module-level note

        try:
            self.connect = psycopg2.connect(
                host=self.hostname,
                user=self.username,
                password=self.password,
                dbname=self.database
            )
            self.cursor = self.connect.cursor()
        except psycopg2.Error:
            error = 'Failed to setup Postgres environment.\n{0}'.format(sys.exc_info())
            print('connect gp error.'+'\n' + '資料庫連線失敗')
            return 'con_error' + error
        print("connect gp successful."+'\n' + '資料庫連線成功')
        return 'con_successful'

    def closeMysql(self):
        """Close the cursor and the connection (uncommitted work is discarded)."""
        self.cursor.close()
        self.connect.close()
        print("資料庫已關閉")

    def insert_data(self, dict, commit=True):
        """Insert one report row.

        Args:
            dict: mapping holding every key listed in ``_INSERT_COLUMNS``.
                (The parameter name shadows the builtin; it is kept only for
                backward compatibility with existing callers.)
            commit: commit right after the insert (default, as before). Pass
                False to batch several inserts into one transaction.

        Raises:
            KeyError: if a required key is missing from the mapping.
            Exception: whatever the driver raises; the transaction is rolled
                back before the exception is re-raised.
        """
        row = dict
        params = tuple(row[column] for column in _INSERT_COLUMNS)
        try:
            # Values are passed separately from the SQL text, so quotes or
            # other special characters in the data cannot alter the statement.
            self.cursor.execute(_INSERT_SQL, params)
            if commit:
                self.connect.commit()
        except Exception:
            # Previously this printed the error and called os._exit(0), which
            # reported success to the caller/scheduler and skipped cleanup.
            self.connect.rollback()
            raise

    def delete_date(self):
        """Delete last month's rows; the caller is responsible for committing."""
        delete_sql = (
            "delete from dw_ana.whitepaper_application_note_leads_report "
            "where to_char(cast(date as date),'YYYY-MM') = "
            "to_char(cast(CURRENT_DATE - interval '1 month' as date),'YYYY-MM')"
        )
        self.cursor.execute(delete_sql)


def escape_character(string):
    """Return *string* with every single quote doubled (SQL literal escaping).

    Kept for backward compatibility; ``GPCommand.insert_data`` now binds
    parameters through the driver and must NOT be given pre-escaped values.
    """
    return string.replace("'", "''")


def last_month(now_time):
    """Return a ``datetime.datetime`` one calendar month before *now_time*.

    The day of month is clamped to the length of the previous month, so for
    example 31 March maps to 28/29 February instead of raising ValueError.
    """
    year = now_time.year
    month = now_time.month - 1
    if month == 0:
        month = 12
        year -= 1
    day = min(now_time.day, calendar.monthrange(year, month)[1])
    return datetime.datetime(year=year, month=month, day=day)


def _parse_report(text, url_type, has_validatecode):
    """Turn the tab-separated report text into a list of row dicts."""
    field_names = _WHITE_PAPER_FIELDS if has_validatecode else _APPLICATION_NOTE_FIELDS
    records = []
    # The first line is skipped (presumably a header row - confirm with the feed).
    for line in text.split('\r\n')[1:]:
        fields = line.split('\t')
        # Require every column that is read below; the old guards were one
        # short and let rows through that then failed with IndexError.
        if len(fields) < len(field_names):
            continue
        record = {'validatecode': '', 'url_type': url_type}
        for field_name, value in zip(field_names, fields):
            # Every field except the date has its first and last character
            # dropped (presumably surrounding quotes - confirm with the feed).
            record[field_name] = value if field_name == 'date' else value[1:-1]
        records.append(record)
    return records


def _download_records(url, report_month):
    """Download one monthly report and return its parsed rows."""
    url_type = url.split('=')[1]
    # The original compared the full URL with a literal that matched no entry
    # in the URL list, so the white-paper layout was never selected.
    has_validatecode = 'white-paper' in url_type
    with urlopen(url + report_month + '.xls') as response:
        text = response.read().decode('utf-8')
    return _parse_report(text, url_type, has_validatecode)


def main():
    """Reload last month's leads from the two download links into the database."""
    report_month = last_month(datetime.date.today()).strftime("%Y%m")
    urls = ['https://www.xxxxx.com/monthly/down?file=protein-white-paper11','https://www.xxxx.com/monthly/down?file=protein-application-note11']

    # Download and parse everything first so that a failed download can never
    # leave the table with last month's rows deleted but not replaced.
    records = []
    for url in urls:
        records.extend(_download_records(url, report_month))
    if not records:
        print('no rows downloaded; database left unchanged')
        return

    gpCommand = GPCommand()
    connect_result = gpCommand.connectGp()
    if connect_result != 'con_successful':
        # Prints the error detail to stderr and exits with a non-zero status.
        sys.exit(connect_result)
    try:
        # Delete and inserts share one transaction, committed once at the end.
        gpCommand.delete_date()
        for record in records:
            gpCommand.insert_data(record, commit=False)
        gpCommand.connect.commit()
    finally:
        gpCommand.closeMysql()


if __name__ == "__main__":
    main()