scrapy mysql的同步插入與非同步插入
阿新 • • 發佈:2018-12-16
主要程式碼是在Pipeline中進行編寫,上完整程式碼:
同步插入程式碼:
# 同步插入
class MysqlPipeline2(object):
    """Scrapy item pipeline that stores each item in MySQL using blocking calls.

    A single connection and cursor are opened when the pipeline is created
    and reused for every item until ``close_spider`` is called.
    """

    def __init__(self):
        # NOTE(review): credentials are hard-coded; consider loading them
        # from the project settings instead.
        # Keyword arguments are required by PyMySQL 1.0+, which no longer
        # accepts positional connection parameters.
        # Add charset='utf8' here if Chinese text has to be stored.
        self.conn = pymysql.connect(
            host='localhost',
            user='root',
            password='Abcd1234',
            database='test',
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one item into ``test_zxf`` and commit.

        Args:
            item: mapping with the keys ``quote``, ``author``, ``tags``,
                ``born_date`` and ``born_location``.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, so that later pipelines keep receiving it.

        Raises:
            Any exception raised by the driver; the pending transaction is
            rolled back before the exception propagates.
        """
        insert_sql = """
            insert into test_zxf(quote,author,tags,born_date,born_location) VALUES(%s,%s,%s,%s,%s)
        """
        params = (
            item['quote'],
            item['author'],
            item['tags'],
            item['born_date'],
            item['born_location'],
        )
        try:
            # Values are passed separately so the driver does the escaping.
            self.cursor.execute(insert_sql, params)
            # Without a commit the row is never persisted.
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor first, then the connection that owns it."""
        self.cursor.close()
        self.conn.close()
mysql非同步插入程式碼(注意:下方範例實際上仍是以 pymysql 進行阻塞式的同步寫入;若要真正做到非同步,需改用 Twisted 的 adbapi 連線池):
class MysqlPipeline(object):
    """Scrapy item pipeline that writes items into ``zb_contest_data``.

    Connection parameters are read from the ``settings`` module. All
    database calls go through a blocking ``pymysql`` connection and run
    inside ``process_item`` itself.
    """

    def __init__(self):
        self.host = settings.DB_HOST
        self.port = settings.DB_PORT
        self.user = settings.DB_USER
        self.pwd = settings.DB_PWD
        self.name = settings.DB_NAME
        self.charset = settings.DB_CHARSET
        self.connect()

    def connect(self):
        """Open the connection and create the cursor reused for every item."""
        self.conn = pymysql.connect(host=self.host,
                                    port=self.port,
                                    user=self.user,
                                    password=self.pwd,
                                    database=self.name,
                                    charset=self.charset)
        self.cursor = self.conn.cursor()

    def close_spider(self, spider):
        """Close the cursor before the connection it belongs to."""
        self.cursor.close()
        self.conn.close()

    def process_item(self, item, spider):
        """Insert one item and commit; failures are logged, not raised.

        Args:
            item: mapping with the keys ``craw_date``, ``file_name``,
                ``file_content`` and ``create_time``.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item``, whether or not the insert succeeded.
        """
        import logging

        logger = logging.getLogger(__name__)
        try:
            print('在插入 *************************************** ')
            # Placeholders are left unquoted: the driver quotes and escapes
            # every value itself, so no manual escape_string call is needed
            # and scraped text cannot break out of the statement.
            sql = ('insert into zb_contest_data'
                   '(craw_date, cd_filename, cd_content, create_time) '
                   'values(%s, %s, %s, %s)')
            params = (item['craw_date'], item['file_name'],
                      item['file_content'], item['create_time'])
            self.cursor.execute(sql, params)
            self.conn.commit()
        except Exception:
            # Best effort: keep the crawl running, but record the failure
            # and discard the half-finished transaction.
            logger.exception('failed to insert item into zb_contest_data')
            try:
                self.conn.rollback()
            except Exception:
                logger.exception('rollback failed')
        return item
以上就是scrapy中同步與非同步插入的程式碼片段