Scrapy配置MySQL

配置Item导入MySQL

同步插入数据

下面的 Pipeline 使用 MySQLdb 以同步(阻塞)方式把 Item 写入 MySQL,并在爬虫关闭时统一提交事务:

import MySQLdb.cursors
from scrapy.utils.project import get_project_settings
# 同步MYSQL
class MySQLPipeline(object):
    """Scrapy item pipeline that stores items in MySQL over one blocking connection.

    Connection parameters are read from the project settings
    (``MYSQL_DB_NAME``, ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``,
    ``MYSQL_PASSWORD``), each with a built-in fallback value.  Rows are
    inserted as items arrive; the transaction is committed once, when the
    spider closes.
    """

    def open_spider(self, spider):
        """Open the MySQL connection and a cursor when the spider starts."""
        setting = get_project_settings()
        db = setting.get('MYSQL_DB_NAME', 'lagou_scrapy')
        host = setting.get('MYSQL_HOST', 'localhost')
        # getint() rather than get(): a port overridden on the command line
        # arrives as a string, while the driver expects an integer.
        port = setting.getint('MYSQL_PORT', 3306)
        user = setting.get('MYSQL_USER', 'root')
        passwd = setting.get('MYSQL_PASSWORD', '123456')
        self.db_conn = MySQLdb.connect(
            host=host,
            port=port,
            db=db,
            user=user,
            passwd=passwd,
            charset='utf8',
        )
        self.db_cur = self.db_conn.cursor()

    def close_spider(self, spider):
        """Commit all pending inserts and close the connection."""
        try:
            self.db_conn.commit()
        finally:
            # Release the connection even when the commit itself fails.
            self.db_conn.close()

    def process_item(self, item, spider):
        """Insert ``item`` into the database and pass it on unchanged."""
        self.insert_db(item)
        return item

    def insert_db(self, itme):
        """Insert one row into ``java_beijing`` built from ``itme``.

        ``itme`` must provide the keys ``positionName``,
        ``companyShortName``, ``salary`` and ``positionAdvantage``;
        a missing key raises ``KeyError``.
        """
        values = (
            itme['positionName'],
            itme['companyShortName'],
            itme['salary'],
            itme['positionAdvantage'],
        )
        # The placeholders must NOT be wrapped in quotes: the driver quotes
        # and escapes each parameter itself, so '"%s"' would store the value
        # with literal quote characters around it.
        sql = (
            'INSERT INTO java_beijing '
            '(positionName, companyShortName, salary, positionAdvantage) '
            'VALUES (%s, %s, %s, %s)'
        )
        self.db_cur.execute(sql, values)

异步插入数据

下面的 Pipeline 使用 Twisted 的 adbapi 连接池,通过 runInteraction 以异步方式把 Item 写入 MySQL:

from twisted.enterprise import adbapi
import MySQLdb.cursors
from scrapy.utils.project import get_project_settings
# 异步 MYSQL
class MySQLAsyncPipeline(object):
    """Scrapy item pipeline that stores items in MySQL through a Twisted
    ``adbapi`` connection pool.

    Connection parameters are read from the project settings
    (``MYSQL_DB_NAME``, ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``,
    ``MYSQL_PASSWORD``), each with a built-in fallback value.  Every item
    is handed to ``runInteraction``, so ``process_item`` returns without
    waiting for the insert to finish.
    """

    _TABLE = 'java_beijing'

    # Column names double as the item keys; the order here fixes the order
    # of both the column list and the bound values.
    _COLUMNS = (
        'positionName',
        'companyShortName',
        'salary',
        'industryField',
        'positionAdvantage',
        'workYear',
        'education',
        'jobNature',
        'positionId',
        'createTime',
        'city',
        'district',
        'companyFullName',
        'financeStage',
        'companySize',
        'info',
    )

    # Placeholders are bare %s: the driver quotes and escapes each parameter
    # itself, so '"%s"' would store values with literal quote characters.
    _INSERT_SQL = 'INSERT IGNORE INTO {} ({}) VALUES ({})'.format(
        _TABLE,
        ', '.join(_COLUMNS),
        ', '.join(['%s'] * len(_COLUMNS)),
    )

    def __init__(self):
        """Create the connection pool from the project settings."""
        setting = get_project_settings()
        db = setting.get('MYSQL_DB_NAME', 'lagou_scrapy')
        host = setting.get('MYSQL_HOST', 'localhost')
        # getint() rather than get(): a port overridden on the command line
        # arrives as a string, while the driver expects an integer.
        port = setting.getint('MYSQL_PORT', 3306)
        user = setting.get('MYSQL_USER', 'root')
        passwd = setting.get('MYSQL_PASSWORD', '123456')
        # ``port`` was previously read but never forwarded, so the
        # MYSQL_PORT setting had no effect on the pool's connections.
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb',
            host=host,
            port=port,
            db=db,
            user=user,
            passwd=passwd,
            charset='utf8',
        )

    def close_spider(self, spider):
        """Shut down the connection pool when the spider closes."""
        self.dbpool.close()

    def process_item(self, item, spider):
        """Schedule the insert of ``item`` and pass the item on unchanged."""
        deferred = self.dbpool.runInteraction(self.insert_db, item)
        # Without an errback a failed insert would never be handled here.
        deferred.addErrback(self._log_insert_failure, item, spider)
        return item

    def _log_insert_failure(self, failure, item, spider):
        """Report a failed insert through the spider's logger."""
        spider.logger.error(
            'MySQL insert failed for item %r: %r', item, failure.value
        )

    def insert_db(self, tx, itme):
        """Insert one row built from ``itme`` using the transaction ``tx``.

        ``itme`` must provide every key listed in ``_COLUMNS``; a missing
        key raises ``KeyError``.
        """
        values = tuple(itme[column] for column in self._COLUMNS)
        tx.execute(self._INSERT_SQL, values)