# coding=utf-8
import requests
from pyquery import PyQuery as pq
import pymysql
import time
def dbInsert(sqls):
db = pymysql.connect('localhost', 'root', 'eagle', 'db_4dd', charset='utf8')
cursor = db.cursor()
try:
for i in sqls:
cursor.execute(i)
db.commit()
except Exception as e:
db.rollback()
print(e)
def getUrl():
url = 'https://zst.ssqzj.com/shdd/0-szs-pl5-disanwei-100.html'
content = requests.get(url)
tr_list = pq(content.text)('.charttab_bg')
sqls = []
# 记录最后一期的数据
p = ''
a = ''
b = ''
c = ''
d = ''
e = ''
for i in tr_list:
period = (pq(i)('.issue_list').text())
nums = (pq(i)('.font_red').text()).split(' ')
if nums is None or len(nums) != 5:
continue
sql = 'REPLACE INTO t_peroid(id, peroid, a, b, c, d, e, updateTime) VALUES (UUID(), %s, %s, %s, %s, %s, %s, NOW());' % \
(period, nums[0], nums[1], nums[2], nums[3], nums[4])
sqls.append(sql)
p = '"' + period + '"'
a = nums[0]
b = nums[1]
c = nums[2]
d = nums[3]
e = nums[4]
print(len(sqls))
result = '"' + str(a) + ',' + str(b) + ',' + str(c) + ',' + str(d) + ',' + str(e) + '"'
# 取出最后一期,再计算历史数据的概率,加上最后一期拼成记录插入结果表
getMost(p, result)
dbInsert(sqls)
def getMost(p, result):
db = pymysql.connect('localhost', 'root', 'eagle', 'db_4dd', charset='utf8')
cursor = db.cursor()
try:
sql1 = 'SELECT a, count(*) as num FROM t_peroid GROUP BY a ORDER BY num DESC;'
sql2 = 'SELECT b, count(*) as num FROM t_peroid GROUP BY b ORDER BY num DESC;'
sql3 = 'SELECT c, count(*) as num FROM t_peroid GROUP BY c ORDER BY num DESC;'
sql4 = 'SELECT d, count(*) as num FROM t_peroid GROUP BY d ORDER BY num DESC;'
sql5 = 'SELECT e, count(*) as num FROM t_peroid GROUP BY e ORDER BY num DESC;'
cursor.execute(sql1)
result1 = cursor.fetchall()
cursor.execute(sql2)
result2 = cursor.fetchall()
cursor.execute(sql3)
result3 = cursor.fetchall()
cursor.execute(sql4)
result4 = cursor.fetchall()
cursor.execute(sql5)
result5 = cursor.fetchall()
a = ''
for i in result1:
a += (str(i[0]) + ',')
a = '"'+a[:-1]+'"'
b = ''
for i in result2:
b += (str(i[0]) + ',')
b = '"'+b[:-1]+'"'
c = ''
for i in result3:
c += (str(i[0]) + ',')
c = '"'+c[:-1]+'"'
d = ''
for i in result4:
d += (str(i[0]) + ',')
d = '"'+d[:-1]+'"'
e = ''
for i in result5:
e += (str(i[0]) + ',')
e = '"'+e[:-1]+'"'
t = '"' + time.strftime('%Y-%m-%d', time.localtime()) + '"'
sql = 'replace into t_result (id, peroid, result, aCode, bCode, cCode, dCode, eCode, codeDay) VALUES (uuid(), %s, %s, %s, %s, %s, %s, %s, %s);' % \
(p, result, a, b, c, d, e, t)
cursor.execute(sql)
db.commit()
print(sql)
except Exception as e:
print(e)
# 先获取今天的数据,取出最后一期,再计算历史数据的概率,加上最后一期拼成记录插入结果表,再把爬取的数据写入期数表
getUrl()