# -*- coding:utf8 -*-
from pyquery import PyQuery as pq
import requests
import pymysql
import time
host = 'localhost'
user = 'root'
# pwd = 'eagle666'
pwd = 'lottery@2017'
db_name = 'db_4dd'
############################################################################
################### 获取2004-当天的所有记录 #############################
############################################################################
# 写入数据
def replaceDB(sqls):
db = pymysql.connect(host, user, pwd, db_name, charset='utf8')
cursor = db.cursor()
try:
for i in sqls:
cursor.execute(i)
db.commit()
except Exception as e:
db.rollback()
print(e)
# 获取所有历史记录
def getHtmlResult(year):
url = 'https://kaijiang.78500.cn/p5/'
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'content-type': 'application/x-www-form-urlencoded',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
data = {
'startqi': '2019001',
'endqi': '2019351',
'year': year,
'action': 'years'
}
content = requests.post(url=url, headers=headers, data=data)
if content.status_code == 200:
html = content.text
trList = pq(html)('.kjls tbody tr')
sqls = []
for tr in trList:
tdList = pq(tr)('td')
period = tdList[0].text
if period is not None:
date = tdList[1].text
result = pq(tdList[3])('a').text().split(' ')
# print(period)
# print(date)
# print(result)
sql = "REPLACE INTO t_history (id, period, a, b, c, d, e, createTime) VALUES (UUID(), '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % \
(period, result[0], result[1], result[2], result[3], result[4], date)
sqls.append(sql)
print(sqls)
replaceDB(sqls)
def startAll():
# 获取从2004-当前的所有数据
for i in range(0, 17):
y = (2004 + i)
getHtmlResult(str(y))
############################################################################
##################### 获取当天记录 ######################################
############################################################################
def startToday():
url = 'https://kaijiang.78500.cn/p5/'
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'content-type': 'application/x-www-form-urlencoded',
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
content = requests.get(url=url, headers=headers)
if content.status_code == 200:
html = content.text
trList = pq(html)('.kjls tbody tr')
sqls = []
date = time.strftime('%Y-%m-%d', time.localtime())
for tr in trList:
tdList = pq(tr)('td')
period = tdList[0].text
if period is not None and date == tdList[1].text:
result = pq(tdList[3])('a').text().split(' ')
sql = "REPLACE INTO t_history (id, period, a, b, c, d, e, createTime) VALUES (UUID(), '%s', '%s', '%s', '%s', '%s', '%s', '%s');" % \
(period, result[0], result[1], result[2], result[3], result[4], date)
sqls.append(sql)
break
if len(sqls) > 0:
replaceDB(sqls)
print(date + ' SUCCESS!')
############################################################################
################### 生成2020全部结果的概率表数据 #########################
############################################################################
# 查询数据
def queryDB(sql):
db = pymysql.connect(host, user, pwd, db_name, charset='utf8')
cursor = db.cursor()
try:
cursor.execute(sql)
return cursor.fetchall()
except Exception as e:
print(e)
return ''
def generateDayResult(day):
# day = i[0]
limit = [100, 50, 30, 10]
codes = []
for j in limit:
sqlA = '''
SELECT a, count(*) as num FROM (
SELECT * FROM t_history WHERE DATE_FORMAT(createTime,'%%Y-%%m-%%d') < '%s' ORDER BY createTime DESC limit %d
) t GROUP BY a ORDER BY num DESC, a;
''' % (day, j)
sqlB = '''
SELECT b, count(*) as num FROM (
SELECT * FROM t_history WHERE DATE_FORMAT(createTime,'%%Y-%%m-%%d') < '%s' ORDER BY createTime DESC limit %d
) t GROUP BY b ORDER BY num DESC, b;
''' % (day, j)
sqlC = '''
SELECT C, count(*) as num FROM (
SELECT * FROM t_history WHERE DATE_FORMAT(createTime,'%%Y-%%m-%%d') < '%s' ORDER BY createTime DESC limit %d
) t GROUP BY C ORDER BY num DESC, c;
''' % (day, j)
sqlD = '''
SELECT d, count(*) as num FROM (
SELECT * FROM t_history WHERE DATE_FORMAT(createTime,'%%Y-%%m-%%d') < '%s' ORDER BY createTime DESC limit %d
) t GROUP BY d ORDER BY num DESC, d;
''' % (day, j)
aList = queryDB(sqlA)
bList = queryDB(sqlB)
cList = queryDB(sqlC)
dList = queryDB(sqlD)
codeA = ''
for c in aList:
codeA += str(c[0]) + ','
codes.append(codeA[:-1])
codeB = ''
for c in bList:
codeB += str(c[0]) + ','
codes.append(codeB[:-1])
codeC = ''
for c in cList:
codeC += str(c[0]) + ','
codes.append(codeC[:-1])
codeD = ''
for c in dList:
codeD += str(c[0]) + ','
codes.append(codeD[:-1])
insertSQL = '''
REPLACE INTO t_compare
(id, day, A100, B100, C100, D100, A50, B50, C50, D50, A30, B30, C30, D30, A10, B10, C10, D10)
VALUES
(UUID(), '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');
''' % (
day, codes[0], codes[1], codes[2], codes[3], codes[4], codes[5], codes[6], codes[7], codes[8], codes[9], codes[10],
codes[11], codes[12], codes[13], codes[14], codes[15])
return insertSQL
def generateAll():
# 获取2020所有结果
sql = '''
SELECT createTime FROM t_history WHERE createTime like '2020%' ORDER BY createTime DESC;
'''
timeList = queryDB(sql)
updateSQLs = []
for i in timeList:
updateSQLs.append(generateDayResult(i[0]))
replaceDB(updateSQLs)
############################################################################
################### 生成当天的概率表数据 #########################
############################################################################
def generate():
# 生成当天未出结果前的概率值
today = time.strftime('%Y-%m-%d', time.localtime())
hour = time.strftime('%H', time.localtime())
# 每天20:40出结果,20点之前可以生成当天概率数据,不会包含今天的结果
# 定时任务放在:2:00
if int(hour) < 20:
updateSQLs = []
updateSQLs.append(generateDayResult(today))
replaceDB(updateSQLs)
print(today + ' SUCCESS!')
# generateAll()
generate()
# startAll()
startToday()