diff --git a/py/D4.py b/py/D4.py new file mode 100644 index 0000000..cc198a5 --- /dev/null +++ b/py/D4.py @@ -0,0 +1,128 @@ +# coding=utf-8 + +import requests +from pyquery import PyQuery as pq +import pymysql +import time + + +def dbInsert(sqls): + db = pymysql.connect('localhost', 'root', 'eagle', 'db_4dd', charset='utf8') + + cursor = db.cursor() + try: + for i in sqls: + cursor.execute(i) + db.commit() + + except Exception as e: + db.rollback() + print(e) + + +def getUrl(): + url = 'https://zst.ssqzj.com/shdd/0-szs-pl5-disanwei-100.html' + content = requests.get(url) + tr_list = pq(content.text)('.charttab_bg') + sqls = [] + + # 记录最后一期的数据 + p = '' + a = '' + b = '' + c = '' + d = '' + e = '' + + for i in tr_list: + period = (pq(i)('.issue_list').text()) + nums = (pq(i)('.font_red').text()).split(' ') + + if nums is None or len(nums) != 5: + continue + + sql = 'REPLACE INTO t_peroid(id, peroid, a, b, c, d, e, updateTime) VALUES (UUID(), %s, %s, %s, %s, %s, %s, NOW());' % \ + (period, nums[0], nums[1], nums[2], nums[3], nums[4]) + sqls.append(sql) + + p = '"' + period + '"' + a = nums[0] + b = nums[1] + c = nums[2] + d = nums[3] + e = nums[4] + + print(len(sqls)) + + result = '"' + str(a) + ',' + str(b) + ',' + str(c) + ',' + str(d) + ',' + str(e) + '"' + # 取出最后一期,再计算历史数据的概率,加上最后一期拼成记录插入结果表 + getMost(p, result) + + dbInsert(sqls) + + +def getMost(p, result): + db = pymysql.connect('localhost', 'root', 'eagle', 'db_4dd', charset='utf8') + cursor = db.cursor() + try: + sql1 = 'SELECT a, count(*) as num FROM t_peroid GROUP BY a ORDER BY num DESC;' + sql2 = 'SELECT b, count(*) as num FROM t_peroid GROUP BY b ORDER BY num DESC;' + sql3 = 'SELECT c, count(*) as num FROM t_peroid GROUP BY c ORDER BY num DESC;' + sql4 = 'SELECT d, count(*) as num FROM t_peroid GROUP BY d ORDER BY num DESC;' + sql5 = 'SELECT e, count(*) as num FROM t_peroid GROUP BY e ORDER BY num DESC;' + + cursor.execute(sql1) + result1 = cursor.fetchall() + + cursor.execute(sql2) + result2 = cursor.fetchall() + + cursor.execute(sql3) + result3 = cursor.fetchall() + + cursor.execute(sql4) + result4 = cursor.fetchall() + + cursor.execute(sql5) + result5 = cursor.fetchall() + + a = '' + for i in result1: + a += (str(i[0]) + ',') + a = '"'+a[:-1]+'"' + + b = '' + for i in result2: + b += (str(i[0]) + ',') + b = '"'+b[:-1]+'"' + + c = '' + for i in result3: + c += (str(i[0]) + ',') + c = '"'+c[:-1]+'"' + + d = '' + for i in result4: + d += (str(i[0]) + ',') + d = '"'+d[:-1]+'"' + + e = '' + for i in result5: + e += (str(i[0]) + ',') + e = '"'+e[:-1]+'"' + + t = '"' + time.strftime('%Y-%m-%d', time.localtime()) + '"' + + sql = 'replace into t_result (id, peroid, result, aCode, bCode, cCode, dCode, eCode, codeDay) VALUES (uuid(), %s, %s, %s, %s, %s, %s, %s, %s);' % \ + (p, result, a, b, c, d, e, t) + + cursor.execute(sql) + db.commit() + print(sql) + + except Exception as e: + print(e) + + +# 先获取今天的数据,取出最后一期,再计算历史数据的概率,加上最后一期拼成记录插入结果表,再把爬取的数据写入期数表 +getUrl()