#! /usr/bin/python
# -*- coding:utf-8 -*-
import time
import re
import http
import tools
class START:
    """Scrape the entries linked from one index page and append them to ./s.txt.

    Workflow (see ``getList``): fetch the index page, extract every
    ``(href, title)`` pair from its list items, request the content for each
    entry via ``getContent`` and append the cleaned result to ``./s.txt``.

    Network access and text clean-up are delegated to the project helpers
    ``http.HTTP`` and ``tools.Tool``; their exact behaviour is not visible
    here (the fetch helper is assumed to return ``False`` on failure, as the
    checks below rely on that).
    """

    # Page whose list items link to the individual entries.
    LIST_URL = 'http://www.dongliuxiaoshuo.com/n/1524.html'
    # Endpoint queried (with the form data built from bid/rid) for one entry.
    CONTENT_URL = 'http://www.dongliuxiaoshuo.com/dongliu.php'
    # Captures (href, title) of each entry link; compiled once, not per call.
    LINK_PATTERN = re.compile(
        '<li class="list-group-item col-lg-3 col-md-3 col-sm-4 col-xs-6 book">'
        '<a href="(.*?)" title="(.*?)"',
        re.S)

    def __init__(self):
        """Create the HTTP helper and the text clean-up helper."""
        self.http = http.HTTP()
        self.tool = tools.Tool()

    def writeFile(self, title, content):
        """Append ``title``, a newline, ``content`` and a blank line to ./s.txt.

        Both arguments must be text (``unicode`` on Python 2, ``str`` on
        Python 3); they are encoded as UTF-8 before being written.
        """
        # Byte literals keep the concatenation valid on Python 2 and 3 alike.
        record = (title.encode('utf-8') + b'\n'
                  + content.encode('utf-8') + b'\n\n')
        with open('./s.txt', 'ab+') as f:
            f.write(record)

    def getContent(self, url, title, retries=3):
        """Fetch the entry behind ``url`` and append it to ./s.txt under ``title``.

        ``url`` is split on '.' and then on '/'; path segments 2 and 3 are
        sent as ``bid`` and ``rid`` (so an href shaped like
        ``/n/<bid>/<rid>.html`` is expected -- presumably book id and
        chapter id; confirm against the site).

        A failed fetch (helper returns ``False``) prints ``url`` and
        ``title`` and is retried up to ``retries`` more times; if every
        attempt fails the entry is skipped and nothing is written.

        Raises:
            IndexError: if ``url`` has fewer than four '/'-separated
                segments before its first '.'.
        """
        segments = url.split('.')[0].split('/')
        data = {'bid': segments[2], 'rid': segments[3]}
        formData = self.http.getFormData('./form.txt', data)
        headers = self.http.getHeaders('./header.txt', [])

        html = False
        # Bounded loop instead of unbounded self-recursion: a permanently
        # failing entry can no longer exhaust the recursion limit.
        for _ in range(max(retries, 0) + 1):
            html = self.http.getHtmlContent(self.CONTENT_URL, headers, formData)
            if html is not False:
                break
            # Report which entry failed so it can be re-fetched by hand.
            print(url)
            print(title)
        if html is False:
            return

        self.writeFile(title, self.tool.replace(html))

    def getList(self):
        """Fetch the index page and process every entry link found on it.

        If the index page cannot be fetched (helper returns ``False``) the
        index URL is printed and the method returns without processing
        anything.
        """
        headers = self.http.getHeaders('./header.txt', [])
        html = self.http.getHtmlContent(self.LIST_URL, headers, {})
        if html is False:
            # Without this guard the regex search would raise TypeError.
            print(self.LIST_URL)
            return
        for href, title in self.LINK_PATTERN.findall(html):
            self.getContent(href, title)
# Script driver: process every entry linked from the index page and report
# how long the whole run took (wall-clock seconds).
start = START()
t1 = time.time()
start.getList()
t2 = time.time() - t1
# Parenthesised single-argument form prints identically on Python 2 and is
# valid syntax on Python 3. The message reads "Total time taken:".
print('总共用时:' + str(t2))