py1/shizhan/HttpMethod/start.py at cbc0515d7d0e9c83c705060032609a17cf08b74f

Fork: 0

bello / py1

Find file

Newer

Older

py1 / shizhan / HttpMethod / start.py

ubt on 11 May 2017 1 KB first

Raw Blame History

#! /usr/bin/python
# -*- coding:utf-8 -*-
import time
import re
import http
import tools


class START:
    """Scrape one list page and append every linked entry to ``./s.txt``.

    ``getList`` downloads the list page and extracts ``(href, title)`` pairs;
    ``getContent`` requests the body for one pair, passes it through
    ``tools.Tool.replace`` and hands the result to ``writeFile``.
    """

    def __init__(self):
        # Project-local helpers (``http.py`` / ``tools.py`` next to this file).
        self.http = http.HTTP()
        self.tool = tools.Tool()

    def writeFile(self, title, content):
        """Append ``title`` and ``content`` to ``./s.txt`` as UTF-8.

        Layout per entry: title, newline, content, blank line. Both arguments
        are expected to be text (``unicode`` on Python 2), as in the original
        ``unicode.encode`` call.
        """
        # Binary append: the bytes are encoded explicitly, so no text-mode
        # newline translation or implicit encoding is involved.
        with open('./s.txt', 'ab') as f:
            f.write(title.encode('utf-8') + b'\n' +
                    content.encode('utf-8') + b'\n\n')

    def getContent(self, url, title, max_retries=5):
        """Fetch the body for one list entry and append it to the output file.

        url         -- ``href`` captured by ``getList``. The part before its
                       first ``.`` is split on ``/``; pieces 2 and 3 are sent
                       as the ``bid`` and ``rid`` form fields, so a value such
                       as ``/n/1524/777.html`` yields bid=1524, rid=777.
                       NOTE(review): assumes hrefs are site-relative -- confirm.
        title       -- entry title, written above the content.
        max_retries -- how many extra attempts to make after a failed fetch
                       (``getHtmlContent`` returning ``False``).

        Each failed attempt prints ``url`` and ``title``. If every attempt
        fails, nothing is written and the method returns ``None``.

        Raises IndexError when ``url`` has fewer than four ``/``-separated
        pieces before its first ``.``.
        """
        path_parts = url.split('.')[0].split('/')
        data = {'bid': path_parts[2], 'rid': path_parts[3]}
        formData = self.http.getFormData('./form.txt', data)
        headers = self.http.getHeaders('./header.txt', [])

        # The original retried by recursing into a misspelled method name
        # (``getContext``), which raised AttributeError on the first failure.
        # A bounded loop retries without risking unbounded recursion.
        attempts = max(1, max_retries + 1)
        for _ in range(attempts):
            html = self.http.getHtmlContent(
                'http://www.dongliuxiaoshuo.com/dongliu.php', headers, formData)
            if html is not False:
                break
            print(url)
            print(title)
        else:
            # Every attempt failed: skip this entry instead of crashing.
            return

        self.writeFile(title, self.tool.replace(html))

    def getList(self):
        """Download the list page and call ``getContent`` for every entry.

        Entries are the ``<li class="... book"><a href=... title=...>`` items
        matched by the pattern below, processed in page order. If the list
        page cannot be fetched, its URL is printed and nothing else happens.
        """
        list_url = 'http://www.dongliuxiaoshuo.com/n/1524.html'
        headers = self.http.getHeaders('./header.txt', [])
        html = self.http.getHtmlContent(list_url, headers, {})
        if html is False:
            # ``re.findall`` on ``False`` would raise TypeError.
            print(list_url)
            return

        pattern = re.compile(
            '<li class="list-group-item col-lg-3 col-md-3 col-sm-4 col-xs-6 book"><a href="(.*?)" title="(.*?)"', re.S)
        for href, title in pattern.findall(html):
            self.getContent(href, title)



# Script driver: run the whole crawl once and report how long it took.
start = START()
t1 = time.time()  # wall-clock timestamp taken just before the crawl begins
start.getList()
t2 = time.time() - t1  # elapsed seconds for the complete run
# The message reads "Total time taken:"; the parenthesised single-argument
# form prints exactly what the bare print statement did.
print('总共用时：' + str(t2))