#! /usr/bin/python
# -*- coding:utf-8 -*-
import urllib2
import urllib
import re
from socket import error as SocketError
from cookielib import CookieJar
import zlib
class HTTP:
    """Helpers for fetching a page with request headers and form data read from files.

    ``getHeaders`` and ``getFormData`` parse ``key:value`` text files and merge
    them with values supplied by the caller; ``getHtmlContent`` sends the
    request and returns the decoded response body.
    """

    # =========================
    # Fetch the HTML source of a page
    # =========================
    def getHtmlContent(self, url, headers, formData):
        """Send ``formData`` to ``url`` and return the response body as text.

        Args:
            url: Address to request.
            headers: List of ``(name, value)`` tuples installed as the
                opener's ``addheaders``.
            formData: Mapping (or sequence of pairs) that is URL-encoded and
                sent as the request body.

        Returns:
            The response body decoded as UTF-8, or ``False`` when the request,
            decompression or decoding fails (the error is printed).
        """
        try:
            # Cookie handling: a fresh jar is created for every call.
            opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(CookieJar()))
            # Extra request headers.
            opener.addheaders = headers
            # Form parameters are passed as the request body.
            payload = urllib.urlencode(formData)
            response = opener.open(url, payload, timeout=30)
            try:
                body = response.read()
                encoding = response.headers.get('Content-Encoding') or ''
            finally:
                # Release the connection even if reading fails.
                response.close()
            return self._decompress(body, encoding).decode("utf8")
        except urllib2.HTTPError as e:
            print('HTTPError: ' + str(e.code))
            return False
        except urllib2.URLError as e:
            print('URLError: ' + str(e.reason))
            return False
        except SocketError as e:
            print('SocketError: ' + str(e.errno))
            return False
        except Exception as e:
            print('Exception' + str(e))
            return False

    def _decompress(self, body, encoding):
        """Undo a gzip/deflate ``Content-Encoding``; return other bodies unchanged."""
        encoding = encoding.lower()
        if 'gzip' in encoding:
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            return zlib.decompress(body, 16 + zlib.MAX_WBITS)
        if 'deflate' in encoding:
            try:
                return zlib.decompress(body)
            except zlib.error:
                # Fall back to a raw deflate stream (no zlib header).
                return zlib.decompress(body, -zlib.MAX_WBITS)
        return body

    def _readPairs(self, fileName):
        """Parse the ``key:value`` lines of ``fileName`` into ``(key, value)`` tuples.

        Only the first colon separates key from value, so values may contain
        colons themselves. Blank lines and lines without a colon are skipped.
        """
        pairs = []
        with open(fileName, 'r') as source:
            for line in source:
                # Drop the line terminator (LF or CRLF).
                line = line.rstrip('\r\n')
                if ':' not in line:
                    continue
                key, value = line.split(':', 1)
                pairs.append((key, value))
        return pairs

    # =========================
    # Build the request headers
    # =========================
    def getHeaders(self, fileName, data):
        """Return the headers listed in ``fileName`` followed by those in ``data``.

        Args:
            fileName: Path of a text file with one ``Name: value`` header per line.
            data: Iterable of extra ``(name, value)`` tuples appended after the
                file's headers.

        Returns:
            A list of ``(name, value)`` tuples, or ``[]`` if the file cannot be
            read or ``data`` cannot be appended (the error is printed).
        """
        try:
            # Whitespace around a header name or value carries no meaning.
            headers = [(name.strip(), value.strip())
                       for name, value in self._readPairs(fileName)]
            # Merge the file's headers with the ones passed in.
            headers.extend(data)
            return headers
        except Exception as e:
            print('Exception' + str(e))
            return []

    # =========================
    # Build the form data
    # =========================
    def getFormData(self, fileName, data):
        """Return the form fields listed in ``fileName`` merged with ``data``.

        Args:
            fileName: Path of a text file with one ``key:value`` field per line.
                Keys and values are taken verbatim (no whitespace trimming).
            data: Mapping of extra fields; its values override the file's.

        Returns:
            A dict of form fields, or ``{}`` if the file cannot be read or
            ``data`` cannot be merged (the error is printed).
        """
        try:
            form = {}
            for key, value in self._readPairs(fileName):
                # The first occurrence of a repeated key wins.
                form.setdefault(key, value)
            # Merge the file's fields with the ones passed in.
            form.update(data)
            return form
        except Exception as e:
            print('Exception' + str(e))
            return {}
# http = HTTP()
#
# data = {'bid':'1524', 'rid':'980349'}
# formData = http.getFormData('./form.txt', data);
#
# header = []
# headers = http.getHeaders('./header.txt', header)
#
# http.getHtmlContent('http://www.dongliuxiaoshuo.com/dongliu.php', headers, formData)