import ast
import time

import requests
from pyquery import PyQuery as pq
class PROXY:
    """Collect free proxies from several public listing sites and cache them.

    Each collected proxy is a single-entry dict mapping a lower-cased scheme
    to a proxy URL, e.g. ``{'http': 'http://1.2.3.4:80'}``. That is the value
    ``test_proxy`` hands to ``requests.get(..., proxies=...)``.
    """

    # File (relative to the current working directory) holding the cached list.
    CACHE_FILE = 'proxy.txt'
    # Browser-like User-Agent sent with every outgoing request.
    USER_AGENT = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/72.0.3626.121 Safari/537.36')
    # Per-request timeout in seconds.
    TIMEOUT = 15
    # CSS selector shared by the listing sites that use the same table markup.
    TABLE_ROWS = '.table.table-bordered.table-striped tbody tr'

    def __init__(self):
        self.ips = []

    def get(self):
        """Return the proxy list, scraping every source if no cache is usable.

        When the cache file is missing, empty or unparsable, all ``startN``
        scrapers run, the result is written to the cache file and returned.
        Otherwise the cached list replaces ``self.ips`` and is returned.
        """
        cached = self._parse_cache(self.read())
        if cached is None:
            self.start0()
            self.start1()
            self.start2()
            self.start3()
            self.start4()
            print('proxy numbers: ' + str(len(self.ips)))
            self.save(self.ips)
        else:
            self.ips = cached
        return self.ips

    @staticmethod
    def _parse_cache(text):
        """Parse cached text into a list, or return ``None`` if unusable.

        The cache is written by ``save`` as ``str(list_of_dicts)``, so it is a
        Python literal. ``ast.literal_eval`` is used instead of ``eval`` so
        that a tampered cache file cannot execute code.
        """
        text = text.strip()
        if not text:
            return None
        try:
            value = ast.literal_eval(text)
        except (ValueError, TypeError, SyntaxError, MemoryError,
                RecursionError):
            return None
        return value if isinstance(value, list) else None

    def read(self):
        """Return the cache file's text, or ``''`` if it cannot be read."""
        try:
            with open(self.CACHE_FILE, 'r', encoding='utf-8') as f:
                return f.read()
        except (OSError, UnicodeDecodeError):
            return ''

    def save(self, ips):
        """Overwrite the cache file with ``str(ips)``."""
        with open(self.CACHE_FILE, 'w', encoding='utf-8') as f:
            f.write(str(ips))

    def test_proxy(self, proxy):
        """Check a proxy by asking an IP-echo service what address it sees.

        Returns ``proxy`` when the request succeeds with status 200 and the
        echoed text occurs in ``str(proxy)``; returns ``''`` otherwise.
        """
        head = {'User-Agent': self.USER_AGENT}
        try:
            response = requests.get(url='http://ip.haschek.at/', headers=head,
                                    timeout=self.TIMEOUT, proxies=proxy)
        except Exception as e:
            # Best effort: scraped proxies are frequently dead or malformed,
            # and one bad entry must not stop the caller's loop.
            print(e)
            return ''
        if response.status_code != 200:
            print('change proxy')
            return ''
        # An empty body is a substring of everything, so require real content.
        seen_ip = response.text.strip()
        if seen_ip and seen_ip in str(proxy):
            print('ok')
            return proxy
        return ''

    def request_html(self, url):
        """Fetch ``url`` and return the body text, or ``''`` on any failure.

        A non-200 status and a raised exception both yield ``''`` so callers
        only have to test for one failure value.
        """
        head = {'User-Agent': self.USER_AGENT}
        try:
            res = requests.get(url=url, headers=head, timeout=self.TIMEOUT)
        except Exception as e:
            print(e)
            return ''
        if res.status_code != 200:
            print('request error')
            return ''
        return res.text

    def _collect_rows(self, rows, cell_tag):
        """Append one proxy per row, reading cells 0, 1 and 3 of each row.

        Cell 0 is used as the IP, cell 1 as the port and cell 3 as the scheme.
        Rows with fewer than four cells or an empty required cell are skipped.
        """
        for row in rows:
            cells = pq(row).find(cell_tag)
            if len(cells) < 4:
                continue
            ip = (cells[0].text or '').strip()
            port = (cells[1].text or '').strip()
            scheme = (cells[3].text or '').strip().lower()
            if ip and port and scheme:
                self.ips.append({scheme: scheme + '://' + ip + ':' + port})

    def _scrape_paged_tables(self, url_templates, pages, delay):
        """Scrape paginated table listings.

        ``url_templates`` contain a ``{page}`` placeholder. Scraping stops
        entirely at the first page that cannot be fetched. ``delay`` seconds
        are slept after every page that was fetched.
        """
        for template in url_templates:
            for page in pages:
                index_html = self.request_html(template.format(page=page))
                if not index_html:
                    return
                self._collect_rows(pq(index_html)(self.TABLE_ROWS), 'td')
                time.sleep(delay)

    def start0(self):
        """Scrape xsdaili.com: follow up to four index links and parse them.

        Lines on the linked pages are expected as ``ip:port@SCHEME#note``;
        lines that do not contain exactly one ``@`` are ignored.
        """
        try:
            print('start 000 xsdaili.com...')
            url = 'http://www.xsdaili.com/'
            index_html = self.request_html(url)
            if not index_html:
                return
            panels = pq(index_html)('.table.table-hover.panel-default.panel.ips')
            # The index may list fewer than four entries; take what exists.
            for panel in list(panels)[:4]:
                sub_url = pq(panel)('.title').find('a').attr('href')
                if not sub_url:
                    continue
                sub_html = self.request_html(url + sub_url)
                if not sub_html:
                    continue
                text = pq(sub_html)('.panel-body .cont').text()
                for line in text.splitlines():
                    parts = line.split('@')
                    if len(parts) != 2:
                        continue
                    address = parts[0].strip()
                    scheme = parts[1].split('#')[0].strip().lower()
                    if address and scheme:
                        self.ips.append({scheme: scheme + '://' + address})
        except Exception as e:
            # Third-party markup can change at any time; report and move on.
            print(e)

    def start1(self):
        """Scrape pages 1-9 of both qydaili.com free lists."""
        try:
            print('start 111 qydaili.com...')
            urls = ['http://www.qydaili.com/free/?action=china&page={page}',
                    'http://www.qydaili.com/free/?action=unchina&page={page}']
            self._scrape_paged_tables(urls, range(1, 10), 2)
        except Exception as e:
            print(e)

    def start2(self):
        """Scrape pages 1-3 of the kuaidaili.com free list."""
        try:
            print('start 222 kuaidaili.com...')
            urls = ['https://www.kuaidaili.com/free/inha/{page}/']
            self._scrape_paged_tables(urls, range(1, 4), 1)
        except Exception as e:
            print(e)

    def start3(self):
        """Scrape the single data5u.com free list page."""
        try:
            print('start 333 data5u.com...')
            url = 'http://www.data5u.com/free/gngn/index.shtml'
            index_html = self.request_html(url)
            if not index_html:
                return
            # This site lays rows out as <li> cells instead of table cells.
            self._collect_rows(pq(index_html)('.wlist .l2'), 'li')
        except Exception as e:
            print(e)

    def start4(self):
        """Scrape pages 1-3 of both ip3366.net free lists."""
        try:
            print('start 444 ip3366.net...')
            urls = ['http://www.ip3366.net/free/?stype=1&page={page}',
                    'http://www.ip3366.net/free/?stype=3&page={page}']
            self._scrape_paged_tables(urls, range(1, 4), 1)
        except Exception as e:
            print(e)
if __name__ == "__main__":
    # Manual smoke run: scrape a single source, then keep only the proxies
    # that pass the live check (test_proxy returns '' for a rejected proxy).
    crawler = PROXY()
    crawler.start1()
    checked = (crawler.test_proxy(candidate) for candidate in crawler.ips)
    working = [proxy for proxy in checked if proxy != '']
    print(working)