# PythonProxy / ProxyUtil.py
import ast
import time

import requests
from pyquery import PyQuery as pq

import ProxyThread


class PROXY:
    """Collect free HTTP/HTTPS proxies from several public listing sites,
    cache them in ``proxy.txt`` and verify each one against a target URL.

    Attributes:
        ips:       all scraped proxies, each ``{'http'|'https': 'scheme://ip:port'}``
        okIpList:  proxies that passed testProxy()
        errCount:  consecutive-failure counter used by testProxy() for its retry
    """

    def __init__(self):
        self.ips = []        # every proxy found by the start*() scrapers
        self.okIpList = []   # proxies confirmed working by testProxy()
        self.errCount = 0    # retry bookkeeping for testProxy()

    def get(self):
        """Return the proxy list.

        Uses the ``proxy.txt`` cache when it exists and is non-empty;
        otherwise scrapes all enabled sources and writes the cache.
        """
        cached = self.read()
        if cached == '':
            self.start0()
            self.start1()
            self.start2()
            # self.start3()  # data5u.com source disabled
            self.start4()

            print('proxy numbers: ' + str(len(self.ips)))

            self.save(self.ips)
        else:
            # literal_eval safely parses the repr() written by save();
            # eval() would execute arbitrary code found in the cache file.
            self.ips = ast.literal_eval(cached)

        return self.ips

    def read(self):
        """Return the raw contents of ``proxy.txt``, or ``''`` when the file
        is missing or unreadable."""
        try:
            with open('proxy.txt', 'r', encoding='utf-8') as f:
                return f.read()
        except OSError:
            return ''

    def save(self, ips):
        """Persist *ips* to ``proxy.txt`` as its repr() (re-read by get())."""
        with open('proxy.txt', 'w', encoding='utf-8') as f:
            f.write(str(ips))

    def request_html(self, url):
        """GET *url* with a desktop User-Agent.

        Returns the response body, or ``''`` on any failure (non-200 status,
        timeout, connection error) so callers can use a single empty check.
        """
        try:
            agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
            head = {'User-Agent': agent}
            res = requests.get(url=url, headers=head, timeout=15)

            if res.status_code != 200:
                print('request error')
                return ''

            return res.text

        except Exception as e:
            print(e)
            # BUGFIX: previously fell through returning None, which passed
            # the callers' `== ''` guards and crashed later in pq().
            return ''

    def _scrape_table_page(self, url):
        """Fetch *url* and append every proxy row of the common
        ``.table.table-bordered.table-striped`` layout to self.ips.

        Column layout (shared by qydaili/kuaidaili/ip3366): td[0]=ip,
        td[1]=port, td[3]=scheme.  Returns False when the page could not
        be fetched, True otherwise.
        """
        index_html = self.request_html(url)
        if not index_html:
            return False

        trs = pq(index_html)('.table.table-bordered.table-striped tbody tr')
        for tr in trs:
            cells = pq(tr).find('td')
            ip = cells[0].text
            port = cells[1].text
            scheme = cells[3].text.lower()
            self.ips.append({scheme: scheme + '://' + ip + ':' + port})
        return True

    def start0(self):
        """Scrape www.xsdaili.com: follow the first 4 daily list pages and
        parse their ``ip:port@SCHEME#...`` text lines into self.ips."""
        try:
            print('start 000 xsdaili.com...')

            url = 'http://www.xsdaili.com/'
            index_html = self.request_html(url)
            if not index_html:
                return

            content = pq(index_html)
            urls = content('.table.table-hover.panel-default.panel.ips')

            for i in range(0, 4):
                sub_url = pq(urls[i])('.title').find('a').attr('href')
                sub_html = self.request_html(url + sub_url)
                body_text = pq(sub_html)('.panel-body .cont').text()
                for line in body_text.splitlines():
                    if line == '':
                        continue
                    parts = line.split('@')
                    if len(parts) != 2:
                        continue  # skip lines not matching ip:port@SCHEME#...
                    ip = parts[0]
                    scheme = parts[1].split('#')[0].lower()
                    self.ips.append({scheme: scheme + '://' + ip})

        except Exception as e:
            print(e)

    def start1(self):
        """Scrape www.qydaili.com (china + unchina lists, pages 1-9)."""
        try:
            print('start 111 qydaili.com...')

            urls = ['http://www.qydaili.com/free/?action=china&page=',
                    'http://www.qydaili.com/free/?action=unchina&page=']
            for u in urls:
                for page in range(1, 10):
                    if not self._scrape_table_page(u + str(page)):
                        return
                    time.sleep(2)  # throttle to be polite to the site

        except Exception as e:
            print(e)

    def start2(self):
        """Scrape www.kuaidaili.com free list, pages 1-3."""
        try:
            print('start 222 kuaidaili.com...')

            for page in range(1, 4):
                url = 'https://www.kuaidaili.com/free/inha/' + str(page) + '/'
                if not self._scrape_table_page(url):
                    return
                time.sleep(1)  # throttle to be polite to the site

        except Exception as e:
            print(e)

    def start3(self):
        """Scrape www.data5u.com (currently disabled in get()).

        Uses a different markup: ``.wlist .l2`` rows with li[0]=ip,
        li[1]=port, li[3]=scheme.
        """
        try:
            print('start 333 data5u.com...')

            url = 'http://www.data5u.com/free/gngn/index.shtml'
            index_html = self.request_html(url)
            if not index_html:
                return

            for row in pq(index_html)('.wlist .l2'):
                cells = pq(row).find('li')
                ip = cells[0].text
                port = cells[1].text
                scheme = cells[3].text.lower()
                self.ips.append({scheme: scheme + '://' + ip + ':' + port})

        except Exception as e:
            print(e)

    def start4(self):
        """Scrape www.ip3366.net (stype 1 and 3, pages 1-3)."""
        try:
            print('start 444 ip3366.net...')

            urls = ['http://www.ip3366.net/free/?stype=1&page=',
                    'http://www.ip3366.net/free/?stype=3&page=']
            for u in urls:
                for page in range(1, 4):
                    if not self._scrape_table_page(u + str(page)):
                        return
                    time.sleep(1)  # throttle to be polite to the site

        except Exception as e:
            print(e)

    def testProxy(self, proxies):
        """Try to reach the target URL through *proxies* ({scheme: url}).

        On a 200 response that is neither our own IP nor a CDN challenge
        page, the proxy is recorded in okIpList.  On error, retries once
        (recursively) via the errCount counter.

        NOTE(review): errCount is shared instance state, but __main__ runs
        one thread per proxy — the retry accounting is racy across threads;
        confirm whether per-call counting was intended.
        """
        try:
            # url = 'https://ip.gs'     # address for checking whether the proxy itself works
            url = 'https://gzk3.jyuuu.com/result'   # check that the target site is reachable

            userAgent = 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Mobile Safari/537.36'
            accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3'

            head = {}
            head['User-Agent'] = userAgent
            head['Accept'] = accept

            response = requests.get(url=url, proxies=proxies, headers=head, timeout=15)
            if response.status_code == 200:
                # Reject transparent proxies (our real IP leaked into the
                # response) and CDN challenge pages ('百度云加速' is the
                # Baidu Cloud acceleration block page marker).
                if '119.139.195.98' not in response.text and '百度云加速' not in response.text:
                    self.okIpList.append(proxies)
                    print(proxies)

            self.errCount = 0

        except Exception as e:
            print(e)
            self.errCount += 1
            if self.errCount < 2:
                self.testProxy(proxies)
            else:
                return


if __name__ == "__main__":

    threadList = []
    pro = PROXY()
    list = pro.get()
    for i in list:
        thd = ProxyThread.MyThread(pro, i)
        thd.start()
        threadList.append(thd)

    for i in threadList:
        i.join()

    print(pro.okIpList)
    pro.save(pro.okIpList)