Python requests 代理 IP:使用 requests 模块配合 IP 代理池
大家好,又见面了,我是你们的朋友全栈君。
import json
import telnetlib
import requests
import random
# URL of the proxy list; get_proxy() parses the body as one JSON object per line.
proxy_url = "https://raw.githubusercontent.com/fate0/proxylist/master/proxy.list"
# Path of the file that collects the proxies which passed the connectivity check.
ip_pool_file = "verified_proxies.json"
# Site requested through each proxy to check that the proxy actually works.
test_url = "http://icanhazip.com/"
# Pool of User-Agent header values; one is picked at random per request.
USER_AGENTS = [
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
]
def get_proxy(proxy_url):
    """Download the proxy list and probe every entry it contains.

    The response body is read as one JSON object per line; each object must
    provide the keys ``host``, ``port`` and ``type``. Every well-formed entry
    is passed to ``check_and_save_ip``. Blank lines (for example the empty
    string left after the final newline) and lines that are not valid proxy
    records are skipped instead of aborting the whole run.

    Args:
        proxy_url: URL of the newline-delimited JSON proxy list.

    Raises:
        requests.RequestException: if the list cannot be downloaded or the
            server answers with an HTTP error status.
    """
    # requests has no default timeout; without one a dead host blocks forever.
    response = requests.get(proxy_url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    for proxy_str in response.text.split("\n"):
        proxy_str = proxy_str.strip()
        if not proxy_str:
            continue
        try:
            proxy_json = json.loads(proxy_str)
            host = proxy_json["host"]
            port = proxy_json["port"]
            ip_type = proxy_json["type"]
        except (ValueError, KeyError, TypeError):
            # json.JSONDecodeError is a ValueError; KeyError/TypeError cover
            # records that are valid JSON but not the expected object shape.
            print("skipping malformed proxy entry: %r" % proxy_str)
            continue
        check_and_save_ip(host, port, ip_type)
def check_and_save_ip(ip, port, ip_type):
    """Probe a proxy with a TCP connection and record it if it is reachable.

    A connection to ``ip``:``port`` is attempted with a 3 second timeout.
    On success the proxy is appended to ``ip_pool_file`` as a single JSON
    line with the keys ``type``, ``host`` and ``port``. On failure only a
    message is printed and nothing is written.

    Args:
        ip: Host name or IP address of the proxy.
        port: TCP port of the proxy.
        ip_type: Proxy type label taken from the proxy list; stored as-is.
    """
    try:
        # Opening the connection is the whole test. The context manager
        # closes the socket immediately so probing many proxies does not
        # leak one open connection per reachable host.
        with telnetlib.Telnet(ip, port=port, timeout=3):
            pass
    except Exception:
        # Best-effort probe: any failure just means "not usable".
        print('unconnected')
        return

    print('connected successfully')
    proxies = {'type': ip_type, 'host': ip, 'port': port}
    with open(ip_pool_file, 'a', encoding='utf-8') as fp:
        fp.write(json.dumps(proxies) + '\n')
    print("已写入:%s" % proxies)
def get_request_headers():
    """Build HTTP request headers with a randomly chosen User-Agent.

    Returns:
        dict: Headers with ``User-Agent`` picked at random from
        ``USER_AGENTS`` plus fixed ``Accept``, ``Accept-Language`` and
        ``Connection`` values.
    """
    return {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
    }
def use_proxy():
    """Send a test request through every proxy stored in the pool file.

    This is a demonstration only. Each line of ``ip_pool_file`` is parsed as
    a JSON object with ``host`` and ``port`` keys. For every entry a GET
    request to ``test_url`` is sent through that proxy with a 5 second
    timeout; the response body is printed when the request succeeds, and the
    exception is printed when it fails.

    Raises:
        OSError: if ``ip_pool_file`` cannot be opened.
        ValueError: if a non-blank line of the file is not valid JSON.
    """
    with open(ip_pool_file, "r", encoding="utf-8") as fp:
        # Skip blank lines so a stray newline does not break JSON parsing.
        proxies = [json.loads(line) for line in fp if line.strip()]

    for item in proxies:
        # requests expects proxy URLs to carry an explicit scheme.
        proxies_param = {
            'http': 'http://%s:%s' % (item['host'], item['port'])
        }
        print(proxies_param)
        try:
            # Send the request through the proxy and show what came back.
            response = requests.get(test_url, headers=get_request_headers(), proxies=proxies_param, timeout=5)
            if response.ok:
                print(response.content)
        except Exception as ex:
            # A dead or misbehaving proxy must not stop the remaining checks.
            print(ex)
if __name__ == "__main__":
    # Uncomment to (re)build the proxy pool file before using it.
    # get_proxy(proxy_url)
    use_proxy()
    # One-off check of a single hard-coded proxy:
    # res = requests.get('http://icanhazip.com/', proxies={'http': '167.99.145.189:3128'})
    # print(res.content)
发布者:全栈程序员栈长,转载请注明出处:https://javaforall.cn/141029.html ;原文链接:https://javaforall.cn
相关文章
- python类的初始化方法_python初始化列表
- python win32api messagebox_如何在Python中使用Win32 API?
- Python爬虫之requests
- 推荐一款Python数据可视化神器
- python dropna()用法「建议收藏」
- 人生苦短,我用Python-手把手教你如何使用python写串口调试助手
- 如何使用python+urllib库+代理IP爬取新闻数据
- windows下,cmd 运行 python 脚本,选中文字就停止运行的解决办法
- FreeBuf 周报 | 马斯克血洗推特安全部门;新形式钓鱼软件针对 Python开发人员
- python高级线程编程-线程安全的数据结构(一)
- Python学习:1.快速搭建python环境详解编程语言
- Python 算法(2) 哈夫曼编码 Huffman Encoding详解编程语言
- MySQL与Python搭配,实现数据库操作。(mysql-python)
- Linux安装Python离不开你(linux安装.py)
- Python脚本实现Linux系统管理及自动化部署(python写linux)
- 用Python仿写MSSQL 编程体验更有趣(python仿mssql)
- Linux中如何离开Python环境(linux怎么退出python)
- Linux系统下安装Python模块指南(linux安装python模块)
- Python操作MySQL数据库的必备模块mysqlpython(mysql_python)
- Python时间处理datetime实例
- python转换摩斯密码示例
- 在python中的socket模块使用代理实例