"""Scrape today's minimum temperatures for cities across China and chart them.

Adapted from a blog post on scraping nationwide weather with Python
(Python column, dated 2023-02-26 12:36:30).

Third-party packages that must be installed: requests, beautifulsoup4 (bs4),
lxml, matplotlib and numpy.  json and the remaining imports are part of the
standard library.
"""

import json
import time
from pathlib import Path
from urllib.parse import urlparse

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import requests
from bs4 import BeautifulSoup

# Entry page; it is also the page for the first region that gets parsed.
baseUrl = 'http://www.weather.com.cn/textFC/hb.shtml'
# Accumulates {"city": ..., "min": ...} records across all parsed pages.
TEMPTATURE_LIST = []


def _cell_text(td):
    """Return the text of a table cell with newlines removed."""
    return td.text.replace("\n", "")


def get_html(url):
    """Fetch a page with a GET request.

    :param url: absolute URL of the page to download
    :return: the raw response body (bytes) on HTTP 200, otherwise None
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
        # requests only accepts str/bytes header values, so this must be '1'.
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'http://www.weather.com.cn/textFC/hb.shtml',
        'Host': 'www.weather.com.cn',
    }
    try:
        # headers must be passed by keyword: the second positional argument
        # of requests.get is `params`, not `headers`.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as err:
        print(err)
        return None
    if response.status_code == 200:
        return response.content
    print(response)
    return None


def get_host(url):
    """Return the scheme and host part of a URL.

    Used to turn the site-relative links found on a page into full URLs.

    :param url: absolute URL
    :return: e.g. 'http://www.weather.com.cn'; '' when the URL has no
        scheme or host
    """
    parse = urlparse(url)
    if not parse.scheme or not parse.netloc:
        return ''
    # Built from the parsed parts rather than url.split(parse.path), which
    # raises ValueError when the path is empty.
    return f"{parse.scheme}://{parse.netloc}"


def get_urls(html, url):
    """Collect the per-region page URLs linked from the region tab list.

    :param html: content of the page that carries the region tabs
    :param url: URL that page was fetched from; it is left out of the result
    :return: list of absolute URLs, in page order
    """
    host = get_host(url)
    soup = BeautifulSoup(html, 'lxml')
    ul = soup.find(name='ul', attrs={"class": "lq_contentboxTab2"})
    if ul is None:
        return []
    url_list = []
    for a in ul.find_all("a"):
        href = a.attrs.get("href")
        if not href:
            continue
        new_url = host + href
        if new_url != url:
            url_list.append(new_url)
    return url_list


def get_temperatures(html):
    """Parse one region page and append its records to TEMPTATURE_LIST.

    Each record is {"city": province + city, "min": minimum temperature}.

    :param html: page content, or None when the download failed
    :return: None
    """
    if html is None:
        print("网页内容为空!!")
        return
    soup = BeautifulSoup(html, 'lxml')
    # The first matching block is taken to be today's forecast.
    conMidtab = soup.find("div", attrs={'class': 'conMidtab'})
    if conMidtab is None:
        print("未找到天气数据表")
        return
    # One block per province.
    # NOTE(review): assumes province blocks carry class "conMidtab2" -
    # confirm against the live page.
    for conMidtab2 in conMidtab.find_all('div', attrs={'class': 'conMidtab2'}):
        province = None
        # One row per city.
        # NOTE(review): assumes the first two rows of each province table are
        # column headers - confirm against the live page.
        tr_list = conMidtab2.find_all('tr')[2:]
        for index, tr in enumerate(tr_list):
            td_list = tr.find_all('td')
            # The first data row has one extra leading cell holding the
            # province name, which shifts every other column right by one.
            offset = 1 if index == 0 else 0
            if len(td_list) < 7 + offset:
                continue
            if index == 0:
                province = _cell_text(td_list[0])
            if province is None:
                continue
            city = province + _cell_text(td_list[offset])
            min_temp = _cell_text(td_list[6 + offset])
            TEMPTATURE_LIST.append({"city": city, "min": min_temp})
    print("一次分析结束")


def get_gat_temperatures(url):
    """Parse the Hong Kong / Macau / Taiwan page into TEMPTATURE_LIST.

    Original author's note: this page needs page-side JavaScript to produce
    its complete HTML, so it cannot be parsed the same way as the others.
    Rows are instead matched by city name, in the order of gat_list.

    :param url: URL of the Hong Kong / Macau / Taiwan page
    :return: None
    """
    html = get_html(url)
    if html is None:
        print("网页内容为空!!")
        return
    soup = BeautifulSoup(html, 'html.parser')
    gat_list = ['香港', '澳门', '台北', '高雄', '台中']
    index = 0
    province = None
    for tr in soup.find_all('tr'):
        if index >= len(gat_list):
            break
        if gat_list[index] not in tr.text:
            continue
        td_list = tr.find_all('td')
        # The first three cities (Hong Kong, Macau, Taipei) are matched on
        # rows that start with a province cell; the remaining Taiwan cities
        # reuse the province remembered from the Taipei row.
        offset = 1 if index <= 2 else 0
        if len(td_list) < 7 + offset:
            continue
        if offset:
            province = _cell_text(td_list[0])
        if province is None:
            continue
        city = province + _cell_text(td_list[offset])
        min_temp = _cell_text(td_list[6 + offset])
        index += 1
        TEMPTATURE_LIST.append({"city": city, "min": min_temp})
    print('港澳台分析结束')


def spide_temperature():
    """Scrape every region page and save the records to temprature.json."""
    html = get_html(baseUrl)
    if html is None:
        print('请求失败')
        return
    get_temperatures(html)
    urls = get_urls(html, baseUrl)
    if urls:
        # The last link is handled by the dedicated Hong Kong / Macau /
        # Taiwan parser.
        get_gat_temperatures(urls[-1])
        # NOTE(review): the original sliced urls[:-2], which also skipped the
        # region just before the last one - confirm that was unintended.
        for url in urls[:-1]:
            time.sleep(2)  # pause between requests
            content = get_html(url)
            get_temperatures(content)
    with open("temprature.json", 'w', encoding="utf-8") as fp:
        json.dump(TEMPTATURE_LIST, fp)


def show_temperature():
    """Plot the minimum temperature of up to 20 randomly chosen cities.

    Reads temprature.json, ignores records whose temperature is not an
    integer, and draws a bar chart.
    """
    with open("temprature.json", "r", encoding="utf-8") as fp:
        # json.load no longer accepts an `encoding` argument (Python 3.9+);
        # the encoding is set on the file object instead.
        records = json.load(fp)

    numeric = []
    for record in records:
        try:
            numeric.append((record["city"], int(record["min"])))
        except (KeyError, TypeError, ValueError):
            continue
    if not numeric:
        print("没有可用的温度数据")
        return

    # Sample without replacement so that no city appears twice.
    count = min(20, len(numeric))
    picks = np.random.choice(len(numeric), size=count, replace=False)
    cities = [numeric[int(i)][0] for i in picks]
    min_temps = [numeric[int(i)][1] for i in picks]

    ind = np.arange(len(min_temps))
    print(ind)
    print(cities)
    print(min_temps)

    # A font with Chinese glyphs keeps the labels from rendering as boxes.
    # Fall back to the default font when the Windows font file is absent.
    font_path = Path(r'C:\Windows\Fonts\simsun.ttc')
    if font_path.exists():
        zhfont1 = matplotlib.font_manager.FontProperties(fname=str(font_path))
    else:
        zhfont1 = matplotlib.font_manager.FontProperties()

    plt.subplots()
    plt.bar(ind, min_temps)
    plt.xticks(ind, cities, fontproperties=zhfont1, rotation=60)
    plt.ylabel(u'温度', fontproperties=zhfont1)
    plt.title(u'今日随机20个城市的温度', fontproperties=zhfont1)
    # Show the figure; in a non-interactive session this blocks until the
    # window is closed.
    plt.show()


def main():
    """Scrape the data, then display the chart."""
    spide_temperature()
    show_temperature()


if __name__ == "__main__":
    main()