zl程序教程

您现在的位置是:首页 >  其他

当前栏目

爬取天气信息,并存储到txt文件

文件存储 信息 txt 爬取 天气
时间:2023-09-14 09:14:59

爬取天气信息,并存储到txt文件

import requests
from bs4 import BeautifulSoup
import xpinyin


#
def getHtml(url, header=None, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        header: Optional dict of HTTP request headers. ``None`` lets
            ``requests`` send its default headers.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            caller indefinitely.

    Returns:
        The response body decoded as UTF-8 when the status code is 200,
        otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged on
            connection errors or when ``timeout`` expires.
    """
    # ``headers=None`` is equivalent to omitting the argument, so one call
    # covers both the "with headers" and "without headers" cases.
    res = requests.get(url, headers=header, timeout=timeout)
    if res.status_code != 200:
        return None
    # Force UTF-8 decoding instead of relying on the guessed encoding.
    res.encoding = 'utf8'
    return res.text

# Request headers sent along with every page fetch.
# NOTE(review): "br" is advertised in Accept-Encoding; decoding Brotli
# responses presumably needs an optional Brotli package to be installed --
# confirm against the deployment environment.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
    "Host": "lishi.tianqi.com",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "cache-control": "max-age=0",
}



def getURL(city, month, year=2022):
    """Build the lishi.tianqi.com history-page URL for a city and month.

    Args:
        city: City name; it is converted to pinyin and the "-" separators
            are removed to form the URL path segment.
        month: Month number; values below 10 are prefixed with "0".
        year: Year placed in front of the month in the page name.

    Returns:
        A string of the form
        ``https://lishi.tianqi.com/<pinyin>/<year><month>.html``.
    """
    converter = xpinyin.Pinyin()
    slug = converter.get_pinyin(city).replace("-", "")
    # Zero-pad single-digit months so the page name always has two digits.
    month_part = "0" + str(month) if month < 10 else month
    pieces = ["https://lishi.tianqi.com/", slug, "/", str(year), str(month_part), ".html"]
    return "".join(pieces)

# Maps a weather phrase (as it appears on the page) to an integer code.
# Codes are assigned by position: the first phrase is 0, the last is 13.
dic = {
    phrase: code
    for code, phrase in enumerate((
        "晴",
        "阴",
        "多云",
        "风",
        "霾",
        "雾",
        "小雪",
        "中雪",
        "大雪",
        "小雨",
        "中雨",
        "大雨",
        "阵雨",
        "暴雨",
    ))
}

# Scrape the 2022 history pages for each city and write one text file per
# city. Each output line is "<month> <day> <field3> <field4> <weather code>".

# Code written when a weather phrase is not present in ``dic``; keeps one
# unexpected phrase from aborting the whole run.
UNKNOWN_WEATHER = -1

citys = ["海南","晋城"]
for city in citys:
    # The file is opened in 'w' mode, so each run overwrites earlier output.
    with open(city + "2022天气数据.txt", 'w', encoding='utf8') as f:
        for m in range(1, 13):
            url = getURL(city, m)
            html = getHtml(url, headers)
            if html is None:
                # getHtml returns None for any non-200 response; skip the
                # month instead of handing None to the parser.
                print("skip " + url + ": request did not return a page")
                continue

            soup = BeautifulSoup(html, 'html.parser')
            day_lists = soup.find_all("ul", class_="thrui")
            if not day_lists:
                print("skip " + url + ": no <ul class=\"thrui\"> found")
                continue

            # Entries are separated by blank lines in the list's text; the
            # final chunk is discarded, as in the original script.
            rows = day_lists[0].text.split("\n\n")[:-1]
            month_label = str(m).rjust(2, "0")

            # The day number is the position among non-empty entries; it is
            # not parsed from the page.
            cnt = 1
            for d in rows:
                if d == "":
                    continue
                dd = d.replace("\n", " ").split(" ")
                print(dd)
                day = cnt
                cnt += 1

                if len(dd) < 6:
                    print("skip malformed row: " + repr(dd))
                    continue

                # Fields 3 and 4 lose their last character -- presumably the
                # temperature unit sign; confirm against the page markup.
                field3 = dd[3][:-1]
                field4 = dd[4][:-1]

                # Keep only the part before "转", then before "到", so that
                # compound phrases map to their first component.
                phrase = dd[5].split("转")[0].split("到")[0]
                code = dic.get(phrase)
                if code is None:
                    print("unknown weather " + repr(dd[5]) + ", writing "
                          + str(UNKNOWN_WEATHER))
                    code = UNKNOWN_WEATHER

                fields = (month_label, str(day), field3, field4, str(code))
                f.write(" ".join(fields) + "\n")