first version
This commit is contained in:
parent
2e1bdec5e0
commit
963ef1e82c
59
data.py
Normal file
59
data.py
Normal file
@ -0,0 +1,59 @@
|
||||
import json
|
||||
import threading
|
||||
import sys
|
||||
|
||||
import requests
|
||||
|
||||
def main():
    """Download the monthly weather and AQI pages for Jiaxing into result.json.

    One daemon thread is started per URL; each thread retries its request
    until it succeeds and then stores the response text under its URL.
    While pages are still missing, the main thread prints the outstanding
    URLs and the number of finished downloads, then blocks on a line of
    standard input: entering ``save`` writes whatever has been collected so
    far to ``result.json`` and exits with status 0, any other input simply
    refreshes the status.  Once every URL has a page, the full
    ``{url: page_text}`` mapping is written to ``result.json``.
    """
    # Month is the outer loop and year the inner one, which keeps the URL
    # order (and therefore the order of the progress output) unchanged.
    periods = [
        f"{year}{month:02d}"
        for month in range(1, 13)
        for year in range(2014, 2023)
    ]
    weather_urls = [
        "http://www.tianqihoubao.com/lishi/jiaxing/month/{}.html".format(period)
        for period in periods
    ]
    aqi_urls = [
        "http://tianqihoubao.com/aqi/jiaxing-{}.html".format(period)
        for period in periods
    ]
    urls = [*weather_urls, *aqi_urls]

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }

    # Shared between the worker threads (writers) and the main thread.
    data = {}

    def save():
        """Write the pages collected so far to result.json."""
        # Serialize a snapshot: workers may still be inserting keys, and
        # iterating a dict while it grows raises RuntimeError.
        snapshot = dict(data)
        with open("result.json", "w+") as fb:
            fb.write(json.dumps(snapshot))

    def add(url):
        """Fetch *url*, retrying until the request succeeds."""
        while True:
            try:
                page = requests.get(url, headers=headers, timeout=10).text
            except requests.RequestException:
                # Only network-level failures are retried; any other
                # exception is a bug and should surface instead of looping.
                print(f"{url} Failed... Retrying...")
            else:
                print(f"{url} Done")
                data[url] = page
                break

    for url in urls:
        print(f"Getting {url}")
        thread = threading.Thread(target=add, args=(url,), name=url)
        # Daemon threads do not keep the process alive after an early save.
        thread.daemon = True
        thread.start()

    while len(data) != len(urls):
        for url in urls:
            if url not in data:
                print(url)
        print(len(data), "Done")
        command = input()
        if command == "save":
            save()
            sys.exit(0)

    save()


if __name__ == "__main__":
    main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
requests
|
||||
bs4
|
||||
pandas
|
1
result.json
Normal file
1
result.json
Normal file
File diff suppressed because one or more lines are too long
280
solve.py
Normal file
280
solve.py
Normal file
@ -0,0 +1,280 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
|
||||
_AQI_URL_PREFIX = "http://tianqihoubao.com/aqi/jiaxing-"
_WEATHER_URL_PREFIX = "http://www.tianqihoubao.com/lishi/jiaxing/month/"


def _aqi_value(cell):
    """Return the text of an AQI table cell, as a float when it is numeric."""
    text = cell.get_text().strip()
    try:
        return float(text)
    except ValueError:
        return text


def _weather_value(cell):
    """Convert one weather table cell into a date string or a list of parts.

    Whitespace is removed and ``~`` is normalized to ``-``.  A cell whose
    text contains ``年`` is returned as a single string (the date); every
    other cell is split on ``/``.  When the first part contains ``℃`` the
    parts are converted to integers.
    """
    # get_text() drops the markup and decodes HTML entities, so no manual
    # tag stripping is needed here.
    text = "".join(cell.get_text().split()).replace("~", "-")
    if "年" in text:
        return text
    parts = text.split("/")
    if "℃" in parts[0]:
        return [int(part.replace("℃", "")) for part in parts]
    return parts


def _rows_to_frame(rows, convert):
    """Build a DataFrame from table rows; the first row supplies the header."""
    head = [cell.get_text().strip() for cell in rows[0].select("td")]
    records = []
    for row in rows[1:]:
        cells = row.select("td")
        if not cells:
            # A row without <td> cells contributes no data.
            continue
        records.append(dict(zip(head, (convert(cell) for cell in cells))))
    # dtype=object keeps the converted Python values exactly as produced.
    return pd.DataFrame(records, columns=head, dtype=object)


def _split_wind(text):
    """Split one wind description into (direction, force1, force2).

    Descriptions containing ``无持续`` are split on ``风向``, all others on
    ``风``.  When the description contains ``-`` the two forces are the
    integers on either side of it; otherwise both forces are the remaining
    text with ``级`` removed.
    """
    text = str(text)
    separator = "风向" if "无持续" in text else "风"
    pieces = text.split(separator)
    direction = "{}风".format(pieces[0])
    strength = pieces[1].replace("级", "")
    if "-" in text:
        bounds = strength.split("-")
        return direction, int(bounds[0]), int(bounds[1])
    return direction, strength, strength


def _parse_aqi_page(html):
    """Parse one monthly AQI page into a DataFrame."""
    soup = BeautifulSoup(html, 'html.parser')
    # Get data area
    rows = soup.find("div", id="content").select(".api_month_list")[0].table.select("tr")
    return _rows_to_frame(rows, _aqi_value)


def _parse_weather_page(html):
    """Parse one monthly weather page into a DataFrame of flat columns.

    The three combined columns of the page are replaced by separate
    columns, appended in the same order as before so the exported sheet
    keeps its layout.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Get data area
    rows = soup.find("div", id="content").select(".b")[0].select("tr")
    df = _rows_to_frame(rows, _weather_value)

    conditions = list(df["天气状况"])
    df["天气1"] = [pair[0] for pair in conditions]
    df["天气2"] = [pair[1] for pair in conditions]
    df = df.drop(labels='天气状况', axis=1)

    temperatures = list(df["最低气温/最高气温"])
    df["最高气温"] = [int(pair[1]) for pair in temperatures]
    df["最低气温"] = [int(pair[0]) for pair in temperatures]
    df = df.drop(labels='最低气温/最高气温', axis=1)

    # NOTE(review): the page header reads night/day, yet the first part has
    # always been stored in the daytime columns; kept as is -- confirm
    # against the site.
    winds = list(df["风力风向(夜间/白天)"])
    day = [_split_wind(pair[0]) for pair in winds]
    night = [_split_wind(pair[1]) for pair in winds]
    df["白天风向"] = [item[0] for item in day]
    df["夜晚风向"] = [item[0] for item in night]
    df["白天风力1"] = [item[1] for item in day]
    df["白天风力2"] = [item[2] for item in day]
    df["夜晚风力1"] = [item[1] for item in night]
    df["夜晚风力2"] = [item[2] for item in night]
    df = df.drop(labels='风力风向(夜间/白天)', axis=1)
    return df


def _iso_date(text):
    """Convert a ``YYYY年M月D日`` date into ``YYYY-MM-DD``."""
    year, rest = text.split("年")[0], text.split("年")[1]
    month = rest.split("月")[0]
    day = rest.split("月")[1].replace("日", "")
    if len(month) == 1:
        month = f"0{month}"
    if len(day) == 1:
        day = f"0{day}"
    return "{}-{}-{}".format(year, month, day)


def _merge_with_existing(weather, path):
    """Combine the weather frame with the sheet already stored at *path*.

    Every row of the stored sheet is kept.  The weather columns are placed
    first, followed by the stored columns, and are filled in for each
    stored row whose ``日期`` equals the weather row's date in
    ``YYYY-MM-DD`` form.  When several weather rows share a date the last
    one wins.
    """
    existing = pd.read_excel(path)
    weather_cols = [col for col in weather.columns if str(col) != "日期"]
    ordered = list(weather.columns) + [
        col for col in existing.columns if col not in weather.columns
    ]

    combined = existing.reindex(columns=ordered)
    for col in weather_cols:
        # Object dtype lets a column hold integers and text side by side.
        combined[col] = combined[col].astype(object)

    row_by_date = {
        _iso_date(str(stamp)): position
        for position, stamp in enumerate(weather["日期"])
    }
    for label, stamp in combined["日期"].items():
        position = row_by_date.get(str(stamp))
        if position is None:
            continue
        for col in weather_cols:
            combined.at[label, col] = weather[col].iat[position]

    # The stored sheet carries its old index as an unnamed first column.
    return combined.drop(labels="Unnamed: 0", axis=1)


def _month_path(url, prefix):
    """Return output/<year>/<month>月.xls for *url*, creating the folder."""
    name = int(url.replace(prefix, "").replace(".html", ""))
    folder = os.path.join("output", f"{name // 100}")
    os.makedirs(folder, exist_ok=True)
    return os.path.join(folder, "{}月.xls".format(name % 100))


def main():
    """Convert the pages stored in result.json into monthly Excel sheets.

    ``result.json`` maps each URL to its page text.  URLs containing
    ``aqi`` are treated as AQI pages, all others as weather pages.  The AQI
    pages are exported first; each weather page is then merged into the
    sheet of the same month when that sheet exists, or exported on its own
    otherwise.
    """
    # NOTE(review): writing .xls needs an Excel engine that still supports
    # the legacy format (presumably xlwt) -- confirm it is installed.
    os.makedirs("output", exist_ok=True)

    with open("result.json", "r") as fb:
        data = json.load(fb)

    aqi_data = {}
    weather_data = {}
    for key, value in data.items():
        if "aqi" in key:
            aqi_data[key] = value
        else:
            weather_data[key] = value

    # AQI
    for key, value in aqi_data.items():
        df = _parse_aqi_page(value)
        path = _month_path(key, _AQI_URL_PREFIX)
        print(f"{path} Done")
        df.to_excel(path)

    # Weather
    for key, value in weather_data.items():
        df = _parse_weather_page(value)
        path = _month_path(key, _WEATHER_URL_PREFIX)
        if os.path.exists(path):
            df = _merge_with_existing(df, path)
        print(f"{path} Done")
        df.to_excel(path)


if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user