first version
This commit is contained in:
parent
2e1bdec5e0
commit
963ef1e82c
59
data.py
Normal file
59
data.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import json
|
||||||
|
import threading
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
def main():
    """Download the monthly weather and AQI pages for Jiaxing into ``result.json``.

    One URL is built per month of 2014-2022 for each of the two page families
    (weather and AQI).  Every URL is fetched by its own daemon thread, which
    retries until a response body is obtained.  The raw response text is kept
    in a dict keyed by URL.

    While downloads are pending, the main thread prints the outstanding URLs
    and the number of finished ones, then waits for a line on stdin: typing
    ``save`` writes whatever has been collected so far and exits with status
    0; any other input just refreshes the status.  Once every URL is present,
    the full dict is written to ``result.json``.

    NOTE: the HTTP status code is not checked, so error pages are stored
    exactly like successful ones.
    """
    import time  # local import: only the retry back-off needs it

    retry_delay = 1.0  # seconds to wait before retrying a failed request
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36"
    }

    # Same ordering as before: month-major, year-minor.
    stamps = [
        f"{year}{month:02d}"
        for month in range(1, 13)
        for year in range(2014, 2023)
    ]
    weather_urls = [
        "http://www.tianqihoubao.com/lishi/jiaxing/month/{}.html".format(stamp)
        for stamp in stamps
    ]
    aqi_urls = [
        "http://tianqihoubao.com/aqi/jiaxing-{}.html".format(stamp)
        for stamp in stamps
    ]
    urls = [*weather_urls, *aqi_urls]

    data = {}
    # Guards `data` so that a snapshot taken for saving can never observe the
    # dict while a worker thread is inserting into it.
    data_lock = threading.Lock()

    def add(url):
        """Fetch *url*, retrying until it succeeds, and store the body in ``data``."""
        while True:
            try:
                body = requests.get(url, headers=headers, timeout=10).text
            except requests.RequestException:
                print(f"{url} Failed... Retrying...")
                # Back off briefly instead of spinning when the failure is
                # immediate (e.g. no network).
                time.sleep(retry_delay)
            else:
                print(f"{url} Done")
                with data_lock:
                    data[url] = body
                break

    def save():
        """Write a consistent snapshot of ``data`` to ``result.json``."""
        with data_lock:
            snapshot = dict(data)
        with open("result.json", "w+") as fb:
            fb.write(json.dumps(snapshot))

    for url in urls:
        print(f"Getting {url}")
        thread = threading.Thread(target=add, args=(url,), name=url)
        # Daemon threads let the "save" shortcut exit without waiting for
        # downloads that are still retrying.
        thread.daemon = True
        thread.start()

    while len(data) != len(urls):
        for url in urls:
            if url not in data:
                print(url)
        print(len(data), "Done")

        c = input()
        if c == "save":
            save()
            sys.exit(0)

    save()


if __name__ == "__main__":
    main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
requests
|
||||||
|
bs4
|
||||||
|
pandas
|
1
result.json
Normal file
1
result.json
Normal file
File diff suppressed because one or more lines are too long
280
solve.py
Normal file
280
solve.py
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
_DATE_COLUMN = "日期"
_SKY_COLUMN = "天气状况"
_TEMPERATURE_COLUMN = "最低气温/最高气温"
_WIND_COLUMN = "风力风向(夜间/白天)"

# Columns derived from the three combined source columns above, in the order
# they are appended to the sheet.
_DERIVED_COLUMNS = [
    "天气1", "天气2",
    "最高气温", "最低气温",
    "白天风向", "夜晚风向",
    "白天风力1", "白天风力2",
    "夜晚风力1", "夜晚风力2",
]


def _first_match(html, selector):
    """Return the first element matching *selector* inside ``div#content``, or None."""
    content = BeautifulSoup(html, "html.parser").find("div", id="content")
    if content is None:
        return None
    matches = content.select(selector)
    return matches[0] if matches else None


def _table_to_records(rows, convert):
    """Turn table rows into ``(head, records)``.

    The first row supplies the column names; every following row becomes a
    dict mapping column name to ``convert(cell)``.  Rows without any ``td``
    cell are dropped.
    """
    head = [cell.get_text().strip() for cell in rows[0].select("td")]
    records = []
    for row in rows[1:]:
        record = dict(zip(head, (convert(cell) for cell in row.select("td"))))
        if record:
            records.append(record)
    return head, records


def _aqi_value(cell):
    """Return the cell text as a float when it is numeric, else as stripped text."""
    text = cell.get_text().strip()
    try:
        return float(text)
    except ValueError:
        return text


def _compact_text(cell):
    """Return the cell text with all whitespace removed and ``~`` written as ``-``."""
    return "".join(cell.get_text().split()).replace("~", "-")


def _parse_wind(text):
    """Split one wind description into ``(direction, level1, level2)``.

    The direction is the part before the first "风" (before "风向" when the
    text contains "无持续"), with "风" appended again.  The remainder, minus
    "级", is the level: a range such as "3-4" yields two ints, anything else
    is returned unchanged for both levels.
    """
    separator = "风向" if "无持续" in text else "风"
    direction, _, level = text.partition(separator)
    level = level.replace("级", "")
    direction = "{}风".format(direction)
    if "-" in level:
        bounds = level.split("-")
        return direction, int(bounds[0]), int(bounds[1])
    return direction, level, level


def _weather_record(raw):
    """Expand the combined weather/temperature/wind columns of one row."""
    combined = (_SKY_COLUMN, _TEMPERATURE_COLUMN, _WIND_COLUMN)
    record = {name: value for name, value in raw.items() if name not in combined}

    sky = raw[_SKY_COLUMN].split("/")
    record["天气1"] = sky[0]
    record["天气2"] = sky[1]

    temperatures = raw[_TEMPERATURE_COLUMN].split("/")
    record["最低气温"] = int(temperatures[0].replace("℃", ""))
    record["最高气温"] = int(temperatures[1].replace("℃", ""))

    wind = raw[_WIND_COLUMN].split("/")
    # NOTE(review): the source header reads night/day ("夜间/白天"), yet the
    # first part has always been stored as the daytime value.  The mapping is
    # kept as it was; confirm the intended order against the site.
    day_direction, day_low, day_high = _parse_wind(wind[0])
    night_direction, night_low, night_high = _parse_wind(wind[1])
    record["白天风向"] = day_direction
    record["夜晚风向"] = night_direction
    record["白天风力1"] = day_low
    record["白天风力2"] = day_high
    record["夜晚风力1"] = night_low
    record["夜晚风力2"] = night_high
    return record


def _iso_date(text):
    """Convert "2014年1月5日" style text to "2014-01-05"."""
    year, _, rest = text.partition("年")
    month, _, rest = rest.partition("月")
    day = rest.replace("日", "")
    return "{}-{}-{}".format(year, month.zfill(2), day.zfill(2))


def _output_path(url, prefix):
    """Return ``output/<year>/<month>月.xls`` for *url*, creating the year folder."""
    stamp = int(url.replace(prefix, "").replace(".html", ""))
    folder = os.path.join("output", f"{stamp // 100}")
    os.makedirs(folder, exist_ok=True)
    return os.path.join(folder, "{}月.xls".format(stamp % 100))


def _merge_with_existing(weather, path):
    """Add the weather columns to the sheet already stored at *path*.

    The rows of the existing sheet are kept.  For every row whose date equals
    the ISO form of a weather date, the weather values are copied in; other
    rows keep empty weather cells.  Weather columns come first in the result,
    followed by the remaining columns of the existing sheet.
    """
    previous = pd.read_excel(path)
    # "Unnamed: 0" is the index column written by the earlier to_excel call.
    merged = previous.drop(columns="Unnamed: 0", errors="ignore")

    # When a date occurs more than once, the last weather row wins.
    source_by_date = {
        _iso_date(str(text)): label
        for label, text in weather[_DATE_COLUMN].items()
    }
    weather_columns = [name for name in weather.columns if name != _DATE_COLUMN]

    for name in weather_columns:
        if name in merged.columns:
            merged[name] = merged[name].astype(object)
        else:
            merged[name] = pd.Series(
                [None] * len(merged), index=merged.index, dtype=object
            )

    for label, date in merged[_DATE_COLUMN].items():
        source = source_by_date.get(str(date))
        if source is None:
            continue
        for name in weather_columns:
            merged.at[label, name] = weather.at[source, name]

    ordered = list(weather.columns) + [
        name for name in merged.columns if name not in weather.columns
    ]
    return merged[ordered]


def main():
    """Convert the pages stored in ``result.json`` into one Excel sheet per month.

    ``result.json`` maps page URL to raw HTML.  URLs containing "aqi" are
    parsed as AQI tables, all others as weather tables.  AQI sheets are
    written first to ``output/<year>/<month>月.xls``; each weather table is
    then split into separate weather, temperature and wind columns and either
    merged into the existing sheet for that month or written on its own.
    Pages without a data table are reported and skipped.

    NOTE(review): writing ``.xls`` depends on pandas' xlwt engine, which newer
    pandas releases no longer ship — confirm the pandas version in use.
    """
    os.makedirs("output", exist_ok=True)

    with open("result.json", "r") as fb:
        data = json.loads(fb.read())

    aqi_data = {key: value for key, value in data.items() if "aqi" in key}
    weather_data = {key: value for key, value in data.items() if "aqi" not in key}

    # AQI
    for key, value in aqi_data.items():
        container = _first_match(value, ".api_month_list")
        table = container.table if container is not None else None
        rows = table.select("tr") if table is not None else []
        if not rows:
            print(f"{key} Skipped: no data table found")
            continue

        head, records = _table_to_records(rows, _aqi_value)
        # Build the frame in one go instead of appending row by row.
        df = pd.DataFrame(records, columns=head)

        path = _output_path(key, 'http://tianqihoubao.com/aqi/jiaxing-')
        print(f"{path} Done")
        df.to_excel(path)

    # Weather
    for key, value in weather_data.items():
        container = _first_match(value, ".b")
        rows = container.select("tr") if container is not None else []
        if not rows:
            print(f"{key} Skipped: no data table found")
            continue

        head, records = _table_to_records(rows, _compact_text)
        combined = (_SKY_COLUMN, _TEMPERATURE_COLUMN, _WIND_COLUMN)
        columns = [name for name in head if name not in combined] + _DERIVED_COLUMNS
        # dtype=object keeps mixed int/str wind levels exactly as parsed.
        df = pd.DataFrame(
            [_weather_record(record) for record in records],
            columns=columns,
            dtype=object,
        )

        path = _output_path(key, 'http://www.tianqihoubao.com/lishi/jiaxing/month/')
        if os.path.exists(path):
            df = _merge_with_existing(df, path)

        print(f"{path} Done")
        df.to_excel(path)


if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user