import json
import os
from bs4 import BeautifulSoup
import pandas as pd
def main():
os.makedirs("output", exist_ok=True)
with open("result.json", "r") as fb:
data = json.loads(fb.read())
aqi_data = {}
weather_data = {}
for key, value in data.items():
if "aqi" in key:
aqi_data[key] = value
else:
weather_data[key] = value
# AQI
for key, value in aqi_data.items():
soup = BeautifulSoup(value, 'html.parser')
# Get data area
d = soup.find("div", id="content").select(".api_month_list")[0].table.select("tr")
# Head
head = [str(i).strip("
\n").strip(" | ") for i in d[0].select("td")]
df = pd.DataFrame([], columns=head)
for fd in d[1:]:
fds = fd.select("td")
detail = []
for i in fds:
temp = str(i)\
.replace("", "").replace("", "")\
.replace(' | \r\n', "")\
.replace(' | \r\n', "")\
.replace(' | \r\n', "")\
.replace(' | \r\n', "")\
.replace(' | \r\n', "")\
.replace(' | \r\n', "")\
.strip()
try:
temp = float(temp)
except:
pass
detail.append([temp])
df = df.append(pd.DataFrame(dict(zip(
head, detail
))), ignore_index=True)
name = int(key\
.replace('http://tianqihoubao.com/aqi/jiaxing-', "")\
.replace(".html", ""))
os.makedirs(os.path.join("output", f"{name // 100}"), exist_ok=True)
path = os.path.join("output", f"{name // 100}", "{}月.xls".format(name % 100))
print(f"{path} Done")
df.to_excel(path)
# Weather
for key, value in weather_data.items():
soup = BeautifulSoup(value, 'html.parser')
# Get data area
d = soup.find("div", id="content").select(".b")[0].select("tr")
# Head
head = [str(i).strip(" | \n").strip(" | ") for i in d[0].select("td")]
df = pd.DataFrame([], columns=head)
for fd in d[1:]:
fds = fd.select("td")
detail = []
for i in fds:
if "年" in str(i):
temp = str(i)[115:].strip("").strip().strip("").strip()
elif "/" in str(i):
temp = str(i).strip().strip("").strip(" | ").strip().replace(" ", "").replace("\r\n", "").split("/")
else:
temp = str(i)
if type(temp) is list:
temp = [i.replace("<", "<").replace("~", "-") for i in temp]
else:
temp = temp.replace("<", "<").replace("~", "-")
if type(temp) is list and "℃" in temp[0]:
temp = [int(i.replace("℃", "")) for i in temp]
detail.append([temp])
df = df.append(pd.DataFrame(dict(zip(
head, detail
))), ignore_index=True)
df["天气1"] = [""] * len(df.index)
df["天气2"] = [""] * len(df.index)
for i in range(len(df.index)):
df.loc[i, "天气1"] = df.at[i, "天气状况"][0]
df.loc[i, "天气2"] = df.at[i, "天气状况"][1]
df = df.drop(labels='天气状况', axis=1)
df["最高气温"] = [""] * len(df.index)
df["最低气温"] = [""] * len(df.index)
for i in range(len(df.index)):
df.loc[i, "最低气温"] = int(df.at[i, "最低气温/最高气温"][0])
df.loc[i, "最高气温"] = int(df.at[i, "最低气温/最高气温"][1])
df = df.drop(labels='最低气温/最高气温', axis=1)
df["白天风向"] = [""] * len(df.index)
df["夜晚风向"] = [""] * len(df.index)
df["白天风力1"] = [""] * len(df.index)
df["白天风力2"] = [""] * len(df.index)
df["夜晚风力1"] = [""] * len(df.index)
df["夜晚风力2"] = [""] * len(df.index)
for i in range(len(df.index)):
if "无持续" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "无持续" not in str(df.at[i, "风力风向(夜间/白天)"][1]) :
if "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[1])
elif "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" not in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
elif "-" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[1])
else:
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
elif "无持续" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "无持续" in str(df.at[i, "风力风向(夜间/白天)"][1]) :
if "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[1])
elif "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" not in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
elif "-" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[1])
else:
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
elif "无持续" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "无持续" not in str(df.at[i, "风力风向(夜间/白天)"][1]) :
if "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[1])
elif "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" not in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
elif "-" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "").split("-")[1])
else:
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风")[1].replace("级", "")
else:
if "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[1])
elif "-" in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" not in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
df.loc[i, "白天风力2"] = int(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "").split("-")[1])
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
elif "-" not in str(df.at[i, "风力风向(夜间/白天)"][0]) and "-" in str(df.at[i, "风力风向(夜间/白天)"][1]):
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[0])
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = int(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "").split("-")[1])
else:
df.loc[i, "白天风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][0].split("风向")[0])
df.loc[i, "夜晚风向"] = "{}风".format(df.at[i, "风力风向(夜间/白天)"][1].split("风向")[0])
df.loc[i, "白天风力1"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力1"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
df.loc[i, "白天风力2"] = df.at[i, "风力风向(夜间/白天)"][0].split("风向")[1].replace("级", "")
df.loc[i, "夜晚风力2"] = df.at[i, "风力风向(夜间/白天)"][1].split("风向")[1].replace("级", "")
df = df.drop(labels='风力风向(夜间/白天)', axis=1)
name = int(key\
.replace('http://www.tianqihoubao.com/lishi/jiaxing/month/', "")\
.replace(".html", ""))
os.makedirs(os.path.join("output", f"{name // 100}"), exist_ok=True)
path = os.path.join("output", f"{name // 100}", "{}月.xls".format(name % 100))
if os.path.exists(path):
df_o = pd.read_excel(path)
df_c = pd.merge(df, df_o)
for i in df_o.columns:
df_c[i] = df_o[i]
for i in range(len(df.index)):
for j in range(len(df_c.index)):
year = str(df.at[i, "日期"]).split("年")[0]
month = str(df.at[i, "日期"]).split("年")[1].split("月")[0]
day = str(df.at[i, "日期"]).split("年")[1].split("月")[1].replace("日", "")
if len(month) == 1:
month = f"0{month}"
if len(day) == 1:
day = f"0{day}"
if str(df_c.at[j, "日期"]) == "{}-{}-{}".format(year, month, day):
for k in df.columns:
if str(k) != "日期":
df_c.loc[j, str(k)] = df.at[i, str(k)]
df_c = df_c.drop(labels="Unnamed: 0", axis=1)
print(f"{path} Done")
df_c.to_excel(path)
else:
print(f"{path} Done")
df.to_excel(path)
if __name__ == "__main__":
main()