Fixed bugs: one where the format of the new weather data did not match the existing data
@96193186e2e910a581a63cc90bd8e8f684ca9992
--- app.py
+++ app.py
... | ... | @@ -2,6 +2,7 @@ |
2 | 2 |
from flask_restx import Api |
3 | 3 |
from auth import Auth |
4 | 4 |
from action import Action |
5 |
+from datetime import datetime |
|
5 | 6 |
from apscheduler.schedulers.background import BackgroundScheduler |
6 | 7 |
from apscheduler.triggers.interval import IntervalTrigger |
7 | 8 |
from tools.weather_agency_api.weather_api import update_weather_info_to_today |
... | ... | @@ -21,7 +22,7 @@ |
21 | 22 |
|
22 | 23 |
scheduler = BackgroundScheduler() |
23 | 24 |
scheduler.start() |
24 |
- |
|
25 |
+today = datetime.today().strftime('%Y-%m-%d') |
|
25 | 26 |
# Schedule task_function to be called every 6 hours |
26 | 27 |
scheduler.add_job( |
27 | 28 |
func=update_weather_info_to_today, |
... | ... | @@ -35,7 +36,7 @@ |
35 | 36 |
scheduler.add_job( |
36 | 37 |
func=sarima, |
37 | 38 |
trigger=IntervalTrigger(hours=6), |
38 |
- args=("data/weather/weather_data.csv",), |
|
39 |
+ args=("data/weather/weather_data.csv", f"{today}"), |
|
39 | 40 |
# comma to make it a tuple, so that python won't confuse this as a list of char |
40 | 41 |
id='weather_data_update', |
41 | 42 |
name='update weather time every 6 hours', |
... | ... | @@ -44,7 +45,7 @@ |
44 | 45 |
|
45 | 46 |
api.add_namespace(Action, '/action') |
46 | 47 |
update_weather_info_to_today("data/weather/weather_data.csv") |
47 |
-sarima("data/weather/weather_data.csv") |
|
48 |
+sarima("data/weather/weather_data.csv",f"{today}") |
|
48 | 49 |
|
49 | 50 |
api.add_namespace(Auth, '/auth') |
50 | 51 |
print("Api Add Auth") |
--- data/weather/weather_data.csv
+++ data/weather/weather_data.csv
This diff is too big to display. |
--- tools/algo/SARIMA.py
+++ tools/algo/SARIMA.py
... | ... | @@ -1,5 +1,6 @@ |
1 | 1 |
import pandas as pd |
2 | 2 |
import numpy as np |
3 |
+import time |
|
3 | 4 |
from statsmodels.tsa.statespace.sarimax import SARIMAX |
4 | 5 |
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf |
5 | 6 |
import matplotlib.pyplot as plt |
... | ... | @@ -9,7 +10,7 @@ |
9 | 10 |
import pickle |
10 | 11 |
|
11 | 12 |
|
12 |
-def sarima(file, col_key='상대습도', future_hours=24): |
|
13 |
+def sarima(file, save_name, col_key='상대습도', future_hours=24): |
|
13 | 14 |
df = pd.read_csv(file) |
14 | 15 |
|
15 | 16 |
ah = absolute_humidity(df[col_key], df["기온"]) |
... | ... | @@ -37,15 +38,17 @@ |
37 | 38 |
|
38 | 39 |
forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False) |
39 | 40 |
|
40 |
- with open('sarima_model.pkl', 'wb') as pkl_file: |
|
41 |
+ with open(f'sarima_model_{save_name}.pkl', 'wb') as pkl_file: |
|
41 | 42 |
pickle.dump(model_fit, pkl_file) |
42 | 43 |
|
43 |
-def forecast_from_saved_model(file, future_hours=24): |
|
44 |
+def forecast_from_saved_model(file, model_file, future_hours=24): |
|
44 | 45 |
# Load the saved model |
45 |
- with open('sarima_model.pkl', 'rb') as pkl_file: |
|
46 |
+ with open(model_file, 'rb') as pkl_file: |
|
46 | 47 |
loaded_model = pickle.load(pkl_file) |
47 | 48 |
|
48 | 49 |
df = pd.read_csv(file) |
50 |
+ print("files loaded") |
|
51 |
+ t1 = time.time() |
|
49 | 52 |
|
50 | 53 |
df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) |
51 | 54 |
|
... | ... | @@ -58,47 +61,54 @@ |
58 | 61 |
}) |
59 | 62 |
|
60 | 63 |
forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False) |
61 |
- |
|
64 |
+ t2 = -(t1 - time.time()) |
|
65 |
+ # print(forecast_df) |
|
66 |
+ print(f"{t2} seconds per {future_hours}\n" |
|
67 |
+ f"that is {future_hours/t2} per seconds") |
|
62 | 68 |
return forecast_df |
63 | 69 |
|
64 | 70 |
if __name__ == "__main__": |
65 |
- df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv") |
|
66 |
- ah = absolute_humidity(df["상대습도"], df["기온"]) |
|
67 |
- df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) |
|
68 |
- df["절대습도"] = ah |
|
69 |
- # fig = go.Figure() |
|
70 |
- # |
|
71 |
- # fig.add_trace( |
|
72 |
- # go.Scatter(x=df["관측시각"], y=df["절대습도"]) |
|
73 |
- # ) |
|
74 |
- # fig.add_trace( |
|
75 |
- # go.Scatter(x=df["관측시각"], y=signal.savgol_filter( |
|
76 |
- # df["절대습도"],72,3) |
|
77 |
- # )) |
|
78 |
- # fig.show() |
|
79 |
- log_df = np.log(df["상대습도"]) |
|
80 |
- diff_1 = (log_df.diff(periods=1).iloc[1:]) |
|
81 |
- diff_2 = diff_1.diff(periods=1).iloc[1:] |
|
82 |
- plot_acf(diff_2) |
|
83 |
- plot_pacf(diff_2) |
|
84 |
- plt.show() |
|
85 |
- model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24)) |
|
86 |
- model_fit = model.fit() |
|
87 |
- # ARIMA_model = pm.auto_arima(df['절대습도'], |
|
88 |
- # start_p=1, |
|
89 |
- # start_q=1, |
|
90 |
- # test='adf', # use adftest to find optimal 'd' |
|
91 |
- # max_p=3, max_q=3, # maximum p and q |
|
92 |
- # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically) |
|
93 |
- # d=None, # let model determine 'd' |
|
94 |
- # D=2, #order of the seasonal differencing |
|
95 |
- # seasonal=True, # No Seasonality for standard ARIMA |
|
96 |
- # trace=False, # logs |
|
97 |
- # error_action='warn', # shows errors ('ignore' silences these) |
|
98 |
- # suppress_warnings=False, |
|
99 |
- # stepwise=True) |
|
100 |
- print(model_fit.summary()) |
|
101 |
- df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True) |
|
102 |
- # df[['절대습도', 'forecast']].plot(figsize=(12, 8)) |
|
103 |
- fig = px.line(df[['상대습도', 'forecast']]) |
|
104 |
- fig.show()(파일 끝에 줄바꿈 문자 없음) |
|
71 |
+ # sarima("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", "test1") |
|
72 |
+ forecast_from_saved_model("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", |
|
73 |
+ "/home/juni/PycharmProjects/failure_analysis/tools/algo/sarima_model_test1.pkl", |
|
74 |
+ 240) |
|
75 |
+ # df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv") |
|
76 |
+ # ah = absolute_humidity(df["상대습도"], df["기온"]) |
|
77 |
+ # df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) |
|
78 |
+ # df["절대습도"] = ah |
|
79 |
+ # # fig = go.Figure() |
|
80 |
+ # # |
|
81 |
+ # # fig.add_trace( |
|
82 |
+ # # go.Scatter(x=df["관측시각"], y=df["절대습도"]) |
|
83 |
+ # # ) |
|
84 |
+ # # fig.add_trace( |
|
85 |
+ # # go.Scatter(x=df["관측시각"], y=signal.savgol_filter( |
|
86 |
+ # # df["절대습도"],72,3) |
|
87 |
+ # # )) |
|
88 |
+ # # fig.show() |
|
89 |
+ # log_df = np.log(df["상대습도"]) |
|
90 |
+ # diff_1 = (log_df.diff(periods=1).iloc[1:]) |
|
91 |
+ # diff_2 = diff_1.diff(periods=1).iloc[1:] |
|
92 |
+ # plot_acf(diff_2) |
|
93 |
+ # plot_pacf(diff_2) |
|
94 |
+ # plt.show() |
|
95 |
+ # model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24)) |
|
96 |
+ # model_fit = model.fit() |
|
97 |
+ # # ARIMA_model = pm.auto_arima(df['절대습도'], |
|
98 |
+ # # start_p=1, |
|
99 |
+ # # start_q=1, |
|
100 |
+ # # test='adf', # use adftest to find optimal 'd' |
|
101 |
+ # # max_p=3, max_q=3, # maximum p and q |
|
102 |
+ # # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically) |
|
103 |
+ # # d=None, # let model determine 'd' |
|
104 |
+ # # D=2, #order of the seasonal differencing |
|
105 |
+ # # seasonal=True, # No Seasonality for standard ARIMA |
|
106 |
+ # # trace=False, # logs |
|
107 |
+ # # error_action='warn', # shows errors ('ignore' silences these) |
|
108 |
+ # # suppress_warnings=False, |
|
109 |
+ # # stepwise=True) |
|
110 |
+ # print(model_fit.summary()) |
|
111 |
+ # df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True) |
|
112 |
+ # # df[['절대습도', 'forecast']].plot(figsize=(12, 8)) |
|
113 |
+ # fig = px.line(df[['상대습도', 'forecast']]) |
|
114 |
+ # fig.show()(파일 끝에 줄바꿈 문자 없음) |
--- tools/weather_agency_api/check_missing.py
+++ tools/weather_agency_api/check_missing.py
... | ... | @@ -2,13 +2,15 @@ |
2 | 2 |
|
3 | 3 |
def check_missing(df): |
4 | 4 |
# Convert the '관측시각' column to a datetime object |
5 |
- df['관측시각'] = pd.to_datetime(df['관측시각'], format='%Y%m%d%H%M') |
|
5 |
+ # Also, work on a copy() so that the caller's DataFrame is not unintentionally modified through the shared reference. |
|
6 |
+ df_copy = df.copy() |
|
7 |
+ df_copy['관측시각'] = pd.to_datetime(df_copy['관측시각'], format='%Y%m%d%H%M') |
|
6 | 8 |
|
7 | 9 |
# Calculate the difference between each row and its subsequent row |
8 |
- df['time_diff'] = df['관측시각'].diff() |
|
10 |
+ df_copy['time_diff'] = df_copy['관측시각'].diff() |
|
9 | 11 |
|
10 | 12 |
# Check for differences that aren't 1 hour, excluding the first row |
11 |
- errors = df[df['time_diff'] != pd.Timedelta(hours=1)][1:] |
|
13 |
+ errors = df_copy[df_copy['time_diff'] != pd.Timedelta(hours=1)][1:] |
|
12 | 14 |
|
13 | 15 |
if not errors.empty: |
14 | 16 |
print("Errors found:") |
--- tools/weather_agency_api/weather_api.py
+++ tools/weather_agency_api/weather_api.py
... | ... | @@ -115,6 +115,7 @@ |
115 | 115 |
buffer = StringIO(text) |
116 | 116 |
df = pd.read_csv(buffer, skiprows=2, skipfooter=1, sep=r"\s+", header=None, index_col=False, |
117 | 117 |
engine="python").iloc[2:, :-1] |
118 |
+ df = df.set_axis(weather_api_columns, axis=1, inplace=False) |
|
118 | 119 |
if not check_missing(df): # 빠진 정보가 있다면 |
119 | 120 |
print("api is not working !") |
120 | 121 |
return { |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?