윤영준 윤영준 2023-09-26
Fixed a bug: the format of the new weather data did not match that of the existing data
@96193186e2e910a581a63cc90bd8e8f684ca9992
app.py
--- app.py
+++ app.py
@@ -2,6 +2,7 @@
 from flask_restx import Api
 from auth import Auth
 from action import Action
+from datetime import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.interval import IntervalTrigger
 from tools.weather_agency_api.weather_api import update_weather_info_to_today
@@ -21,7 +22,7 @@
 
 scheduler = BackgroundScheduler()
 scheduler.start()
-
+today = datetime.today().strftime('%Y-%m-%d')
 # Schedule task_function to be called every 6 hours
 scheduler.add_job(
     func=update_weather_info_to_today,
@@ -35,7 +36,7 @@
 scheduler.add_job(
     func=sarima,
     trigger=IntervalTrigger(hours=6),
-    args=("data/weather/weather_data.csv",),
+    args=("data/weather/weather_data.csv", f"{today}"),
     # comma to make it a tuple, so that python won't confuse this as a list of char
     id='weather_data_update',
     name='update weather time every 6 hours',
@@ -44,7 +45,7 @@
 
 api.add_namespace(Action, '/action')
 update_weather_info_to_today("data/weather/weather_data.csv")
-sarima("data/weather/weather_data.csv")
+sarima("data/weather/weather_data.csv",f"{today}")
 
 api.add_namespace(Auth, '/auth')
 print("Api Add Auth")
data/weather/weather_data.csv
--- data/weather/weather_data.csv
+++ data/weather/weather_data.csv
This diff is too big to display.
tools/algo/SARIMA.py
--- tools/algo/SARIMA.py
+++ tools/algo/SARIMA.py
@@ -1,5 +1,6 @@
 import pandas as pd
 import numpy as np
+import time
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
 import matplotlib.pyplot as plt
@@ -9,7 +10,7 @@
 import pickle
 
 
-def sarima(file, col_key='상대습도', future_hours=24):
+def sarima(file, save_name, col_key='상대습도', future_hours=24):
     df = pd.read_csv(file)
 
     ah = absolute_humidity(df[col_key], df["기온"])
@@ -37,15 +38,17 @@
 
     forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
 
-    with open('sarima_model.pkl', 'wb') as pkl_file:
+    with open(f'sarima_model_{save_name}.pkl', 'wb') as pkl_file:
         pickle.dump(model_fit, pkl_file)
 
-def forecast_from_saved_model(file, future_hours=24):
+def forecast_from_saved_model(file, model_file, future_hours=24):
     # Load the saved model
-    with open('sarima_model.pkl', 'rb') as pkl_file:
+    with open(model_file, 'rb') as pkl_file:
         loaded_model = pickle.load(pkl_file)
 
     df = pd.read_csv(file)
+    print("files loaded")
+    t1 = time.time()
 
     df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
 
@@ -58,47 +61,54 @@
     })
 
     forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
-
+    t2 = -(t1 - time.time())
+    # print(forecast_df)
+    print(f"{t2} seconds per {future_hours}\n"
+          f"that is {future_hours/t2} per seconds")
     return forecast_df
 
 if __name__ == "__main__":
-    df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
-    ah = absolute_humidity(df["상대습도"], df["기온"])
-    df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
-    df["절대습도"] = ah
-    # fig = go.Figure()
-    #
-    # fig.add_trace(
-    #     go.Scatter(x=df["관측시각"], y=df["절대습도"])
-    # )
-    # fig.add_trace(
-    #     go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
-    #         df["절대습도"],72,3)
-    #     ))
-    # fig.show()
-    log_df = np.log(df["상대습도"])
-    diff_1 = (log_df.diff(periods=1).iloc[1:])
-    diff_2 = diff_1.diff(periods=1).iloc[1:]
-    plot_acf(diff_2)
-    plot_pacf(diff_2)
-    plt.show()
-    model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
-    model_fit = model.fit()
-    # ARIMA_model = pm.auto_arima(df['절대습도'],
-    #                             start_p=1,
-    #                             start_q=1,
-    #                             test='adf',  # use adftest to find optimal 'd'
-    #                             max_p=3, max_q=3,  # maximum p and q
-    #                             m=24,  # frequency of series (if m==1, seasonal is set to FALSE automatically)
-    #                             d=None,  # let model determine 'd'
-    #                             D=2, #order of the seasonal differencing
-    #                             seasonal=True,  # No Seasonality for standard ARIMA
-    #                             trace=False,  # logs
-    #                             error_action='warn',  # shows errors ('ignore' silences these)
-    #                             suppress_warnings=False,
-    #                             stepwise=True)
-    print(model_fit.summary())
-    df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
-    # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
-    fig = px.line(df[['상대습도', 'forecast']])
-    fig.show()
(파일 끝에 줄바꿈 문자 없음)
+    # sarima("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", "test1")
+    forecast_from_saved_model("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv",
+                              "/home/juni/PycharmProjects/failure_analysis/tools/algo/sarima_model_test1.pkl",
+                              240)
+    # df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
+    # ah = absolute_humidity(df["상대습도"], df["기온"])
+    # df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
+    # df["절대습도"] = ah
+    # # fig = go.Figure()
+    # #
+    # # fig.add_trace(
+    # #     go.Scatter(x=df["관측시각"], y=df["절대습도"])
+    # # )
+    # # fig.add_trace(
+    # #     go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
+    # #         df["절대습도"],72,3)
+    # #     ))
+    # # fig.show()
+    # log_df = np.log(df["상대습도"])
+    # diff_1 = (log_df.diff(periods=1).iloc[1:])
+    # diff_2 = diff_1.diff(periods=1).iloc[1:]
+    # plot_acf(diff_2)
+    # plot_pacf(diff_2)
+    # plt.show()
+    # model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
+    # model_fit = model.fit()
+    # # ARIMA_model = pm.auto_arima(df['절대습도'],
+    # #                             start_p=1,
+    # #                             start_q=1,
+    # #                             test='adf',  # use adftest to find optimal 'd'
+    # #                             max_p=3, max_q=3,  # maximum p and q
+    # #                             m=24,  # frequency of series (if m==1, seasonal is set to FALSE automatically)
+    # #                             d=None,  # let model determine 'd'
+    # #                             D=2, #order of the seasonal differencing
+    # #                             seasonal=True,  # No Seasonality for standard ARIMA
+    # #                             trace=False,  # logs
+    # #                             error_action='warn',  # shows errors ('ignore' silences these)
+    # #                             suppress_warnings=False,
+    # #                             stepwise=True)
+    # print(model_fit.summary())
+    # df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
+    # # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
+    # fig = px.line(df[['상대습도', 'forecast']])
+    # fig.show()
(파일 끝에 줄바꿈 문자 없음)
tools/weather_agency_api/check_missing.py
--- tools/weather_agency_api/check_missing.py
+++ tools/weather_agency_api/check_missing.py
@@ -2,13 +2,15 @@
 
 def check_missing(df):
     # Convert the '관측시각' column to a datetime object
-    df['관측시각'] = pd.to_datetime(df['관측시각'], format='%Y%m%d%H%M')
+    # Also, work on a copy() so that the caller's DataFrame is not unintentionally modified through the shared reference.
+    df_copy = df.copy()
+    df_copy['관측시각'] = pd.to_datetime(df_copy['관측시각'], format='%Y%m%d%H%M')
 
     # Calculate the difference between each row and its subsequent row
-    df['time_diff'] = df['관측시각'].diff()
+    df_copy['time_diff'] = df_copy['관측시각'].diff()
 
     # Check for differences that aren't 1 hour, excluding the first row
-    errors = df[df['time_diff'] != pd.Timedelta(hours=1)][1:]
+    errors = df_copy[df_copy['time_diff'] != pd.Timedelta(hours=1)][1:]
 
     if not errors.empty:
         print("Errors found:")
tools/weather_agency_api/weather_api.py
--- tools/weather_agency_api/weather_api.py
+++ tools/weather_agency_api/weather_api.py
@@ -115,6 +115,7 @@
             buffer = StringIO(text)
             df = pd.read_csv(buffer, skiprows=2, skipfooter=1, sep=r"\s+", header=None, index_col=False,
                              engine="python").iloc[2:, :-1]
+            df = df.set_axis(weather_api_columns, axis=1, inplace=False)
             if not check_missing(df): # 빠진 정보가 있다면
                 print("api is not working !")
                 return {
Add a comment
List