Commit @96193186e2e910a581a63cc90bd8e8f684ca9992

윤영준 2023-09-26

Fixed bugs, including one where the format of new weather data did not match the existing data

@96193186e2e910a581a63cc90bd8e8f684ca9992

cdb9153

9619318

app.py

--- app.py

+++ app.py


 from flask_restx import Api
 from auth import Auth
 from action import Action
+from datetime import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.interval import IntervalTrigger
 from tools.weather_agency_api.weather_api import update_weather_info_to_today

 
 scheduler = BackgroundScheduler()
 scheduler.start()
-
+today = datetime.today().strftime('%Y-%m-%d')
 # Schedule task_function to be called every 6 hours
 scheduler.add_job(
     func=update_weather_info_to_today,

 scheduler.add_job(
     func=sarima,
     trigger=IntervalTrigger(hours=6),
-    args=("data/weather/weather_data.csv",),
+    args=("data/weather/weather_data.csv", f"{today}"),
     # comma to make it a tuple, so that python won't confuse this as a list of char
     id='weather_data_update',
     name='update weather time every 6 hours',

 
 api.add_namespace(Action, '/action')
 update_weather_info_to_today("data/weather/weather_data.csv")
-sarima("data/weather/weather_data.csv")
+sarima("data/weather/weather_data.csv",f"{today}")
 
 api.add_namespace(Auth, '/auth')
 print("Api Add Auth")

cdb9153

9619318

data/weather/weather_data.csv

--- data/weather/weather_data.csv

+++ data/weather/weather_data.csv

This diff is too big to display.

cdb9153

9619318

tools/algo/SARIMA.py

--- tools/algo/SARIMA.py

+++ tools/algo/SARIMA.py


 import pandas as pd
 import numpy as np
+import time
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
 import matplotlib.pyplot as plt

 import pickle
 
 
-def sarima(file, col_key='상대습도', future_hours=24):
+def sarima(file, save_name, col_key='상대습도', future_hours=24):
     df = pd.read_csv(file)
 
     ah = absolute_humidity(df[col_key], df["기온"])

 
     forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
 
-    with open('sarima_model.pkl', 'wb') as pkl_file:
+    with open(f'sarima_model_{save_name}.pkl', 'wb') as pkl_file:
         pickle.dump(model_fit, pkl_file)
 
-def forecast_from_saved_model(file, future_hours=24):
+def forecast_from_saved_model(file, model_file, future_hours=24):
     # Load the saved model
-    with open('sarima_model.pkl', 'rb') as pkl_file:
+    with open(model_file, 'rb') as pkl_file:
         loaded_model = pickle.load(pkl_file)
 
     df = pd.read_csv(file)
+    print("files loaded")
+    t1 = time.time()
 
     df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
 

     })
 
     forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
-
+    t2 = -(t1 - time.time())
+    # print(forecast_df)
+    print(f"{t2} seconds per {future_hours}\n"
+          f"that is {future_hours/t2} per seconds")
     return forecast_df
 
 if __name__ == "__main__":
-    df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
-    ah = absolute_humidity(df["상대습도"], df["기온"])
-    df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
-    df["절대습도"] = ah
-    # fig = go.Figure()
-    #
-    # fig.add_trace(
-    #     go.Scatter(x=df["관측시각"], y=df["절대습도"])
-    # )
-    # fig.add_trace(
-    #     go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
-    #         df["절대습도"],72,3)
-    #     ))
-    # fig.show()
-    log_df = np.log(df["상대습도"])
-    diff_1 = (log_df.diff(periods=1).iloc[1:])
-    diff_2 = diff_1.diff(periods=1).iloc[1:]
-    plot_acf(diff_2)
-    plot_pacf(diff_2)
-    plt.show()
-    model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
-    model_fit = model.fit()
-    # ARIMA_model = pm.auto_arima(df['절대습도'],
-    #                             start_p=1,
-    #                             start_q=1,
-    #                             test='adf',  # use adftest to find optimal 'd'
-    #                             max_p=3, max_q=3,  # maximum p and q
-    #                             m=24,  # frequency of series (if m==1, seasonal is set to FALSE automatically)
-    #                             d=None,  # let model determine 'd'
-    #                             D=2, #order of the seasonal differencing
-    #                             seasonal=True,  # No Seasonality for standard ARIMA
-    #                             trace=False,  # logs
-    #                             error_action='warn',  # shows errors ('ignore' silences these)
-    #                             suppress_warnings=False,
-    #                             stepwise=True)
-    print(model_fit.summary())
-    df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
-    # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
-    fig = px.line(df[['상대습도', 'forecast']])
-    fig.show()
+    # sarima("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", "test1")
+    forecast_from_saved_model("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv",
+                              "/home/juni/PycharmProjects/failure_analysis/tools/algo/sarima_model_test1.pkl",
+                              240)
+    # df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
+    # ah = absolute_humidity(df["상대습도"], df["기온"])
+    # df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
+    # df["절대습도"] = ah
+    # # fig = go.Figure()
+    # #
+    # # fig.add_trace(
+    # #     go.Scatter(x=df["관측시각"], y=df["절대습도"])
+    # # )
+    # # fig.add_trace(
+    # #     go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
+    # #         df["절대습도"],72,3)
+    # #     ))
+    # # fig.show()
+    # log_df = np.log(df["상대습도"])
+    # diff_1 = (log_df.diff(periods=1).iloc[1:])
+    # diff_2 = diff_1.diff(periods=1).iloc[1:]
+    # plot_acf(diff_2)
+    # plot_pacf(diff_2)
+    # plt.show()
+    # model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
+    # model_fit = model.fit()
+    # # ARIMA_model = pm.auto_arima(df['절대습도'],
+    # #                             start_p=1,
+    # #                             start_q=1,
+    # #                             test='adf',  # use adftest to find optimal 'd'
+    # #                             max_p=3, max_q=3,  # maximum p and q
+    # #                             m=24,  # frequency of series (if m==1, seasonal is set to FALSE automatically)
+    # #                             d=None,  # let model determine 'd'
+    # #                             D=2, #order of the seasonal differencing
+    # #                             seasonal=True,  # No Seasonality for standard ARIMA
+    # #                             trace=False,  # logs
+    # #                             error_action='warn',  # shows errors ('ignore' silences these)
+    # #                             suppress_warnings=False,
+    # #                             stepwise=True)
+    # print(model_fit.summary())
+    # df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
+    # # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
+    # fig = px.line(df[['상대습도', 'forecast']])
+    # fig.show()

cdb9153

9619318

tools/weather_agency_api/check_missing.py

--- tools/weather_agency_api/check_missing.py

+++ tools/weather_agency_api/check_missing.py


 
 def check_missing(df):
     # Convert the '관측시각' column to a datetime object
-    df['관측시각'] = pd.to_datetime(df['관측시각'], format='%Y%m%d%H%M')
+    # Also, doing copy() to prevent pass by reference in which is not intended.
+    df_copy = df.copy()
+    df_copy['관측시각'] = pd.to_datetime(df_copy['관측시각'], format='%Y%m%d%H%M')
 
     # Calculate the difference between each row and its subsequent row
-    df['time_diff'] = df['관측시각'].diff()
+    df_copy['time_diff'] = df_copy['관측시각'].diff()
 
     # Check for differences that aren't 1 hour, excluding the first row
-    errors = df[df['time_diff'] != pd.Timedelta(hours=1)][1:]
+    errors = df_copy[df_copy['time_diff'] != pd.Timedelta(hours=1)][1:]
 
     if not errors.empty:
         print("Errors found:")

cdb9153

9619318

tools/weather_agency_api/weather_api.py

--- tools/weather_agency_api/weather_api.py

+++ tools/weather_agency_api/weather_api.py


             buffer = StringIO(text)
             df = pd.read_csv(buffer, skiprows=2, skipfooter=1, sep=r"\s+", header=None, index_col=False,
                              engine="python").iloc[2:, :-1]
+            df = df.set_axis(weather_api_columns, axis=1, inplace=False)
             if not check_missing(df): # 빠진 정보가 있다면
                 print("api is not working !")
                 return {

Add a comment

Open 0
Closed 0

List

...	...	@@ -2,6 +2,7 @@
2	2	from flask_restx import Api
3	3	from auth import Auth
4	4	from action import Action
	5	+from datetime import datetime
5	6	from apscheduler.schedulers.background import BackgroundScheduler
6	7	from apscheduler.triggers.interval import IntervalTrigger
7	8	from tools.weather_agency_api.weather_api import update_weather_info_to_today
...	...	@@ -21,7 +22,7 @@
21	22
22	23	scheduler = BackgroundScheduler()
23	24	scheduler.start()
24		-
	25	+today = datetime.today().strftime('%Y-%m-%d')
25	26	# Schedule task_function to be called every 6 hours
26	27	scheduler.add_job(
27	28	func=update_weather_info_to_today,
...	...	@@ -35,7 +36,7 @@
35	36	scheduler.add_job(
36	37	func=sarima,
37	38	trigger=IntervalTrigger(hours=6),
38		- args=("data/weather/weather_data.csv",),
	39	+ args=("data/weather/weather_data.csv", f"{today}"),
39	40	# comma to make it a tuple, so that python won't confuse this as a list of char
40	41	id='weather_data_update',
41	42	name='update weather time every 6 hours',
...	...	@@ -44,7 +45,7 @@
44	45
45	46	api.add_namespace(Action, '/action')
46	47	update_weather_info_to_today("data/weather/weather_data.csv")
47		-sarima("data/weather/weather_data.csv")
	48	+sarima("data/weather/weather_data.csv",f"{today}")
48	49
49	50	api.add_namespace(Auth, '/auth')
50	51	print("Api Add Auth")

...	...	@@ -115,6 +115,7 @@
115	115	buffer = StringIO(text)
116	116	df = pd.read_csv(buffer, skiprows=2, skipfooter=1, sep=r"\s+", header=None, index_col=False,
117	117	engine="python").iloc[2:, :-1]
	118	+ df = df.set_axis(weather_api_columns, axis=1, inplace=False)
118	119	if not check_missing(df): # 빠진 정보가 있다면
119	120	print("api is not working !")
120	121	return {

...	...	@@ -1,5 +1,6 @@
1	1	import pandas as pd
2	2	import numpy as np
	3	+import time
3	4	from statsmodels.tsa.statespace.sarimax import SARIMAX
4	5	from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
5	6	import matplotlib.pyplot as plt
...	...	@@ -9,7 +10,7 @@
9	10	import pickle
10	11
11	12
12		-def sarima(file, col_key='상대습도', future_hours=24):
	13	+def sarima(file, save_name, col_key='상대습도', future_hours=24):
13	14	df = pd.read_csv(file)
14	15
15	16	ah = absolute_humidity(df[col_key], df["기온"])
...	...	@@ -37,15 +38,17 @@
37	38
38	39	forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
39	40
40		- with open('sarima_model.pkl', 'wb') as pkl_file:
	41	+ with open(f'sarima_model_{save_name}.pkl', 'wb') as pkl_file:
41	42	pickle.dump(model_fit, pkl_file)
42	43
43		-def forecast_from_saved_model(file, future_hours=24):
	44	+def forecast_from_saved_model(file, model_file, future_hours=24):
44	45	# Load the saved model
45		- with open('sarima_model.pkl', 'rb') as pkl_file:
	46	+ with open(model_file, 'rb') as pkl_file:
46	47	loaded_model = pickle.load(pkl_file)
47	48
48	49	df = pd.read_csv(file)
	50	+ print("files loaded")
	51	+ t1 = time.time()
49	52
50	53	df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
51	54
...	...	@@ -58,47 +61,54 @@
58	61	})
59	62
60	63	forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
61		-
	64	+ t2 = -(t1 - time.time())
	65	+ # print(forecast_df)
	66	+ print(f"{t2} seconds per {future_hours}\n"
	67	+ f"that is {future_hours/t2} per seconds")
62	68	return forecast_df
63	69
64	70	if __name__ == "__main__":
65		- df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
66		- ah = absolute_humidity(df["상대습도"], df["기온"])
67		- df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
68		- df["절대습도"] = ah
69		- # fig = go.Figure()
70		- #
71		- # fig.add_trace(
72		- # go.Scatter(x=df["관측시각"], y=df["절대습도"])
73		- # )
74		- # fig.add_trace(
75		- # go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
76		- # df["절대습도"],72,3)
77		- # ))
78		- # fig.show()
79		- log_df = np.log(df["상대습도"])
80		- diff_1 = (log_df.diff(periods=1).iloc[1:])
81		- diff_2 = diff_1.diff(periods=1).iloc[1:]
82		- plot_acf(diff_2)
83		- plot_pacf(diff_2)
84		- plt.show()
85		- model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
86		- model_fit = model.fit()
87		- # ARIMA_model = pm.auto_arima(df['절대습도'],
88		- # start_p=1,
89		- # start_q=1,
90		- # test='adf', # use adftest to find optimal 'd'
91		- # max_p=3, max_q=3, # maximum p and q
92		- # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically)
93		- # d=None, # let model determine 'd'
94		- # D=2, #order of the seasonal differencing
95		- # seasonal=True, # No Seasonality for standard ARIMA
96		- # trace=False, # logs
97		- # error_action='warn', # shows errors ('ignore' silences these)
98		- # suppress_warnings=False,
99		- # stepwise=True)
100		- print(model_fit.summary())
101		- df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
102		- # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
103		- fig = px.line(df[['상대습도', 'forecast']])
104		- fig.show() (파일 끝에 줄바꿈 문자 없음)
	71	+ # sarima("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", "test1")
	72	+ forecast_from_saved_model("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv",
	73	+ "/home/juni/PycharmProjects/failure_analysis/tools/algo/sarima_model_test1.pkl",
	74	+ 240)
	75	+ # df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
	76	+ # ah = absolute_humidity(df["상대습도"], df["기온"])
	77	+ # df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
	78	+ # df["절대습도"] = ah
	79	+ # # fig = go.Figure()
	80	+ # #
	81	+ # # fig.add_trace(
	82	+ # # go.Scatter(x=df["관측시각"], y=df["절대습도"])
	83	+ # # )
	84	+ # # fig.add_trace(
	85	+ # # go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
	86	+ # # df["절대습도"],72,3)
	87	+ # # ))
	88	+ # # fig.show()
	89	+ # log_df = np.log(df["상대습도"])
	90	+ # diff_1 = (log_df.diff(periods=1).iloc[1:])
	91	+ # diff_2 = diff_1.diff(periods=1).iloc[1:]
	92	+ # plot_acf(diff_2)
	93	+ # plot_pacf(diff_2)
	94	+ # plt.show()
	95	+ # model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
	96	+ # model_fit = model.fit()
	97	+ # # ARIMA_model = pm.auto_arima(df['절대습도'],
	98	+ # # start_p=1,
	99	+ # # start_q=1,
	100	+ # # test='adf', # use adftest to find optimal 'd'
	101	+ # # max_p=3, max_q=3, # maximum p and q
	102	+ # # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically)
	103	+ # # d=None, # let model determine 'd'
	104	+ # # D=2, #order of the seasonal differencing
	105	+ # # seasonal=True, # No Seasonality for standard ARIMA
	106	+ # # trace=False, # logs
	107	+ # # error_action='warn', # shows errors ('ignore' silences these)
	108	+ # # suppress_warnings=False,
	109	+ # # stepwise=True)
	110	+ # print(model_fit.summary())
	111	+ # df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
	112	+ # # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
	113	+ # fig = px.line(df[['상대습도', 'forecast']])
	114	+ # fig.show() (파일 끝에 줄바꿈 문자 없음)

...	...	@@ -2,13 +2,15 @@
2	2
3	3	def check_missing(df):
4	4	# Convert the '관측시각' column to a datetime object
5		- df['관측시각'] = pd.to_datetime(df['관측시각'], format='%Y%m%d%H%M')
	5	+ # Also, doing copy() to prevent pass by reference in which is not intended.
	6	+ df_copy = df.copy()
	7	+ df_copy['관측시각'] = pd.to_datetime(df_copy['관측시각'], format='%Y%m%d%H%M')
6	8
7	9	# Calculate the difference between each row and its subsequent row
8		- df['time_diff'] = df['관측시각'].diff()
	10	+ df_copy['time_diff'] = df_copy['관측시각'].diff()
9	11
10	12	# Check for differences that aren't 1 hour, excluding the first row
11		- errors = df[df['time_diff'] != pd.Timedelta(hours=1)][1:]
	13	+ errors = df_copy[df_copy['time_diff'] != pd.Timedelta(hours=1)][1:]
12	14
13	15	if not errors.empty:
14	16	print("Errors found:")

Delete comment