File name
Commit message
Commit date
2023-09-26
2023-09-26
File name
Commit message
Commit date
File name
Commit message
Commit date
2023-09-26
import pandas as pd
import numpy as np
import time
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import plotly.express as px
from tools.algo.humidity import absolute_humidity
import pickle
def sarima(file, save_name, col_key='상대습도', future_hours=24):
df = pd.read_csv(file)
ah = absolute_humidity(df[col_key], df["기온"])
df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
df["절대습도"] = ah
# Use the data up to the last 'future_hours' for fitting
model = SARIMAX(df[col_key].iloc[:-future_hours], order=(1, 0, 2), seasonal_order=(0, 1, 2, 24))
model_fit = model.fit()
print(model_fit.summary())
# Forecast the next 'future_hours' and append to df
forecast_values = model_fit.forecast(steps=future_hours)
forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)]
forecast_series = pd.Series(data=forecast_values, index=forecast_index, name='forecast')
# Forecast the next 'future_hours'
forecast_values = model_fit.forecast(steps=future_hours)
forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)]
forecast_df = pd.DataFrame({
'관측시각': forecast_index,
'forecast': forecast_values
})
forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
with open(f'sarima_model_{save_name}.pkl', 'wb') as pkl_file:
pickle.dump(model_fit, pkl_file)
def forecast_from_saved_model(file, model_file, future_hours=24):
# Load the saved model
with open(model_file, 'rb') as pkl_file:
loaded_model = pickle.load(pkl_file)
df = pd.read_csv(file)
print("files loaded")
t1 = time.time()
df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
# Forecast the next 'future_hours' using the loaded model
forecast_values = loaded_model.forecast(steps=future_hours)
forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)]
forecast_df = pd.DataFrame({
'관측시각': forecast_index,
'forecast': forecast_values
})
forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False)
t2 = -(t1 - time.time())
# print(forecast_df)
print(f"{t2} seconds per {future_hours}\n"
f"that is {future_hours/t2} per seconds")
return forecast_df
if __name__ == "__main__":
# sarima("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv", "test1")
forecast_from_saved_model("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv",
"/home/juni/PycharmProjects/failure_analysis/tools/algo/sarima_model_test1.pkl",
240)
# df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
# ah = absolute_humidity(df["상대습도"], df["기온"])
# df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M'))
# df["절대습도"] = ah
# # fig = go.Figure()
# #
# # fig.add_trace(
# # go.Scatter(x=df["관측시각"], y=df["절대습도"])
# # )
# # fig.add_trace(
# # go.Scatter(x=df["관측시각"], y=signal.savgol_filter(
# # df["절대습도"],72,3)
# # ))
# # fig.show()
# log_df = np.log(df["상대습도"])
# diff_1 = (log_df.diff(periods=1).iloc[1:])
# diff_2 = diff_1.diff(periods=1).iloc[1:]
# plot_acf(diff_2)
# plot_pacf(diff_2)
# plt.show()
# model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24))
# model_fit = model.fit()
# # ARIMA_model = pm.auto_arima(df['절대습도'],
# # start_p=1,
# # start_q=1,
# # test='adf', # use adftest to find optimal 'd'
# # max_p=3, max_q=3, # maximum p and q
# # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically)
# # d=None, # let model determine 'd'
# # D=2, #order of the seasonal differencing
# # seasonal=True, # No Seasonality for standard ARIMA
# # trace=False, # logs
# # error_action='warn', # shows errors ('ignore' silences these)
# # suppress_warnings=False,
# # stepwise=True)
# print(model_fit.summary())
# df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True)
# # df[['절대습도', 'forecast']].plot(figsize=(12, 8))
# fig = px.line(df[['상대습도', 'forecast']])
# fig.show()