import numpy as np
import pandas as pd
from scipy.stats import pearsonr, spearmanr, kendalltau, pointbiserialr
import plotly.express as px
import random

from tools.algo.interpolation import interpolate_value
from tools.algo.humidity import absolute_humidity

# Column names read from / written to the failure table. The temperature and
# relative-humidity names are also the ``col_key`` values handed to
# ``interpolate_value`` together with the weather table.
_START_TIME_COL = "작업 시작 시간"
_DEFECT_COL = "불량 여부"
_TEMP_COL = "기온"
_REL_HUMIDITY_COL = "상대습도"
_ABS_HUMIDITY_COL = "절대습도"

# Order matters: it fixes both the row order of the correlation table and the
# order in which results are printed when the module is run as a script.
_WEATHER_FEATURES = (_ABS_HUMIDITY_COL, _REL_HUMIDITY_COL, _TEMP_COL)

# Textual defect flags and the numeric codes they are replaced with.
_DEFECT_CODES = {"F": 0, "T": 1}


def _attach_weather(df_weather, df_failure):
    """Add weather columns to *df_failure* and encode its defect flag.

    For every value in the start-time column, temperature and relative
    humidity are obtained from ``interpolate_value`` (``k=6`` is passed
    through unchanged; its meaning is defined by that helper). Absolute
    humidity comes from ``absolute_humidity(humidity, temp)``.

    The defect column has ``'F'`` replaced by 0 and ``'T'`` by 1; any other
    value is left as it is. The column is then converted with
    ``pd.to_numeric`` so the correlation step receives a numeric dtype rather
    than an object-dtype mix, which means a leftover non-numeric string makes
    ``pd.to_numeric`` raise ``ValueError``.

    *df_failure* is modified in place and also returned for convenience.
    """
    temp = []
    humidity = []
    for time in df_failure[_START_TIME_COL]:
        temp.append(
            interpolate_value(df_weather, timestamp=time, col_key=_TEMP_COL, k=6)
        )
        humidity.append(
            interpolate_value(
                df_weather, timestamp=time, col_key=_REL_HUMIDITY_COL, k=6
            )
        )

    df_failure[_TEMP_COL] = temp
    df_failure[_REL_HUMIDITY_COL] = humidity
    df_failure[_ABS_HUMIDITY_COL] = absolute_humidity(humidity, temp)

    encoded = df_failure[_DEFECT_COL].map(lambda flag: _DEFECT_CODES.get(flag, flag))
    df_failure[_DEFECT_COL] = pd.to_numeric(encoded)
    return df_failure


def _defect_correlations(df):
    """Return ``{feature: pointbiserialr(df[feature], df[defect])}``.

    Features are evaluated in ``_WEATHER_FEATURES`` order, and the returned
    dict preserves that order.
    """
    return {
        feature: pointbiserialr(df[feature], df[_DEFECT_COL])
        for feature in _WEATHER_FEATURES
    }


def correlation(weather_path, failure_path):
    """Correlate weather features with the defect flag and save the results.

    Args:
        weather_path: Path of the weather table, read with ``pd.read_csv``.
        failure_path: Path of the failure workbook, read with
            ``pd.read_excel``.

    Side effects:
        Writes ``correlation analysis.csv`` (one row per weather feature with
        ``Correlation`` and ``P-value`` columns) and ``merged_data.csv`` (the
        failure table with the added weather columns and the encoded defect
        flag) into the current working directory.

    Returns:
        None.
    """
    df_weather = pd.read_csv(weather_path)
    df_failure = pd.read_excel(failure_path)

    _attach_weather(df_weather, df_failure)
    results = _defect_correlations(df_failure)

    # Element 0 of each result is the correlation coefficient, element 1 the
    # p-value (same indexing the table was always built from).
    correlation_df = pd.DataFrame({
        'Correlation': {name: result[0] for name, result in results.items()},
        'P-value': {name: result[1] for name, result in results.items()},
    })

    correlation_df.to_csv("correlation analysis.csv")
    df_failure.to_csv("merged_data.csv")


if __name__ == "__main__":
    # Ad-hoc run against the developer's local data set: prints the three
    # correlation results and saves only the merged table.
    df_weather = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv")
    df_failure = pd.read_excel("/home/juni/PycharmProjects/failure_analysis/data/failure/대옹 작업내용.xlsx")

    _attach_weather(df_weather, df_failure)

    for result in _defect_correlations(df_failure).values():
        print(result)

    df_failure.to_csv("merged_data.csv")