from datetime import datetime, date

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix  # accuracy metrics
from sklearn.model_selection import train_test_split

# Default location and encoding of the input CSV.
DEFAULT_CSV_PATH = r'D:\takensoft\project7\약상자송신데이터.csv'
_CSV_ENCODING = 'euc-kr'

# Below this many rows for a user, no model is trained.
_MIN_ROWS = 30
# Value printed and returned when a user has too few rows.
_INSUFFICIENT_DATA = "분석불가"

# Flag columns that are concatenated (in this order) into one binary number.
# NOTE(review): presumably scheduled vs. actually-taken slots -- confirm.
_TYPE_COLUMNS = ('type1', 'type2', 'type3', 'type4', 'type5')
_TAKE_COLUMNS = ('take1', 'take2', 'take3', 'take4', 'take5')

# Model inputs, in the order they are passed to the classifier.
_FEATURE_COLUMNS = [
    'temp1', 'temp2', 'temp3', 'temp4',
    'weekday', 'prev_humantypes', 'prev_taketypes',
]


def _encode_bits(frame, columns):
    """Return, per row, the integer read from *columns* joined as base-2 digits.

    Each column is converted to ``str`` and the strings are concatenated in
    the given order; ``int(text, 2)`` then parses the result, so a value that
    does not stringify to ``0``/``1`` raises ``ValueError``.
    """
    bit_strings = frame[columns[0]].astype(str)
    for column in columns[1:]:
        bit_strings = bit_strings + frame[column].astype(str)
    return bit_strings.map(lambda bits: int(bits, 2))


def _load_user_frame(user_id, csv_path):
    """Read the CSV and build the modelling table for one user.

    Returns ``None`` when the user has fewer than ``_MIN_ROWS`` rows,
    otherwise a DataFrame with the derived columns ``humantypes``,
    ``taketypes``, ``weekday``, ``error``, ``prev_humantypes`` and
    ``prev_taketypes`` added.
    """
    raw = pd.read_csv(csv_path, encoding=_CSV_ENCODING)
    rows = raw[raw['user_id'] == user_id].reset_index(drop=True)
    if len(rows) < _MIN_ROWS:
        return None

    rows['humantypes'] = _encode_bits(rows, _TYPE_COLUMNS)
    rows['taketypes'] = _encode_bits(rows, _TAKE_COLUMNS)

    rows['date_info'] = pd.to_datetime(rows['date_info'])
    rows['weekday'] = rows['date_info'].dt.weekday

    # A row is an "error" when the two encoded patterns differ.
    rows['error'] = rows['humantypes'] != rows['taketypes']

    # Previous row's patterns, in file order (the rows are not sorted here).
    rows['prev_humantypes'] = rows['humantypes'].shift()
    rows['prev_taketypes'] = rows['taketypes'].shift()

    # The first row has no previous values (NaN from shift()).
    # NOTE(review): the last row is dropped as well although it has valid
    # previous values; kept as-is so the reported accuracy does not change.
    return rows.iloc[1:-1].reset_index(drop=True)


def _train_and_report(frame, target_column):
    """Fit a random forest on *frame*, print and return hold-out accuracy."""
    features = frame[_FEATURE_COLUMNS]
    target = frame[target_column]
    # Split into training and evaluation data at a ratio of 8:2.
    train_x, test_x, train_y, test_y = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    clf = RandomForestClassifier(n_estimators=20, max_depth=5, random_state=0)
    clf.fit(train_x, train_y)
    accuracy = accuracy_score(test_y, clf.predict(test_x))
    print(accuracy)
    return accuracy


def _analyse(user_id, target_column, csv_path):
    """Shared driver: return the sentinel string, or train and return None."""
    frame = _load_user_frame(user_id, csv_path)
    if frame is None:
        print(_INSUFFICIENT_DATA)
        return _INSUFFICIENT_DATA
    _train_and_report(frame, target_column)
    return None


def taking_TF(user_id, csv_path=DEFAULT_CSV_PATH):
    """Train a classifier for the boolean ``error`` column of one user.

    ``error`` is True where the encoded ``type*`` pattern differs from the
    encoded ``take*`` pattern. The hold-out accuracy is printed.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read; defaults to ``DEFAULT_CSV_PATH``.

    Returns:
        ``"분석불가"`` (also printed) when the user has fewer than 30 rows,
        otherwise ``None``.
    """
    return _analyse(user_id, 'error', csv_path)


def taking_class(user_id, csv_path=DEFAULT_CSV_PATH):
    """Train a classifier for the encoded ``taketypes`` value of one user.

    Same pipeline as ``taking_TF`` but the target is the integer obtained
    from the ``take*`` columns. The hold-out accuracy is printed.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read; defaults to ``DEFAULT_CSV_PATH``.

    Returns:
        ``"분석불가"`` (also printed) when the user has fewer than 30 rows,
        otherwise ``None``.
    """
    return _analyse(user_id, 'taketypes', csv_path)


if __name__ == "__main__":
    taking_TF('MEMBER_000000000000584')
    taking_class('MEMBER_000000000000584')