
+++ model.py
... | ... | @@ -0,0 +1,95 @@ |
1 | +import pandas as pd | |
2 | +from datetime import datetime, date | |
3 | +from sklearn.model_selection import train_test_split | |
4 | +from sklearn.ensemble import RandomForestClassifier | |
5 | +from sklearn.metrics import accuracy_score ,confusion_matrix# 정확도 함수 | |
6 | + | |
# Default location of the input CSV (the file name translates roughly to
# "pill box transmission data"); pass ``csv_path`` to read from elsewhere.
_DEFAULT_CSV_PATH = r'D:\takensoft\project7\약상자송신데이터.csv'

# A user needs at least this many rows before a model is trained.
_MIN_USER_ROWS = 30

# Value printed and returned when a user has too few rows ("cannot analyse").
_INSUFFICIENT_DATA = "분석불가"

# Columns whose 0/1 digits are concatenated and read as one binary number.
_TYPE_COLUMNS = ['type1', 'type2', 'type3', 'type4', 'type5']
_TAKE_COLUMNS = ['take1', 'take2', 'take3', 'take4', 'take5']

# Model inputs shared by both classifiers.
_FEATURE_COLUMNS = ['temp1', 'temp2', 'temp3', 'temp4', 'weekday',
                    'prev_humantypes', 'prev_taketypes']


def _bits_to_int(frame, columns):
    """Join ``columns`` as strings per row and parse each result as base 2."""
    bits = frame[columns[0]].astype(str)
    for column in columns[1:]:
        bits = bits + frame[column].astype(str)
    return bits.map(lambda text: int(text, 2))


def _load_user_features(user_id, csv_path):
    """Read the CSV and build the feature frame for one user.

    Returns ``None`` when the user has fewer than ``_MIN_USER_ROWS`` rows.
    """
    df = pd.read_csv(csv_path, encoding='euc-kr')

    # Filter first so that malformed rows belonging to other users cannot
    # break the binary parsing below, and so only the needed rows are encoded.
    df = df[df['user_id'] == user_id].reset_index(drop=True)
    if len(df) < _MIN_USER_ROWS:
        return None

    df['humantypes'] = _bits_to_int(df, _TYPE_COLUMNS)
    df['taketypes'] = _bits_to_int(df, _TAKE_COLUMNS)

    df['date_info'] = pd.to_datetime(df['date_info'])
    df['weekday'] = df['date_info'].dt.weekday  # Monday=0 ... Sunday=6

    df['error'] = df['humantypes'] != df['taketypes']
    # Previous-row values as features.
    # NOTE(review): rows are used in file order; this presumes the CSV is
    # already chronological per user - confirm, or sort by date_info.
    df['prev_humantypes'] = df['humantypes'].shift()
    df['prev_taketypes'] = df['taketypes'].shift()

    # The first row has no previous values (NaN after shift) and is dropped.
    # NOTE(review): the last row is dropped too, exactly as the original
    # ``df[1:len(df)-1]`` did; that looks unintended - confirm before changing.
    return df.iloc[1:-1].reset_index(drop=True)


def _fit_and_report(df, target_column):
    """Train a random forest on an 80/20 split and print its test accuracy."""
    X = df[_FEATURE_COLUMNS]
    y = df[target_column]
    train_x, test_x, train_y, test_y = train_test_split(
        X, y, test_size=0.2, random_state=42)
    clf = RandomForestClassifier(n_estimators=20, max_depth=5, random_state=0)
    clf.fit(train_x, train_y)
    print(accuracy_score(test_y, clf.predict(test_x)))


def taking_TF(user_id, csv_path=_DEFAULT_CSV_PATH):
    """Train a classifier for the boolean ``error`` column of one user.

    ``error`` is True on rows where the binary-encoded ``type1..type5``
    value differs from the binary-encoded ``take1..take5`` value.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read (euc-kr encoded). Defaults to the path
            this function originally hard-coded.

    Returns:
        The string "분석불가" (also printed) when the user has fewer than
        30 rows; otherwise ``None`` after printing the test-set accuracy.
    """
    df = _load_user_features(user_id, csv_path)
    if df is None:
        print(_INSUFFICIENT_DATA)
        return _INSUFFICIENT_DATA
    _fit_and_report(df, 'error')


def taking_class(user_id, csv_path=_DEFAULT_CSV_PATH):
    """Train a classifier for the ``taketypes`` value of one user.

    ``taketypes`` is the ``take1..take5`` columns concatenated and read as
    a base-2 integer, so this is a multi-class prediction.

    Args:
        user_id: Value matched against the ``user_id`` column.
        csv_path: CSV file to read (euc-kr encoded). Defaults to the path
            this function originally hard-coded.

    Returns:
        The string "분석불가" (also printed) when the user has fewer than
        30 rows; otherwise ``None`` after printing the test-set accuracy.
    """
    df = _load_user_features(user_id, csv_path)
    if df is None:
        print(_INSUFFICIENT_DATA)
        return _INSUFFICIENT_DATA
    _fit_and_report(df, 'taketypes')
91 | + | |
92 | + | |
93 | + | |
# Run both analyses for one sample member.
# NOTE(review): these calls execute whenever the module is imported; consider
# moving them under an ``if __name__ == "__main__":`` guard.
taking_TF(user_id='MEMBER_000000000000584')
taking_class(user_id='MEMBER_000000000000584')
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?