DAY 3
0
AI & Data

## 樹選手1號：decision tree [python實例]

``````python
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

def score(m, x_train, y_train, x_test, y_test, train=True):
    """Print classification metrics for a model on one data split.

    Args:
        m: Model object; only its ``predict`` method is called.
        x_train: Features of the training split.
        y_train: True labels of the training split.
        x_test: Features of the test split.
        y_test: True labels of the test split.
        train: When truthy, report on the training split; otherwise report
            on the test split.

    Returns:
        None. Accuracy, precision, recall and F1 are printed as percentages,
        followed by the confusion matrix.
    """
    # Pick the split once so the reporting code exists in a single place.
    if train:
        label, features, truth = 'Train', x_train, y_train
    else:
        label, features, truth = 'Test', x_test, y_test

    pred = m.predict(features)

    # NOTE(review): precision/recall/F1 are called with their default
    # arguments; presumably the labels are binary -- confirm before using
    # this helper on multiclass data.
    print(f'{label} Result:\n')
    print(f"Accuracy Score: {accuracy_score(truth, pred)*100:.2f}%")
    print(f"Precision Score: {precision_score(truth, pred)*100:.2f}%")
    print(f"Recall Score: {recall_score(truth, pred)*100:.2f}%")
    print(f"F1 score: {f1_score(truth, pred)*100:.2f}%")
    print(f"Confusion Matrix:\n {confusion_matrix(truth, pred)}")
``````

``````python
from sklearn import tree

# Fit a decision tree with the classifier's default settings, then report
# its metrics on the training split.
tree1 = tree.DecisionTreeClassifier().fit(x_train, y_train)
score(tree1, x_train, y_train, x_test, y_test, train=True)
``````

``````score(tree1, x_train, y_train, x_test, y_test, train=True)
``````

1. max_depth (default=None)：限制樹的最大深度，是非常常用的參數
2. min_samples_split (default=2)：限制一個中間節點最少要包含幾個樣本才可以被分支（產生一個 yes/no 問題）
3. min_samples_leaf (default=1)：限制分支後每個子節點最少要包含幾個樣本

``````python
# Decide the tree depth!
# Candidate values for max_depth: 2 through 14.
depth_list = list(range(2, 15))

# One row per candidate depth; columns are depth, accuracy, precision, recall.
depth_tuning = np.zeros((len(depth_list), 4))
depth_tuning[:, 0] = depth_list

# NOTE(review): the metrics below are computed on x_test / y_test, so the
# depth is being chosen against the test split; consider a separate
# validation split or cross-validation if an unbiased test score is needed.
for row, depth in enumerate(depth_list):
    mytree = tree.DecisionTreeClassifier(max_depth=depth).fit(x_train, y_train)
    pred_test_Y = mytree.predict(x_test)
    depth_tuning[row, 1] = accuracy_score(y_test, pred_test_Y)
    depth_tuning[row, 2] = precision_score(y_test, pred_test_Y)
    depth_tuning[row, 3] = recall_score(y_test, pred_test_Y)

col_names = ['Max_Depth', 'Accuracy', 'Precision', 'Recall']
print(pd.DataFrame(depth_tuning, columns=col_names))
``````

``````tree2 = tree.DecisionTreeClassifier(max_depth=3)
tree2 = tree2.fit(x_train,y_train)
score(tree2, x_train, y_train, x_test, y_test, train=True)
``````

``````score(tree2, x_train, y_train, x_test, y_test, train=False)
``````