載入套件
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import graphviz
from sklearn.metrics import classification_report,confusion_matrix
匯入資料
# Download the iris dataset as CSV from the Rdatasets mirror.
urlprefix = 'https://vincentarelbundock.github.io/Rdatasets/csv/'
dataname = 'datasets/iris.csv'
iris = pd.read_csv(urlprefix + dataname)
# Remove the leading row-label column so only the data columns remain.
# `columns=` is used because the positional `axis` argument of `drop` was
# removed in pandas 2.0. An empty CSV header is read by pandas as
# "Unnamed: 0"; newer exports of this CSV appear to label the column
# "rownames" instead (TODO confirm), so both spellings are accepted and a
# missing one is ignored rather than raising KeyError.
iris = iris.drop(columns=["Unnamed: 0", "rownames"], errors="ignore")
# Preview the first rows (displayed when run as the last line of a notebook cell).
iris.head()
切割為訓練集與測試集
# Columns 0-3 become the feature matrix; column 4 becomes the label vector.
X = iris.iloc[:, :4].values
y = iris.iloc[:, 4].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
建立模型(資料不需標準化)
# Decision tree using the entropy criterion, capped at depth 3, with a fixed
# seed. `fit` returns the estimator itself, so construction and training are
# chained into a single assignment.
clf = DecisionTreeClassifier(
    criterion="entropy",
    max_depth=3,
    random_state=0,
).fit(X_train, y_train)
查看結果
# Predict labels for the held-out rows, then print the confusion matrix of
# true labels versus predictions.
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
視覺化呈現
# Export the fitted tree as DOT text (out_file=None returns it as a string)
# with default styling, then render it to disk under the base name "iris".
plain_dot = tree.export_graphviz(clf, out_file=None)
graphviz.Source(plain_dot).render("iris")
# Export the tree again, this time labelling nodes with the first four column
# names and the classifier's class labels, with filled and rounded node boxes.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=list(iris.columns)[:4],
    class_names=clf.classes_,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
# Bare expression: shows the graph inline when run as the last line of a notebook cell.
graph