MachineLearning

AI : Visualizing feature importance with Python

 

GitHub repository


 

import pandas as pd
from sklearn.datasets import load_iris

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
sns.set()

# Load the Iris dataset into a DataFrame and append the target as column "y"
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df["y"] = iris.target
df.head()

y = df["y"]

X = df.iloc[:, 0:4]
X.head()


# Hold out part of the data for evaluation (default test_size is 25%)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, shuffle=True)

# Train a decision tree classifier (Gini impurity criterion by default)
model = DecisionTreeClassifier(random_state=1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("accuracy_score : {}".format(accuracy_score(y_test, y_pred)))

# Impurity-based feature importance computed from the fitted tree
feature_importance = model.feature_importances_
feature_importance
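
For a DecisionTreeClassifier, feature_importances_ is the impurity-based (Gini) importance, normalized so that the values sum to 1. As a quick sanity check (a minimal sketch, not part of the original notebook):

# Sanity check: impurity-based importances from a fitted tree are normalized to sum to 1
print(feature_importance.sum())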

importances = pd.DataFrame({"features": iris.feature_names, "importances": feature_importance})
importances


plt.figure(figsize=(12, 6))
plt.title("Iris feature inportances")
sns.barplot(x="importances", y="features", data=importances.sort_values(by="importances", ascending=False))
plt.tight_layout()
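
As a complementary view (not in the original post), scikit-learn also provides permutation importance via sklearn.inspection.permutation_importance (available from version 0.22), which shuffles each feature on held-out data and measures the drop in score. The sketch below is a minimal example that reuses the model and train/test split fitted above.

from sklearn.inspection import permutation_importance

# Shuffle each feature on the test set and measure the mean drop in accuracy
result = permutation_importance(model, X_test, y_test, n_repeats=10, random_state=1)

perm_importances = pd.DataFrame({"features": iris.feature_names, "importances": result.importances_mean})

plt.figure(figsize=(12, 6))
plt.title("Iris permutation importances (test set)")
sns.barplot(x="importances", y="features", data=perm_importances.sort_values(by="importances", ascending=False))
plt.tight_layout()

Because permutation importance is computed on the test set, it can reveal features whose impurity-based importance is inflated by overfitting; here the ranking should broadly agree with the tree's built-in importances.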