因为手上没有 iris.data 数据文件,只能从 sklearn 中加载原始数据,并将其转换为 DataFrame 格式。
主要内容:数据分布的可视化(特征之间的分布、单个特征内部的分布、分类精度、热力图)。
算法:决策树、随机森林。
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
from sklearn.datasets import load_iris

# Names given to the four measurement columns, in the order of iris.data.
FEATURE_COLUMNS = [
    'sepal_length_cm',
    'sepal_width_cm',
    'petal_length_cm',
    'petal_width_cm',
]

# Load the iris data bundled with scikit-learn and wrap it in a DataFrame.
iris = load_iris()
X, y = iris.data, iris.target
iris_data = pd.DataFrame(X, columns=FEATURE_COLUMNS)
# Attach the labels as their own column: stacking them onto X with
# np.hstack would cast the integer class ids to floats (0.0, 1.0, 2.0).
iris_data['class'] = y

# Summary statistics for every numeric column.
print(iris_data.describe())

# Grid of pairwise plots over the columns, coloured by class.
sb.pairplot(iris_data.dropna(), hue='class')

# One violin plot per feature on a 2x2 grid, grouped by class.
# Iterating the feature list (rather than all columns and skipping 'class')
# keeps the subplot index within 1..4 regardless of column order.
plt.figure(figsize=(10, 10))
for column_index, column in enumerate(FEATURE_COLUMNS):
    plt.subplot(2, 2, column_index + 1)
    sb.violinplot(x='class', y=column, data=iris_data)
plt.show()
输出:
sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000