import numpy as np
import pandas as pd
# Load the iris table; row 0 of the CSV supplies the column names.
data = pd.read_csv('iris.csv', header=0)

# Replace the species names with integer class labels.
data['Species'] = data['Species'].map(
    {
        'Iris-virginica': 0,
        'Iris-setosa': 1,
        'Iris-versicolor': 2,
    }
)

# Notebook-style peek at ten random rows (the value is not stored).
data.sample(10)

# The "id" column is dropped before duplicate rows are looked for.
data = data.drop(columns="id")

# Check whether any duplicated rows exist, then remove them.
data.duplicated().any()
data = data.drop_duplicates()

# Notebook-style inspection: rows per class, then the cleaned table.
data['Species'].value_counts()
data
class KNN:
    """K-nearest-neighbours classifier implemented with NumPy.

    Distances are Euclidean. Class labels must be non-negative integers
    because the votes are tallied with ``np.bincount``.
    """

    def __init__(self, k):
        """Create the classifier.

        Parameters
        ----------
        k : int
            Number of neighbours consulted for every prediction.
        """
        self.k = k

    def fit(self, X, y):
        """Store the training data.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            Training samples.
        y : array-like, shape [n_samples]
            Target value (label) of each training sample.
        """
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def _k_nearest(self, x):
        """Return (indices, distances) of the k training samples closest to x."""
        dis = np.sqrt(np.sum((x - self.X) ** 2, axis=1))
        index = dis.argsort()[:self.k]
        return index, dis[index]

    def predict(self, X):
        """Predict a label for every sample by unweighted majority vote.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            Samples to classify.

        Returns
        -------
        result : ndarray
            Predicted label of each sample.
        """
        X = np.asarray(X)
        result = []
        for x in X:
            index, _ = self._k_nearest(x)
            count = np.bincount(self.y[index])
            result.append(count.argmax())
        return np.asarray(result)

    def predict2(self, X):
        """Predict a label for every sample by distance-weighted vote.

        Each of the k neighbours votes with weight ``1 / distance``. When a
        sample coincides with one or more training samples (distance 0) the
        inverse distance is undefined, so only those exact matches vote,
        each with equal weight.

        Parameters
        ----------
        X : array-like, shape [n_samples, n_features]
            Samples to classify.

        Returns
        -------
        result : ndarray
            Predicted label of each sample.
        """
        X = np.asarray(X)
        result = []
        for x in X:
            index, dis = self._k_nearest(x)
            exact = dis == 0
            if exact.any():
                # Avoid dividing by zero: exact matches decide the vote.
                count = np.bincount(self.y[index[exact]])
            else:
                count = np.bincount(self.y[index], weights=1 / dis)
            result.append(count.argmax())
        return np.asarray(result)
# Separate the rows by class label so every class can be split the same way.
t0 = data[data['Species'] == 0]
t1 = data[data['Species'] == 1]
t2 = data[data['Species'] == 2]

# Shuffle each class; the fixed seed keeps the split reproducible.
t0 = t0.sample(len(t0), random_state=0)
t1 = t1.sample(len(t1), random_state=0)
t2 = t2.sample(len(t2), random_state=0)

# The first 40 shuffled rows of every class form the training set and the
# remaining rows the test set. The last column is the label; all other
# columns are features.
train_X = pd.concat([t0.iloc[:40, :-1], t1.iloc[:40, :-1], t2.iloc[:40, :-1]], axis=0)
train_Y = pd.concat([t0.iloc[:40, -1], t1.iloc[:40, -1], t2.iloc[:40, -1]], axis=0)
test_X = pd.concat([t0.iloc[40:, :-1], t1.iloc[40:, :-1], t2.iloc[40:, :-1]], axis=0)
test_Y = pd.concat([t0.iloc[40:, -1], t1.iloc[40:, -1], t2.iloc[40:, -1]], axis=0)

# Train on the training split and classify the held-out samples.
knn = KNN(k=3)
knn.fit(train_X, train_Y)
result = knn.predict(test_X)

# Number of correctly classified test samples. print() is used because
# display() only exists inside IPython/Jupyter sessions.
print(np.sum(result == test_Y))
import matplotlib as mpl
import matplotlib.pyplot as plt
# SimHei is a CJK font, so the Chinese axis labels and title can render;
# unicode_minus is switched off so minus signs use the ASCII hyphen.
mpl.rcParams['font.family'] = 'SimHei'
mpl.rcParams['axes.unicode_minus'] = False
plt.figure(figsize=(10, 10))

# First 40 shuffled rows of each class, one colour per species.
for frame, colour, species in (
    (t0, 'r', 'Iris-virginica'),
    (t1, 'g', 'Iris-setosa'),
    (t2, 'b', 'Iris-versicolor'),
):
    plt.scatter(
        x=frame['SepalLengthCm'][:40],
        y=frame['PetalLengthCm'][:40],
        color=colour,
        label=species,
    )

# Test samples, split by whether the prediction matched the true label.
right = test_X[result == test_Y]
wrong = test_X[result != test_Y]
plt.scatter(
    x=right['SepalLengthCm'],
    y=right['PetalLengthCm'],
    color='c',
    marker='x',
    label='right',
)
plt.scatter(
    x=wrong['SepalLengthCm'],
    y=wrong['PetalLengthCm'],
    color='m',
    marker='>',
    label='wrong',
)

# Axis labels, title and legend, then render the figure.
plt.xlabel("花萼长度")
plt.ylabel("花瓣长度")
plt.title("KNN分类显示结果")
plt.legend(loc="best")
plt.show()
相关知识
Knn算法实现鸢尾花分类
KNN算法花的分类预测
K近邻算法和鸢尾花问题
利用KNN对150个实例对花卉进行机器培训
KNN花卉识别项目练习
【人工智能】基于分类算法的学生学业预警系统应用
基于花授粉算法优化实现SVM数据分类
js植物算法
智能分类算法在植物分类中的应用研究
实验一:鸢尾花数据集分类
网址: KNN算法分类算法 https://m.huajiangbk.com/newsview387285.html
上一篇: 机械学习将鸢尾花的特征值和特征向 |
下一篇: “None of [Int64I |