Contents
Example 1: Boston housing prices with linear regression
Example 2: movie classification with KNN
Example 3: Boston housing price prediction with a hand-written linear regression
Example 4: iris classification with logistic regression in sklearn
Example 5: iris classification with a support vector machine
Example 6: iris classification with a decision tree
Example 7: iris classification with a random forest
Example 8: iris classification with naive Bayes
Example 9: iris classification with KMeans
Example 10: how to use K-nearest neighbors
Example 11: other ways to visualize KMeans
Example 12: iris clustering with KMeans
'''
Example 1: Boston housing prices with linear regression [regression]
'''
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# running the Boston examples as-is requires an older scikit-learn version.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
X, y = load_boston(return_X_y=True)
X1 = X[:, 5:6]  # keep only the RM feature (average number of rooms per dwelling)
train_x, test_x, train_y, test_y = train_test_split(X1, y, test_size=0.3, random_state=2)
lr = LinearRegression()
lr.fit(train_x, train_y)
result = lr.predict(test_x)
plt.scatter(train_x, train_y, color='blue')
plt.plot(test_x, result, color='red')
plt.show()
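# A quick hedged check (not part of the original listing): report the R^2 score
# of the single-feature model on the held-out split.
print('test R^2:', lr.score(test_x, test_y))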
'''
Example 2: movie classification with KNN [classification]
'''
import numpy as np
import pandas as pd
train_data = {'宝贝当家': [45, 2, 9, '喜剧片'],
              '美人鱼': [21, 17, 5, '喜剧片'],
              '澳门风云3': [54, 9, 11, '喜剧片'],
              '功夫熊猫3': [39, 0, 31, '喜剧片'],
              '谍影重重': [5, 2, 57, '动作片'],
              '叶问3': [3, 2, 65, '动作片'],
              '我的特工爷爷': [6, 4, 21, '动作片'],
              '奔爱': [7, 46, 4, '爱情片'],
              '夜孔雀': [9, 39, 8, '爱情片'],
              '代理情人': [9, 38, 2, '爱情片'],
              '新步步惊心': [8, 34, 17, '爱情片'],
              '伦敦陷落': [2, 3, 55, '动作片']
              }
train_df = pd.DataFrame(train_data).T
train_df.columns = ['搞笑镜头','拥抱镜头','打斗镜头','电影类型']
test_data = {'唐人街探案':[23,3,17]}
def euclidean_distance(vec1, vec2):
    return np.sqrt(np.sum(np.square(vec1 - vec2)))
K = 3
movie = '唐人街探案'
d = []
for train_x in train_df.values[:, :-1]:
    test_x = np.array(test_data[movie])
    d.append(euclidean_distance(train_x, test_x))
# Index the training movies by their distance to the query, sort by distance,
# and take a majority vote over the K nearest neighbours (the original .max()
# picked the lexicographically largest label rather than the most frequent one).
dd = pd.DataFrame(train_df.values, index=d)
dd1 = dd.sort_index()
print(pd.Series(dd1.values[:K, -1]).value_counts().idxmax())
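# For comparison, a minimal sketch of the same prediction with scikit-learn's
# KNeighborsClassifier (assumes train_df, test_data, movie and K from above).
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_neighbors=K)
knn_clf.fit(train_df.values[:, :-1].astype(float), train_df.values[:, -1])
print(knn_clf.predict([test_data[movie]]))  # expected to agree with the manual KNN above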
'''
Example 3: Boston housing price prediction with a hand-written linear regression
'''
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
boston = load_boston()
X = boston.data
y = boston.target
X = MinMaxScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2020)
import numpy as np
import matplotlib.pyplot as plt
class LinearRegression:
    '''Linear regression implemented from scratch with batch gradient descent.'''
    def __init__(self, alpha=0.1, epoch=5000, fit_bias=True):
        '''
        alpha: learning rate, controls the size of each parameter update
        epoch: number of passes (parameter updates) over the whole training set
        fit_bias: whether to learn a bias term
        '''
        self.alpha = alpha
        self.epoch = epoch
        self.cost_record = []
        self.fit_bias = fit_bias

    def predict(self, X_test):
        '''
        X_test: m x n numpy 2-D array
        '''
        if self.fit_bias:
            x_0 = np.ones(X_test.shape[0])
            X_test = np.column_stack((x_0, X_test))
        return np.dot(X_test, self.w)

    def fit(self, X_train, y_train):
        '''
        X_train: m x n numpy 2-D array
        y_train: numpy 1-D array with m elements
        '''
        if self.fit_bias:
            x_0 = np.ones(X_train.shape[0])
            X_train = np.column_stack((x_0, X_train))
        m = X_train.shape[0]
        n = X_train.shape[1]
        self.w = np.ones(n)
        for i in range(self.epoch):
            y_pred = np.dot(X_train, self.w)
            cost = np.dot(y_pred - y_train, y_pred - y_train) / (2 * m)
            self.cost_record.append(cost)
            self.w -= self.alpha / m * np.dot(y_pred - y_train, X_train)
        self.save_model()

    def plot_cost(self):
        plt.plot(np.arange(self.epoch), self.cost_record)
        plt.xlabel("epoch")
        plt.ylabel("cost")
        plt.show()

    def save_model(self):
        np.savetxt("model.txt", self.w)

    def load_model(self):
        self.w = np.loadtxt("model.txt")
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('bias term:', model.w[0])
print('feature weights:', model.w[1:])
print('predictions:', y_pred[:5])
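# As a quick sanity check (not part of the original listing), compare the
# hand-written model against scikit-learn's LinearRegression on the same split.
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression as SKLinearRegression
sk_model = SKLinearRegression().fit(X_train, y_train)
print('hand-written MSE:', mean_squared_error(y_test, y_pred))
print('scikit-learn MSE:', mean_squared_error(y_test, sk_model.predict(X_test)))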
'''
Example 4: iris classification with logistic regression in sklearn
'''
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # use only petal length and petal width
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
# Highlight the held-out samples; the filler_feature_ranges argument in the
# original call is only meaningful when X has more than two features.
plot_decision_regions(X_combined_std, y_combined, clf=lr, X_highlight=X_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
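# A minimal hedged sketch (not in the original) of scoring the fitted
# classifier on the held-out split.
print('logistic regression test accuracy:', lr.score(X_test_std, y_test))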
'''
Example 5: iris classification with a support vector machine
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
svc = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0)
svc.fit(x_train_std,y_train)
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, clf=svc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
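# Hedged sketch (not in the original): score the RBF SVM on the test split and
# compare against a linear-kernel SVM with otherwise identical settings.
print('RBF SVM test accuracy:', svc.score(x_test_std, y_test))
svc_linear = SVC(kernel='linear', random_state=0, C=1.0).fit(x_train_std, y_train)
print('linear SVM test accuracy:', svc_linear.score(x_test_std, y_test))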
'''
Example 6: iris classification with a decision tree
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
dtc = DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=3)
dtc.fit(x_train_std, y_train)  # fit on the standardized data that is plotted below
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, clf=dtc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
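# Optional hedged sketch (not in the original): print the learned tree as text.
from sklearn.tree import export_text
print(export_text(dtc, feature_names=['petal length (std)', 'petal width (std)']))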
'''
Example 7: iris classification with a random forest
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
rfc = RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=1, n_jobs=2)
rfc.fit(x_train_std, y_train)  # fit on the standardized data that is plotted below
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, clf=rfc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
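# Hedged sketch (not in the original): inspect how the forest weights the two features.
for name, importance in zip(['petal length', 'petal width'], rfc.feature_importances_):
    print(name, importance)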
'''
Example 8: iris classification with naive Bayes
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
gnb = GaussianNB()
gnb.fit(x_train_std, y_train)  # fit on the standardized data that is plotted below
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, clf=gnb, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
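# Hedged sketch (not in the original): GaussianNB exposes class probabilities,
# which can be inspected for the first few test flowers.
print(gnb.predict_proba(x_test_std[:3]).round(3))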
'''
Example 9: iris classification with KMeans (an unsupervised clustering algorithm)
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
km = KMeans(n_clusters=3)
km.fit(x_train_std)  # KMeans is unsupervised: labels are ignored, so only the standardized features are passed
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
# NOTE: KMeans cluster indices are arbitrary, so the plotted regions may not
# match the colours of the true classes in y_combined.
plot_decision_regions(X_combined_std, y_combined, clf=km, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
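# Hedged sketch (not in the original): since the true species are known, the
# clustering can be scored against them with the adjusted Rand index.
from sklearn.metrics import adjusted_rand_score
print('ARI on the test split:', adjusted_rand_score(y_test, km.predict(x_test_std)))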
'''
Example 10: how to use K-nearest neighbors
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=0)
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(x_train_std, y_train)
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, clf=knn, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
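# Hedged sketch (not in the original): cross-validate a few values of k to see
# how sensitive the classifier is to the neighbourhood size.
from sklearn.model_selection import cross_val_score
for k in (1, 3, 5, 7, 9):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), x_train_std, y_train, cv=5)
    print('k =', k, 'mean CV accuracy =', scores.mean().round(3))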
'''
Example 11: other ways to visualize KMeans
'''
import pandas as pd
from sklearn import datasets
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
df = pd.DataFrame(X, columns=['x', 'y'])
df['kind'] = y
data1 = df
km = KMeans(n_clusters=3)
# Cluster on the two feature columns only; the label column is kept for the
# comparison plot below but should not be fed to KMeans.
km.fit(data1[['x', 'y']])
predict = km.predict(data1[['x', 'y']])
colored = ['orange', 'green', 'pink']
col = [colored[i] for i in predict]   # colour each sample by its predicted cluster
plt.scatter(data1['x'], data1['y'], color=col)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
print(predict)
# load_iris already encodes the species as 0/1/2, so no string-to-integer
# mapping is needed; colour the same points by their true species for comparison.
c = [colored[i] for i in y]
plt.scatter(data1['x'], data1['y'], color=c)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
'''
Example 12: iris clustering with KMeans (reading from a local CSV)
'''
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# The CSV is assumed to have columns named x, y and kind (species name as a string).
data = pd.read_csv(r"C:\Users\单纯小男子\Downloads\iris.csv")
data1 = data.drop(['kind'], axis=1)
km = KMeans(n_clusters=3)
km.fit(data1)
predict = km.predict(data1)
colored = ['orange', 'green', 'pink']
col = [colored[i] for i in predict]
plt.scatter(data1['x'], data1['y'], color=col)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
print(predict)
class_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
data['kind'] = data['kind'].map(class_mapping)
c = [colored[i] for i in data['kind']]
plt.scatter(data['x'], data['y'], color=c)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
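# Hedged sketch (not in the original, and it assumes the same local CSV): the
# inertia "elbow" is a common way to sanity-check the choice of n_clusters.
inertias = []
for k in range(1, 8):
    inertias.append(KMeans(n_clusters=k).fit(data1).inertia_)
plt.plot(range(1, 8), inertias, marker='o')
plt.xlabel('n_clusters')
plt.ylabel('inertia')
plt.show()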