首页 > 分享 > 「超级干货大放送」机器学习十二种经典模型实例

「超级干货大放送」机器学习十二种经典模型实例

目录

实例一:线性回归波士顿房价

实例二:KNN实现电影分类

实例三:基于线性回归预测波士顿房价

实例四:sklearn完成逻辑回归鸢尾花分类

实例五:支持向量机完成鸢尾花分类

实例六:使用决策树实现鸢尾花分类

实例七:使用随机森林实现鸢尾花分类

实例八:使用朴素贝叶斯进行鸢尾花分类

实例九:使用Kmeans来进行鸢尾花分类

实例十:K最近邻的使用方式

实例十一:kmeans的其他展示方式

实例十二:Kmeans实现鸢尾花聚类

实例一:线性回归波士顿房价

'''
Example 1: linear regression on the Boston housing prices [regression problem]

Fits a one-feature linear model (feature column 5) and plots the training
points together with the line predicted for the held-out test points.
'''
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

try:
    # Only importable on scikit-learn releases that still ship the dataset.
    from sklearn.datasets import load_boston

    X, y = load_boston(return_X_y=True)
except ImportError:
    # Newer scikit-learn releases no longer provide load_boston.  Rebuild the
    # same feature matrix / target vector from the original data file, which
    # stores every sample on two physical lines (11 values, then 3 values).
    import numpy as np
    import pandas as pd

    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

# Slice with 5:6 (not 5) so the single feature stays a 2-D (n_samples, 1)
# array, which is what the estimator's fit/predict expect.
X1 = X[:, 5:6]

train_x, test_x, train_y, test_y = train_test_split(
    X1, y, test_size=0.3, random_state=2
)

lr = LinearRegression()
lr.fit(train_x, train_y)
result = lr.predict(test_x)

# Blue dots: training samples.  Red line: model output on the test inputs.
plt.scatter(train_x, train_y, color='blue')
plt.plot(test_x, result, color='red')
plt.show()

实例二:KNN实现电影分类

'''
Example 2: movie genre classification with a hand-written KNN [classification problem]

Each movie is described by three shot counts; the genre of an unseen movie is
the most common genre among its K nearest training movies.
'''
from collections import Counter

import numpy as np
import pandas as pd

# title -> [funny shots, hugging shots, fight shots, genre]
train_data = {
    '宝贝当家': [45, 2, 9, '喜剧片'],
    '美人鱼': [21, 17, 5, '喜剧片'],
    '澳门风云3': [54, 9, 11, '喜剧片'],
    '功夫熊猫3': [39, 0, 31, '喜剧片'],
    '谍影重重': [5, 2, 57, '动作片'],
    '叶问3': [3, 2, 65, '动作片'],
    '我的特工爷爷': [6, 4, 21, '动作片'],
    '奔爱': [7, 46, 4, '爱情片'],
    '夜孔雀': [9, 39, 8, '爱情片'],
    '代理情人': [9, 38, 2, '爱情片'],
    '新步步惊心': [8, 34, 17, '爱情片'],
    '伦敦陷落': [2, 3, 55, '动作片'],
}

# One row per movie; the last column holds the genre label.
train_df = pd.DataFrame(train_data).T
train_df.columns = ['搞笑镜头', '拥抱镜头', '打斗镜头', '电影类型']

test_data = {'唐人街探案': [23, 3, 17]}


def euclidean_distance(vec1, vec2):
    '''Return the Euclidean distance between two equal-length numeric vectors.'''
    # The training rows come out of an object-dtype frame, so convert
    # explicitly before doing arithmetic.
    diff = np.asarray(vec1, dtype=float) - np.asarray(vec2, dtype=float)
    return np.sqrt(np.sum(np.square(diff)))


def majority_vote(labels):
    '''
    Return the most frequent label in *labels*.

    Ties are resolved in favour of the label that appears first, i.e. the
    nearest neighbour when the labels are ordered by increasing distance.
    Raises ValueError when *labels* is empty.
    '''
    counts = Counter(labels)
    if not counts:
        raise ValueError('majority_vote() needs at least one label')
    return counts.most_common(1)[0][0]


K = 3
movie = '唐人街探案'

# Distance from the query movie to every training movie (feature columns only).
test_x = np.array(test_data[movie])
d = [euclidean_distance(train_x, test_x) for train_x in train_df.values[:, :-1]]

# Index the training rows by distance and sort so the nearest come first.
dd = pd.DataFrame(train_df.values, index=d)
dd1 = pd.DataFrame(dd.sort_index())

# Vote among the K nearest genres.  Taking .max() of the labels would only
# return the lexicographically largest string, not the majority.
predicted_type = majority_vote(dd1.values[:K, -1])
print(predicted_type)

实例三:基于线性回归预测波士顿房价

'''
Example 3: predicting Boston housing prices with a hand-written linear regression

This part loads the data, rescales every feature with MinMaxScaler and
splits it into training and test sets.
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

try:
    # Only importable on scikit-learn releases that still ship the dataset.
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    def load_boston():
        '''
        Stand-in for the removed sklearn loader.

        Rebuilds the feature matrix and target from the original data file,
        which stores every sample on two physical lines (11 values, then 3),
        and returns them as a Bunch with ``data`` and ``target`` fields.
        '''
        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        return Bunch(data=data, target=target)

boston = load_boston()
X = boston.data
y = boston.target

# Rescale each feature column with MinMaxScaler's default settings.
X = MinMaxScaler().fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2020
)

class LinearRegression:
    '''Linear regression trained with full-batch gradient descent.'''

    def __init__(self, alpha=0.1, epoch=5000, fit_bias=True):
        '''
        alpha: learning rate, scales the size of every parameter update
        epoch: number of passes (parameter updates) over the whole training set
        fit_bias: whether to learn a bias (intercept) parameter
        '''
        self.alpha = alpha
        self.epoch = epoch
        self.cost_record = []
        self.fit_bias = fit_bias

    def _design_matrix(self, X):
        '''Return X as a float array, with a leading column of ones if fit_bias.'''
        X = np.asarray(X, dtype=float)
        if self.fit_bias:
            X = np.column_stack((np.ones(X.shape[0]), X))
        return X

    def predict(self, X_test):
        '''
        X_test: m x n numpy 2-D array

        Returns a 1-D array with the m predicted values.  Requires that
        fit() or load_model() has populated self.w.
        '''
        return np.dot(self._design_matrix(X_test), self.w)

    def fit(self, X_train, y_train):
        '''
        X_train: m x n numpy 2-D array
        y_train: numpy 1-D array with m elements

        Learns self.w (bias first when fit_bias is True), records the cost of
        every epoch in self.cost_record and saves the weights to model.txt.
        '''
        X_train = self._design_matrix(X_train)
        y_train = np.asarray(y_train, dtype=float)
        m, n = X_train.shape

        self.w = np.ones(n)
        # Start a fresh history so that refitting does not append to the
        # costs of a previous run.
        self.cost_record = []

        for _ in range(self.epoch):
            residual = np.dot(X_train, self.w) - y_train
            # Half mean squared error, evaluated before this epoch's update.
            self.cost_record.append(np.dot(residual, residual) / (2 * m))
            self.w -= self.alpha / m * np.dot(residual, X_train)

        self.save_model()

    def plot_cost(self):
        '''Plot the recorded training cost against the epoch index.'''
        # Use the recorded length so x and y always have matching sizes.
        plt.plot(np.arange(len(self.cost_record)), self.cost_record)
        plt.xlabel("epoch")
        plt.ylabel("cost")
        plt.show()

    # Original (misspelled) name kept so existing callers keep working.
    polt_cost = plot_cost

    def save_model(self):
        '''Write the weight vector to model.txt in the working directory.'''
        np.savetxt("model.txt", self.w)

    def load_model(self):
        '''Read the weight vector back from model.txt.'''
        self.w = np.loadtxt("model.txt")

# Train the model on the training split and predict the held-out samples.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# fit() prepends a column of ones when fit_bias is True, so in that case the
# first entry of model.w is the bias and the rest are the feature weights.
if model.fit_bias:
    bias, weights = model.w[0], model.w[1:]
else:
    bias, weights = 0.0, model.w

print('偏置参数:', bias)
print('特征权重:', weights)
print('预测结果:', y_pred[:5])

实例四:sklearn完成逻辑回归鸢尾花分类

'''
Example 4: iris classification with scikit-learn logistic regression

Trains on two iris features (columns 2 and 3), standardized, and draws the
decision regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=1000.0, random_state=0)
lr.fit(X_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=lr, X_highlight=X_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例五:支持向量机完成鸢尾花分类

'''
Example 5: iris classification with a support vector machine

Trains an RBF-kernel SVC on two standardized iris features (columns 2 and 3)
and draws the decision regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

svc = SVC(kernel='rbf', random_state=0, gamma=0.2, C=1.0)
svc.fit(x_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=svc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例六:使用决策树实现鸢尾花分类

'''
Example 6: iris classification with a decision tree

Trains a depth-3 entropy tree on two standardized iris features (columns 2
and 3) and draws the decision regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

dtc = DecisionTreeClassifier(criterion='entropy', random_state=0, max_depth=3)
# Train on the standardized features: the regions below are evaluated on
# standardized coordinates, so a tree fitted on the raw values would apply
# its split thresholds in the wrong units.
dtc.fit(x_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=dtc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

 

实例七:使用随机森林实现鸢尾花分类

'''
Example 7: iris classification with a random forest

Trains a 10-tree entropy forest on two standardized iris features (columns 2
and 3) and draws the decision regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

rfc = RandomForestClassifier(
    criterion='entropy', n_estimators=10, random_state=1, n_jobs=2
)
# Train on the standardized features: the regions below are evaluated on
# standardized coordinates, so a forest fitted on the raw values would apply
# its split thresholds in the wrong units.
rfc.fit(x_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=rfc, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例八:使用朴素贝叶斯进行鸢尾花分类

'''
Example 8: iris classification with Gaussian naive Bayes

Trains GaussianNB on two standardized iris features (columns 2 and 3) and
draws the decision regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

gnb = GaussianNB()
# Train on the standardized features: the regions below are evaluated on
# standardized coordinates, so the model must have learned its per-class
# statistics on the same scale.
gnb.fit(x_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=gnb, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例九:使用Kmeans来进行鸢尾花分类

'''
Example 9: grouping the iris data with k-means

Fits a 3-cluster KMeans on two standardized iris features (columns 2 and 3)
and draws the cluster regions with the test samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

# Seeded like the other examples so the plot is reproducible between runs.
km = KMeans(n_clusters=3, random_state=0)
# Fit on the standardized features, because the regions below are evaluated
# on standardized coordinates.  KMeans is unsupervised, so no labels are
# passed to fit().
km.fit(x_train_std)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# NOTE: the background regions show cluster ids while the markers show the
# true species; cluster ids are arbitrary and need not match the class ids.
# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=km, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例十:K最近邻的使用方式

'''
Example 10: using the K-nearest-neighbours classifier

Trains a 5-neighbour KNN (Minkowski metric with p=2) on two standardized iris
features (columns 2 and 3) and draws the decision regions with the test
samples highlighted.
'''
from sklearn import datasets
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit the scaler on the training split only, then apply it to both splits.
ss = StandardScaler().fit(X_train)
x_train_std = ss.transform(X_train)
x_test_std = ss.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')
knn.fit(x_train_std, y_train)

# Training rows first, test rows last.
X_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))

# The original passed range(105, 150) as filler_feature_ranges, an option
# meant for inputs with more than two features.  Those indices are the test
# rows of the combined array, so highlight them through X_highlight instead.
plot_decision_regions(X_combined_std, y_combined, clf=knn, X_highlight=x_test_std)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

实例十一:kmeans的其他展示方式

'''
Example 11: another way to visualise k-means results

Clusters two iris features (columns 2 and 3) with KMeans and shows two
scatter plots: one coloured by predicted cluster, one by true species.
'''
import pandas as pd
from sklearn import datasets
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

df = pd.DataFrame(X)
df.columns = ['x', 'y']
df['kind'] = y

data = iris
# Cluster on the two feature columns only.  Leaving the 'kind' label column
# in the input would feed the answer to the clustering step.
data1 = df.drop(['kind'], axis=1)

# Seeded so the cluster assignment is reproducible between runs.
km = KMeans(n_clusters=3, random_state=0)
km.fit(data1)
predict = km.predict(data1)

colored = ['orange', 'green', 'pink']

# First figure: points coloured by the cluster they were assigned to.
col = [colored[i] for i in predict]
plt.scatter(data1['x'], data1['y'], color=col)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

print(predict)

# Second figure: points coloured by their true species.  The labels taken
# from iris.target are already integers, so they index `colored` directly;
# mapping them through a name -> id dictionary would turn them all into NaN.
c = [colored[i] for i in df['kind']]
plt.scatter(df['x'], df['y'], color=c)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

 

实例十二:Kmeans实现鸢尾花聚类

'''
Example 12: k-means clustering of iris data read from a CSV file

The CSV is read with columns 'x', 'y' (features) and 'kind' (species name).
Shows two scatter plots: one coloured by predicted cluster, one by species.
'''
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Path separators restored; the flattened text had lost the backslashes.
# Adjust this to wherever iris.csv lives on your machine.
data = pd.read_csv(r"C:\Users\单纯小男子\Downloads\iris.csv")

# Cluster on the feature columns only, without the species label.
data1 = data.drop(['kind'], axis=1)

# Seeded so the cluster assignment is reproducible between runs.
km = KMeans(n_clusters=3, random_state=0)
km.fit(data1)
predict = km.predict(data1)

colored = ['orange', 'green', 'pink']

# First figure: points coloured by the cluster they were assigned to.
col = [colored[i] for i in predict]
plt.scatter(data1['x'], data1['y'], color=col)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

print(predict)

# Second figure: points coloured by their true species.  The species names
# are converted to integer ids so they can index `colored`.
class_mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
data['kind'] = data['kind'].map(class_mapping)
c = [colored[i] for i in data['kind']]
plt.scatter(data['x'], data['y'], color=c)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

 

相关知识

干货来袭,谷歌最新机器学习术语表(下)
【机器学习】Sklearn 集成学习
【机器学习】鸢尾花分类:机器学习领域经典入门项目实战
干货来袭,谷歌最新机器学习术语表
机器学习之路:经典的鸢尾花数据集
《机器学习》(西瓜书)学习笔记
机器学习术语表
机器学习=编程?NO!
机器学习与因果推断
机器学习鸢尾花数据集

网址: 「超级干货大放送」机器学习十二种经典模型实例 https://m.huajiangbk.com/newsview1385939.html

所属分类:花卉
上一篇: leetcode5. 最长回文子
下一篇: 2024广州木棉花观赏点有哪些(