# -*- coding: utf-8 -*-
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


class MyLogicRegression():
    '''
    Logistic-regression demo on the iris dataset returned by ``load_iris``.
    '''

    def __init__(self):
        # Load the dataset once; run() reads its .data and .target fields.
        self.iris = load_iris()

    def run(self):
        '''
        Fit a classifier on a stratified 80/20 split and print the
        predicted and the actual labels of the held-out 20%.
        :return: None
        '''
        features = self.iris.data
        labels = self.iris.target
        # Fixed random_state keeps the split reproducible between runs.
        x_train, x_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, random_state=0, stratify=labels)

        # Logistic-regression classifier with default settings
        classifier = LogisticRegression()
        classifier.fit(x_train, y_train)
        predicted = classifier.predict(x_test)

        print('预测的结果', predicted)
        print('实际的结果', y_test)


if __name__ == '__main__':
    my_logic_regression = MyLogicRegression()
    my_logic_regression.run()
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np


def get_data(name):
    '''
    Read a header-less CSV file and split it into features and labels.
    :param name: path of the CSV file
    :return: x, y -- x holds columns 0-3 transposed (one column per sample),
             y holds the remaining columns transposed and cast to uint8
    '''
    data_sets = pd.read_csv(name, header=None)
    x = data_sets.iloc[:, 0:4].values.T
    y = data_sets.iloc[:, 4:].values.T
    y = y.astype("uint8")
    return x, y


class MyBPNN():
    '''
    Back-propagation neural network with a single hidden layer.

    The hidden layer uses tanh and the output layer uses sigmoid. The
    script below builds it with 4 inputs, 10 hidden units and 3 outputs,
    where labels are one-hot columns: (1,0,0) is the first class, (0,1,0)
    the second and (0,0,1) the third.
    '''

    def __init__(self, epochs, n_hide, n_input, n_output, learning_rate):
        '''
        Store the hyper-parameters of the network.
        :param epochs: total number of gradient-descent iterations
        :param n_hide: number of hidden-layer nodes
        :param n_input: number of input-layer nodes
        :param n_output: number of output-layer nodes
        :param learning_rate: step size of each parameter update
        '''
        self.epochs = epochs
        self.n_hide = n_hide
        self.n_input = n_input
        self.n_output = n_output
        self.learning_rate = learning_rate

    def _initialize_parameters(self):
        '''
        Initialise the weight and bias matrices.
        :return: None
        '''
        # Fixed seed so every run starts from the same weights
        np.random.seed(2)
        # Small random weights break the symmetry between hidden units.
        self.w1 = np.random.randn(self.n_hide, self.n_input) * 0.01
        self.b1 = np.zeros(shape=(self.n_hide, 1))
        self.w2 = np.random.randn(self.n_output, self.n_hide) * 0.01
        self.b2 = np.zeros(shape=(self.n_output, 1))

    def _forward(self, x):
        '''
        Run one forward pass with the current parameters.
        :param x: input matrix with one column per sample
        :return: z1, a1, z2, a2 (pre-/post-activation of both layers)
        '''
        z1 = np.dot(self.w1, x) + self.b1
        # tanh is the hidden-layer activation
        a1 = np.tanh(z1)
        z2 = np.dot(self.w2, a1) + self.b2
        # sigmoid is the output-layer activation
        a2 = 1 / (1 + np.exp(-z2))
        return z1, a1, z2, a2

    def _forward_propagation(self):
        '''
        Forward pass over the training data; stores z1, a1, z2 and a2.
        :return: None
        '''
        self.z1, self.a1, self.z2, self.a2 = self._forward(self.x_train)

    def _compute_cost(self):
        '''
        Compute the cross-entropy cost of the current activations.
        :return: None (the result is stored in self.cost)
        '''
        # Keep the activations strictly inside (0, 1): a saturated sigmoid
        # returns exactly 0 or 1 and 0 * log(0) would turn the cost into nan.
        eps = 1e-12
        a2 = np.clip(self.a2, eps, 1 - eps)
        log = np.multiply(np.log(a2), self.y_train) \
            + np.multiply((1 - self.y_train), np.log(1 - a2))
        self.cost = - np.sum(log) / self.number

    def _backward_propagation(self):
        '''
        Back-propagation: gradients of the cost w.r.t. every parameter.
        :return: None (gradients are stored as dw1, db1, dw2, db2)
        '''
        self.dz2 = self.a2 - self.y_train
        self.dw2 = (1 / self.number) * np.dot(self.dz2, self.a1.T)
        self.db2 = (1 / self.number) * np.sum(self.dz2, axis=1, keepdims=True)
        # 1 - a1^2 is the derivative of tanh
        self.dz1 = np.multiply(np.dot(self.w2.T, self.dz2), 1 - np.power(self.a1, 2))
        self.dw1 = (1 / self.number) * np.dot(self.dz1, self.x_train.T)
        self.db1 = (1 / self.number) * np.sum(self.dz1, axis=1, keepdims=True)

    def _update_param(self):
        '''
        Apply one gradient-descent step to all weights and biases.
        :return: None
        '''
        self.w1 = self.w1 - self.dw1 * self.learning_rate
        self.b1 = self.b1 - self.db1 * self.learning_rate
        self.w2 = self.w2 - self.dw2 * self.learning_rate
        self.b2 = self.b2 - self.db2 * self.learning_rate

    def fit(self, x_train, y_train, print_cost = True):
        '''
        Train the network with batch gradient descent.
        :param x_train: training inputs, one column per sample
        :param y_train: training labels, one column per sample
        :param print_cost: print the cost every 1000 iterations when True
        :return: None
        '''
        # Load the data
        self.x_train = x_train
        self.y_train = y_train
        self.number = self.y_train.shape[1]
        # Initialise the parameters (this also seeds NumPy's global RNG)
        self._initialize_parameters()
        # Gradient-descent loop
        for i in range(0, self.epochs):
            self._forward_propagation()
            self._compute_cost()
            self._backward_propagation()
            self._update_param()
            if print_cost and i % 1000 == 0:
                print('迭代第%i次,代价为:%f' % (i, self.cost))

    def predict(self, x_test, y_test):
        '''
        Predict labels, print the accuracy against y_test and return them.
        :param x_test: test inputs, one column per sample
        :param y_test: expected labels, one column per sample
        :return: integer 0/1 matrix obtained by thresholding the outputs at 0.5
        '''
        # Forward pass on the test data
        _, _, _, a2 = self._forward(x_test)
        output = (a2 > 0.5).astype(int)
        # A sample counts as correct only if every output row matches, for
        # any number of output rows rather than exactly three.
        n_cols = y_test.shape[1]
        count = int(np.sum(np.all(output == y_test, axis=0)))
        # An empty test set reports 0% instead of dividing by zero.
        acc = count / n_cols * 100 if n_cols else 0.0
        print('测试集准确率:%.2f%%' % acc)
        return output


if __name__ == '__main__':
    # Imported here because only this entry point uses it; the module itself
    # can then be imported without scikit-learn installed.
    from sklearn.datasets import load_iris

    # NOTE(review): `iris` is never read below -- the data actually comes from
    # the CSV files. Kept to preserve the original script; confirm and drop.
    iris = load_iris()
    x_train, y_train = get_data('../Datasets/iris-train.csv')
    x_test, y_test = get_data('../Datasets/iris-test.csv')
    my_bpnn = MyBPNN(10000, 10, 4, 3, 0.4)
    my_bpnn.fit(x_train, y_train)
    result = my_bpnn.predict(x_test, y_test)
# Draw a decision tree and save the figure as a PNG.
# NOTE(review): `plt`, `tree` and `clf` are not defined in this snippet;
# presumably matplotlib.pyplot, sklearn.tree and a fitted classifier -- confirm.
plt.figure(figsize=(8, 8))
# `filled` is passed as a real boolean rather than the string 'True'.
tree.plot_tree(clf, filled=True,
               feature_names=['花萼长', '花萼宽', '花瓣长', '花瓣宽'],
               class_names=['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾'])
plt.savefig("./Flower_Tree.png", bbox_inches="tight", pad_inches=0.0)
# For illustration only
from sklearn import tree

# NOTE(review): `model` is not defined in this snippet; presumably an estimator
# whose parameter named 'DTC' is the fitted decision tree -- confirm.
# The context manager closes the .dot file even if the export raises.
with open('../dataSet/iris_tree.dot', 'w') as f:
    tree.export_graphviz(model.get_params('DTC')['DTC'], out_file=f)
# -*- coding: utf-8 -*-
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import tree
import matplotlib
from matplotlib import pyplot as plt

# Global matplotlib settings: font family used for the (Chinese) plot labels,
# and the axes.unicode_minus option switched off.
matplotlib.rcParams['font.family'] = 'SimHei'
matplotlib.rcParams['axes.unicode_minus'] = False


class MyDecisionTree():
    '''
    Decision-tree demo on the iris dataset returned by ``load_iris``.
    '''

    def __init__(self, criterion, splitter, max_depth, min_samples_split):
        '''
        Load the dataset and remember the tree hyper-parameters.
        :param criterion: passed to DecisionTreeClassifier(criterion=...)
        :param splitter: passed to DecisionTreeClassifier(splitter=...)
        :param max_depth: passed to DecisionTreeClassifier(max_depth=...)
        :param min_samples_split: passed to DecisionTreeClassifier(min_samples_split=...)
        '''
        self.iris = load_iris()
        self.criterion = criterion
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split

    def run(self):
        '''
        Train on a stratified 80/20 split, print the parameters and the
        train/test accuracy, then save a plot of the tree to
        ./Flower_Tree.png.
        :return: None
        '''
        features = self.iris.data
        labels = self.iris.target
        x_train, x_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, random_state=0, stratify=labels)

        clf = tree.DecisionTreeClassifier(criterion=self.criterion,
                                          splitter=self.splitter,
                                          max_depth=self.max_depth,
                                          min_samples_split=self.min_samples_split)
        clf.fit(x_train, y_train)

        print("模型参数:")
        print(" criterion:" + self.criterion)
        print(" splitter:" + self.splitter)
        print(" max_depth:" + str(self.max_depth))
        print(" min_samples_split:" + str(self.min_samples_split))

        # score() is the fraction of correctly classified samples, i.e. the
        # same ratio the former hand-written counting loops produced.
        print("训练集准确率:", float(clf.score(x_train, y_train)))
        print("测试集准确率:", float(clf.score(x_test, y_test)))

        plt.figure(figsize=(8, 8))
        # `filled` is passed as a real boolean rather than the string 'True'.
        tree.plot_tree(clf, filled=True,
                       feature_names=['花萼长', '花萼宽', '花瓣长', '花瓣宽'],
                       class_names=['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾'])
        plt.savefig("./Flower_Tree.png", bbox_inches="tight", pad_inches=0.0)


if __name__ == '__main__':
    my_decision_tree = MyDecisionTree("gini", "best", 4, 2)
    my_decision_tree.run()
# -*- coding: utf-8 -*-
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import svm


class MySVM():
    '''
    Support-vector-machine demo on the iris dataset returned by ``load_iris``.
    '''

    def __init__(self):
        # Load the dataset once; run() reads its .data and .target fields.
        self.iris = load_iris()

    def run(self, kernel, C):
        '''
        Train an SVC on a stratified 80/20 split and print both accuracies.
        :param kernel: kernel name handed to svm.SVC
        :param C: penalty parameter handed to svm.SVC
        :return: None
        '''
        samples, targets = self.iris.data, self.iris.target
        split = train_test_split(samples, targets, test_size=0.2,
                                 random_state=0, stratify=targets)
        x_train, x_test, y_train, y_test = split

        model = svm.SVC(C=C, kernel=kernel, decision_function_shape='ovr')
        model.fit(x_train, y_train)

        print("核函数:" + kernel + ",惩罚参数:" + str(C))
        train_accuracy = model.score(x_train, y_train)
        print("训练集准确率:", train_accuracy)
        test_accuracy = model.score(x_test, y_test)
        print("测试集准确率:", test_accuracy)


if __name__ == '__main__':
    my_svm = MySVM()
    my_svm.run("linear", 1.0)
相关知识
花卉大数据分析与市场预测
花卉大数据分析与消费市场洞察
大数据分析技术应用于智能农业植物病虫害检测.pptx
大数据分析一下《全唐诗》,竟藏着这么多秘密!
一种基于大数据分析预测的荔枝控梢促花管理方法与流程
[中投产业研究院]:2022年中国花卉行业专业市场经营大数据分析报告
花卉大数据分析与消费市场洞察.docx
应急通信指挥,应急指挥系统,应急大数据分析,应急工作管理
虫情监测设备:实现对农田环境的实时监控和数据分析
鲜花大数据分析
网址: 大数据分析 https://m.huajiangbk.com/newsview830939.html
上一篇: 谷歌adwords开户三步骤 |
下一篇: 数据泄漏检测和溯源技术 |