import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def moving_average(a, w=10):
    """Smooth a sequence with a trailing window of ``w`` samples.

    The first ``w`` entries are passed through unchanged. Every later entry
    at index ``i`` is replaced by the mean of the ``w`` values that precede
    it, ``a[i - w:i]`` (the entry itself is not part of its own window).

    Args:
        a: Sequence of numbers supporting ``len`` and slicing.
        w: Window length; defaults to 10.

    Returns:
        A shallow copy of ``a`` (``a[:]``) when it holds fewer than ``w``
        items, otherwise a new list with the same length as ``a``.
    """
    if len(a) < w:
        # Too short for even one full window: hand back an untouched copy.
        return a[:]
    return [
        val if idx < w else sum(a[(idx - w):idx]) / w
        for idx, val in enumerate(a)
    ]
def plot_losses(losses):
    """Plot the smoothed training-loss curve and show the figure.

    Args:
        losses: Per-step loss values; they are smoothed with
            ``moving_average`` (default window) before being plotted
            against their step index.
    """
    avgloss = moving_average(losses)
    plt.figure(1)
    plt.subplot(211)  # first cell of a 2-row, 1-column subplot grid
    plt.plot(range(len(avgloss)), avgloss, 'b--')
    plt.xlabel('step number')
    plt.ylabel('Training loss')
    plt.title('step number vs. Training loss')
    plt.show()
# Load the raw passenger table from disk and list the columns it provides.
titanic_data = pd.read_csv(filepath_or_buffer='csv_list/titanic3.csv')
print(titanic_data.columns)
数据之间没有任何连续性的数据称为离散数据,例如性别数据中的"男"与"女"。
离散数据通常可以处理为 one-hot 编码或者词向量,可以分为两类:
①具有固定类别的样本(如性别):易于处理,按照总的类别数进行变换。
②没有固定类别的样本(如姓名):通过 hash 算法或其他散列算法处理,再通过词向量技术进行转化。
1.2.2 连续数据的特征
数据之间具有连续性的数据称为连续数据,例如票价与年龄。
对连续数据做特征变换时,通过对数运算或归一化处理,使其具有统一的值域。
1.2.3 连续数据与离散数据的转化
对一个跨度很大的特征属性进行数据预处理时,可以有三种方法:
①按照最大值、最小值进行归一化处理;
②使用对数运算;
③按照分布情况将其分为几类,再做离散化处理。
# Append one-hot (dummy) columns for the three categorical fields: 'sex'
# keeps its raw category names as column names, while 'embarked' and 'pclass'
# get the prefixes "embark" and "class".
titanic_data = pd.concat(
    [
        titanic_data,
        pd.get_dummies(titanic_data['sex']),
        *(
            pd.get_dummies(titanic_data[column], prefix=prefix)
            for column, prefix in (('embarked', "embark"), ('pclass', "class"))
        ),
    ],
    axis=1,
)
# Show the widened column list, then the raw 'sex' column next to its
# 'female' dummy column.
print(
    titanic_data.columns,
    titanic_data['sex'],
    titanic_data['female'],
    sep='\n',
)
1.3.2 对数据中的NaN值进行过滤填充
代码---Titanic forecast.py(第3部分)
对两个具有连续属性的数据列(age 与 fare)进行NaN值处理。
# Fill the missing entries of the two continuous columns with that column's
# own mean; all other columns are left as they are.
titanic_data = titanic_data.fillna(
    value={
        "age": titanic_data["age"].mean(),
        "fare": titanic_data["fare"].mean(),
    }
)
1.3.3 剔除无用的数据列
代码---Titanic forecast.py(第4部分)
本部分剔除与遇难无关的数据列。
# Remove the columns that are not fed to the model, including the raw
# 'sex', 'embarked' and 'pclass' columns.
titanic_data = titanic_data.drop(
    columns=[
        'name', 'ticket', 'cabin', 'boat', 'body', 'home.dest',
        'sex', 'embarked', 'pclass',
    ]
)
print(titanic_data.columns )
将survived列从数据集中抽取出来作为标签,将数据列中剩下的数据作为输入样本。
# Split off the target column; every remaining column is a model input.
labels = titanic_data["survived"].to_numpy()
titanic_data = titanic_data.drop(['survived'], axis=1)
# Request a single float dtype explicitly: when the dummy columns are boolean
# (the default in recent pandas releases) a plain to_numpy() on the mixed
# frame produces an object array, which torch.from_numpy cannot convert.
data = titanic_data.to_numpy(dtype=float)
feature_names = list(titanic_data.columns)

# Reproducible 70/30 split: draw the training rows without replacement and
# use every remaining row, in ascending order, for testing.
np.random.seed(10)
train_indices = np.random.choice(len(labels), int(0.7 * len(labels)), replace=False)
print('train_indices++++', train_indices)
test_indices = sorted(set(range(len(labels))) - set(train_indices))
# The original printed train_indices a second time here; report the test rows.
print('test_indices++++', test_indices)
train_features = data[train_indices]
train_labels = labels[train_indices]
test_features = data[test_indices]
test_labels = labels[test_indices]
print('测试样本数量', len(test_labels))
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise.

    The module has no parameters, so the inherited ``nn.Module`` constructor
    is sufficient.
    """

    def forward(self, x):
        """Return ``x * tanh(softplus(x))`` for the input tensor ``x``."""
        return x * torch.tanh(F.softplus(x))
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(0)
class ThreeLinearModel(nn.Module):
    """Three-layer fully connected two-class classifier with Mish activations.

    Layer widths are ``in_features -> 12 -> 8 -> 2``.

    Args:
        in_features: Number of input features per sample. Defaults to 12,
            the width that was previously hard-coded.
    """

    def __init__(self, in_features=12):
        super().__init__()
        self.linear1 = nn.Linear(in_features, 12)
        self.mish1 = Mish()
        self.linear2 = nn.Linear(12, 8)
        self.mish2 = Mish()
        self.linear3 = nn.Linear(8, 2)
        self.softmax = nn.Softmax(dim=1)
        self.criterion = nn.CrossEntropyLoss()

    def _logits(self, x):
        """Return the raw, pre-softmax class scores for the batch ``x``."""
        out_1 = self.mish1(self.linear1(x))
        out_2 = self.mish2(self.linear2(out_1))
        return self.linear3(out_2)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        return self.softmax(self._logits(x))

    def getloss(self, x, y):
        """Return the cross-entropy loss for inputs ``x`` and class targets ``y``.

        The loss is computed from the raw logits rather than from the output
        of ``forward``: ``nn.CrossEntropyLoss`` applies log-softmax itself,
        so feeding it probabilities would apply softmax twice.
        """
        return self.criterion(self._logits(x), y)
if __name__ == '__main__':
    # Script entry point: train on the module-level training split, save the
    # weights, plot the loss curve and report train/test accuracy.
    net = ThreeLinearModel()
    num_epochs = 200
    optimizer = torch.optim.Adam(net.parameters(), lr=0.04)

    input_tensor = torch.from_numpy(train_features).type(torch.FloatTensor)
    label_tensor = torch.from_numpy(train_labels)

    # Full-batch training: each epoch is a single optimizer step over the
    # whole training set.
    losses = []
    for epoch in range(num_epochs):
        loss = net.getloss(input_tensor, label_tensor)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print('Epoch {}/{} => Loss: {:.2f}'.format(epoch + 1, num_epochs, loss.item()))

    os.makedirs('models', exist_ok=True)
    torch.save(net.state_dict(), 'models/titanic_model.pt')
    plot_losses(losses)

    # Inference needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        out_probs = net(input_tensor).numpy()
    out_classes = np.argmax(out_probs, axis=1)
    print("Train Accuracy:", np.mean(out_classes == train_labels))

    test_input_tensor = torch.from_numpy(test_features).type(torch.FloatTensor)
    with torch.no_grad():
        out_probs = net(test_input_tensor).numpy()
    out_classes = np.argmax(out_probs, axis=1)
    print("Test Accuracy:", np.mean(out_classes == test_labels))
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def moving_average(a, w=10):
    """Smooth a sequence with a trailing window of ``w`` samples.

    The first ``w`` entries are passed through unchanged. Every later entry
    at index ``i`` is replaced by the mean of the ``w`` values that precede
    it, ``a[i - w:i]`` (the entry itself is not part of its own window).

    Args:
        a: Sequence of numbers supporting ``len`` and slicing.
        w: Window length; defaults to 10.

    Returns:
        A shallow copy of ``a`` (``a[:]``) when it holds fewer than ``w``
        items, otherwise a new list with the same length as ``a``.
    """
    if len(a) < w:
        # Too short for even one full window: hand back an untouched copy.
        return a[:]
    return [
        val if idx < w else sum(a[(idx - w):idx]) / w
        for idx, val in enumerate(a)
    ]
def plot_losses(losses):
    """Plot the smoothed training-loss curve and show the figure.

    Args:
        losses: Per-step loss values; they are smoothed with
            ``moving_average`` (default window) before being plotted
            against their step index.
    """
    avgloss = moving_average(losses)
    plt.figure(1)
    plt.subplot(211)  # first cell of a 2-row, 1-column subplot grid
    plt.plot(range(len(avgloss)), avgloss, 'b--')
    plt.xlabel('step number')
    plt.ylabel('Training loss')
    plt.title('step number vs. Training loss')
    plt.show()
# Load the raw passenger table and list the columns it provides.
titanic_data = pd.read_csv('csv_list/titanic3.csv')
print(titanic_data.columns)

# Append one-hot (dummy) columns for the three categorical fields.
titanic_data = pd.concat(
    [
        titanic_data,
        pd.get_dummies(titanic_data['sex']),
        pd.get_dummies(titanic_data['embarked'], prefix="embark"),
        pd.get_dummies(titanic_data['pclass'], prefix="class"),
    ],
    axis=1,
)
print(titanic_data.columns)
print(titanic_data['sex'])
print(titanic_data['female'])

# Fill missing values in the two continuous columns with the column mean.
titanic_data["age"] = titanic_data["age"].fillna(titanic_data["age"].mean())
titanic_data["fare"] = titanic_data["fare"].fillna(titanic_data["fare"].mean())

# Remove the columns that are not fed to the model, including the raw
# categorical columns that were one-hot encoded above.
titanic_data = titanic_data.drop(
    ['name', 'ticket', 'cabin', 'boat', 'body', 'home.dest', 'sex', 'embarked', 'pclass'],
    axis=1,
)
print(titanic_data.columns )

# Split off the target column; every remaining column is a model input.
labels = titanic_data["survived"].to_numpy()
titanic_data = titanic_data.drop(['survived'], axis=1)
# Request a single float dtype explicitly: when the dummy columns are boolean
# (the default in recent pandas releases) a plain to_numpy() on the mixed
# frame produces an object array, which torch.from_numpy cannot convert.
data = titanic_data.to_numpy(dtype=float)
feature_names = list(titanic_data.columns)

# Reproducible 70/30 split: draw the training rows without replacement and
# use every remaining row, in ascending order, for testing.
np.random.seed(10)
train_indices = np.random.choice(len(labels), int(0.7 * len(labels)), replace=False)
print('train_indices++++', train_indices)
test_indices = sorted(set(range(len(labels))) - set(train_indices))
# The original printed train_indices a second time here; report the test rows.
print('test_indices++++', test_indices)
train_features = data[train_indices]
train_labels = labels[train_indices]
test_features = data[test_indices]
test_labels = labels[test_indices]
print('测试样本数量', len(test_labels))
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, applied element-wise.

    The module has no parameters, so the inherited ``nn.Module`` constructor
    is sufficient.
    """

    def forward(self, x):
        """Return ``x * tanh(softplus(x))`` for the input tensor ``x``."""
        return x * torch.tanh(F.softplus(x))
# Seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(0)
class ThreeLinearModel(nn.Module):
    """Three-layer fully connected two-class classifier with Mish activations.

    Layer widths are ``in_features -> 12 -> 8 -> 2``.

    Args:
        in_features: Number of input features per sample. Defaults to 12,
            the width that was previously hard-coded.
    """

    def __init__(self, in_features=12):
        super().__init__()
        self.linear1 = nn.Linear(in_features, 12)
        self.mish1 = Mish()
        self.linear2 = nn.Linear(12, 8)
        self.mish2 = Mish()
        self.linear3 = nn.Linear(8, 2)
        self.softmax = nn.Softmax(dim=1)
        self.criterion = nn.CrossEntropyLoss()

    def _logits(self, x):
        """Return the raw, pre-softmax class scores for the batch ``x``."""
        out_1 = self.mish1(self.linear1(x))
        out_2 = self.mish2(self.linear2(out_1))
        return self.linear3(out_2)

    def forward(self, x):
        """Return class probabilities (softmax over dim 1) for the batch ``x``."""
        return self.softmax(self._logits(x))

    def getloss(self, x, y):
        """Return the cross-entropy loss for inputs ``x`` and class targets ``y``.

        The loss is computed from the raw logits rather than from the output
        of ``forward``: ``nn.CrossEntropyLoss`` applies log-softmax itself,
        so feeding it probabilities would apply softmax twice.
        """
        return self.criterion(self._logits(x), y)
if __name__ == '__main__':
    # Script entry point: train on the module-level training split, save the
    # weights, plot the loss curve and report train/test accuracy.
    net = ThreeLinearModel()
    num_epochs = 200
    optimizer = torch.optim.Adam(net.parameters(), lr=0.04)

    input_tensor = torch.from_numpy(train_features).type(torch.FloatTensor)
    label_tensor = torch.from_numpy(train_labels)

    # Full-batch training: each epoch is a single optimizer step over the
    # whole training set.
    losses = []
    for epoch in range(num_epochs):
        loss = net.getloss(input_tensor, label_tensor)
        losses.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print('Epoch {}/{} => Loss: {:.2f}'.format(epoch + 1, num_epochs, loss.item()))

    os.makedirs('models', exist_ok=True)
    torch.save(net.state_dict(), 'models/titanic_model.pt')
    plot_losses(losses)

    # Inference needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        out_probs = net(input_tensor).numpy()
    out_classes = np.argmax(out_probs, axis=1)
    print("Train Accuracy:", np.mean(out_classes == train_labels))

    test_input_tensor = torch.from_numpy(test_features).type(torch.FloatTensor)
    with torch.no_grad():
        out_probs = net(test_input_tensor).numpy()
    out_classes = np.argmax(out_probs, axis=1)
    print("Test Accuracy:", np.mean(out_classes == test_labels))
相关知识
Pytorch神经网络【手写数字识别】
Deep Learning:基于pytorch搭建神经网络的花朵种类识别项目(内涵完整文件和代码)—超详细完整实战教程
基于pytorch搭建AlexNet神经网络用于花类识别
搭建简单的神经网络——使用pytorch实现鸢尾花的分类
基于pytorch搭建VGGNet神经网络用于花类识别
基于pytorch搭建神经网络的花朵种类识别(深度学习)
基于pytorch搭建ResNet神经网络用于花类识别
使用PyTorch实现对花朵的分类
【大虾送书第二期】《Python机器学习:基于PyTorch和Scikit-Learn》
BP神经网络,bp神经网络预测模型,Python源码.rar.zip资源
网址: 【Pytorch神经网络实战案例】07 预测泰坦尼克号上生存的乘客 https://m.huajiangbk.com/newsview1250566.html
上一篇: 因为采取简易计税所以当期可以抵扣 |
下一篇: 水果有什么水果 |