工具:Pycharm,win10,Python3.6.4
根据如下数据使用决策树算法进行预测。
Roles Duration Audiobook Genre
5     80       no        Action
15    120      yes       Drama
15    100      yes       Action
20    80       no        Drama
5     80       no        Action
12    115      yes       ?
2     180      yes       ?
给出三种属性,判断是哪种类型,这部分我们直接给出代码了。
from math import log
import operator
def calcShannon(dataSet):
    """Return the Shannon entropy of the class labels in dataSet.

    Each record is a list whose last element is its class label;
    entropy = -sum(p * log2(p)) over the label distribution.
    """
    total = len(dataSet)
    counts = {}
    for record in dataSet:
        label = record[-1]
        counts[label] = counts.get(label, 0) + 1
    entropy = 0.0
    for count in counts.values():
        p = count / float(total)
        entropy -= p * log(p, 2)
    return entropy
def createDataSet():
    """Return the toy training set and its feature names.

    Columns are Roles, Duration, Audiobook (categorically encoded),
    followed by the class label (0 = Action, 1 = Drama — TODO confirm
    against the table in the surrounding text).
    """
    samples = [
        [0, 0, 0, 0],
        [1, 2, 1, 1],
        [1, 1, 1, 0],
        [2, 0, 0, 1],
        [0, 0, 0, 0],
    ]
    feature_names = ['Roles', 'Duration', 'Audiobook']
    return samples, feature_names
def splitDataSet(dataSet, axis, value):
    """Return the records whose feature at index `axis` equals `value`,
    with that feature column removed from each returned record.
    """
    return [rec[:axis] + rec[axis + 1:] for rec in dataSet if rec[axis] == value]
def BestFea(dataSet):
    """Return the index of the feature with the highest information gain (ID3).

    Bug fix: the original never accumulated the conditional entropy
    (`newEntropy` stayed 0.0), so every feature's "gain" equaled the base
    entropy and feature 0 always won. The weighted-entropy accumulation is
    restored and the gain is computed once per feature, after the inner loop.

    Returns -1 if no feature yields positive information gain.
    """
    numFea = len(dataSet[0]) - 1          # last column is the class label
    baseEntropy = calcShannon(dataSet)
    bestInfo = 0.0
    bestFeature = -1
    for i in range(numFea):
        uniqueVals = set(example[i] for example in dataSet)
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            # Weighted entropy of the partition induced by feature i.
            newEntropy += prob * calcShannon(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfo:
            bestInfo = infoGain
            bestFeature = i
    return bestFeature
def majority(classList):
    """Return the most common class label in classList (majority vote).

    Bug fix: the original returned the full sorted list of
    (label, count) pairs instead of the winning label, so tree leaves
    produced by the exhausted-features branch held lists, not labels.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # Sort labels by descending count; ties resolved by sort stability.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def createTree(dataSet, labels, featLabels):
    """Recursively build an ID3 decision tree as nested dicts.

    NOTE: `labels` is consumed in place (the chosen feature name is
    deleted at each split), and every chosen name is appended to
    `featLabels` so the classifier can map tree nodes back to
    test-vector indices.
    """
    classList = [example[-1] for example in dataSet]
    # Pure node: every record carries the same class label.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Features exhausted: fall back to a majority vote.
    if len(dataSet[0]) == 1 or not labels:
        return majority(classList)
    bestFeat = BestFea(dataSet)
    bestFeatLabel = labels[bestFeat]
    featLabels.append(bestFeatLabel)
    tree = {bestFeatLabel: {}}
    del labels[bestFeat]
    for value in set(example[bestFeat] for example in dataSet):
        subset = splitDataSet(dataSet, bestFeat, value)
        tree[bestFeatLabel][value] = createTree(subset, labels, featLabels)
    return tree
def classify(inputTree, featLabels, testVec):
    """Walk the nested-dict decision tree and predict testVec's class.

    `featLabels` maps feature names to testVec indices (in the order
    createTree appended them). Returns '' when testVec's value for a
    node's feature matches no branch.
    """
    root = next(iter(inputTree))
    branches = inputTree[root]
    idx = featLabels.index(root)
    prediction = ''
    for value, subtree in branches.items():
        if testVec[idx] != value:
            continue
        if type(subtree) is dict:
            prediction = classify(subtree, featLabels, testVec)
        else:
            prediction = subtree
    return prediction
if __name__ == '__main__':
    dataSet, labels = createDataSet()
    featLabels = []
    myTree = createTree(dataSet, labels, featLabels)
    # Classify both query vectors; label 1 maps to Drama, anything else
    # to Action (same output, same order, as the original two-copy code).
    for testVec in ([1, 2, 1], [0, 2, 1]):
        result = classify(myTree, featLabels, testVec)
        print('Drama' if result == 1 else 'Action')
相关知识
深度学习实战(1):花的分类任务|附数据集与源码
实践
基于TensorFlow的CNN卷积网络模型花卉分类(1)
对鸢尾花数据集和月亮数据集,分别采用线性LDA、k
【深度学习TPU+Keras+Tensorflow+EfficientNetB7】kaggle竞赛 使用TPU对104种花朵进行分类 第十八次尝试 99.9%准确率
sklearn
python+Tensorflow+CNN花朵分类
神经网络
花卉分类CNN
Tensorflow五种花卉分类
网址: https://m.huajiangbk.com/newsview398118.html
上一篇: 社保补缴=中小企业倒闭潮?不存在 |
下一篇: 线性回归模型 |