首页 > 分享 > 决策树使用

决策树使用

工具:Pycharm,win10,Python3.6.4

1.题目要求

根据如下数据使用决策树算法进行预测。

| Roles | Duration | Audiobook | Genre |
|-------|----------|-----------|-------|
| 5 | 80 | no | Action |
| 15 | 120 | yes | Drama |
| 15 | 100 | yes | Action |
| 20 | 80 | no | Drama |
| 5 | 80 | no | Action |
| 12 | 115 | yes | ? |
| 2 | 180 | yes | ? |

2.Python代码

给出三种属性,判断是哪种类型,这部分我们直接给出代码了。

from math import log

import operator

def calcShannon(dataSet):

num = len(dataSet)

labelCount = {}

for fea in dataSet:

currentLabel = fea[-1]

if currentLabel not in labelCount.keys():

labelCount[currentLabel] = 0

labelCount[currentLabel] += 1

shannon = 0.0

for key in labelCount:

prob = float(labelCount[key]) / num

shannon -= prob * log(prob,2)

return shannon

def createDataSet():

dataSet = [[0,0,0,0],

[1,2,1,1],

[1,1,1,0],

[2,0,0,1],

[0,0,0,0],]

labels = ['Roles','Duration','Audiobook',]

return dataSet,labels

def splitDataSet(dataSet,axis,value):

retDataSet = []

for fea in dataSet:

if fea[axis] == value:

reducedFea = fea[:axis]

reducedFea.extend(fea[axis+1:])

retDataSet.append(reducedFea)

return retDataSet

def BestFea(dataSet):

numFea = len(dataSet[0]) - 1

baseEntropy = calcShannon(dataSet)

bestInfo = 0.0

bestFeature = -1

for i in range(numFea):

feaList = [example[i] for example in dataSet]

uniqueVals = set(feaList)

newEntropy = 0.0

for value in uniqueVals:

subDataSet = splitDataSet(dataSet,i,value)

prob = len(subDataSet) / float(len(dataSet))

infoGain = baseEntropy - newEntropy

if infoGain > bestInfo:

bestInfo = infoGain

bestFeature = i

return bestFeature

def majority(classList):

classCount = {}

for vote in classList:

if vote not in classCount.keys():

classCount[vote] = 0

classCount[vote] += 1

sortedClassCount = sorted(classCount.items(),key = operator.itemgetter(1),reverse=True)

return sortedClassCount

def createTree(dataSet, labels, featLabels):

classList = [example[-1] for example in dataSet]

if classList.count(classList[0]) == len(classList):

return classList[0]

if len(dataSet[0]) == 1 or len(labels) == 0:

return majority(classList)

bestFeat = BestFea(dataSet)

bestFeatLabel = labels[bestFeat]

featLabels.append(bestFeatLabel)

myTree = {bestFeatLabel:{}}

del(labels[bestFeat])

featValues = [example[bestFeat] for example in dataSet]

uniqueVals = set(featValues)

for value in uniqueVals:

myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), labels, featLabels)

return myTree

def classify(inputTree, featLabels, testVec):

classLabel = ''

firstStr = next(iter(inputTree))

secondDict = inputTree[firstStr]

featIndex = featLabels.index(firstStr)

for key in secondDict.keys():

if testVec[featIndex] == key:

if type(secondDict[key]).__name__ == 'dict':

classLabel = classify(secondDict[key], featLabels, testVec)

else: classLabel = secondDict[key]

return classLabel

if __name__ == '__main__':

dataSet, labels = createDataSet()

featLabels = []

myTree = createTree(dataSet, labels, featLabels)

testVec1 = [1,2,1]

result = classify(myTree, featLabels, testVec1)

if result == 1:

print('Drama')

else:

print('Action')

testVec2 = [0,2,1]

result = classify(myTree, featLabels, testVec2)

if result == 1:

print('Drama')

else:

print('Action')

相关知识

深度学习实战(1):花的分类任务|附数据集与源码
实践
基于TensorFlow的CNN卷积网络模型花卉分类(1)
对鸢尾花数据集和月亮数据集,分别采用线性LDA、k
【深度学习TPU+Keras+Tensorflow+EfficientNetB7】kaggle竞赛 使用TPU对104种花朵进行分类 第十八次尝试 99.9%准确率
sklearn
python+Tensorflow+CNN花朵分类
神经网络
花卉分类CNN
Tensorflow五种花卉分类

网址: 决策树使用 https://m.huajiangbk.com/newsview398118.html

所属分类:花卉
上一篇: 社保补缴=中小企业倒闭潮?不存在
下一篇: 线性回归模型