import pandas as pd

# Sample transactions: each inner list holds the items of one transaction.
dataSet = [['A', 'C', 'D'], ['B', 'C', 'E'], ['A', 'B', 'C', 'E'], ['B', 'E']]

# 1. Generate the candidate 1-itemsets
import itertools


def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a collection of transactions.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A list containing one single-element ``frozenset`` per distinct item,
        ordered by the item's first appearance in ``dataSet``.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is reproducible from run to run (iterating a set of strings
    # is not).
    unique_items = dict.fromkeys(itertools.chain.from_iterable(dataSet))
    # frozenset is hashable, unlike set, so each itemset can be a dict key.
    return [frozenset([item]) for item in unique_items]


# Test
C1 = createC1(dataSet)
list(C1)[:5]  # notebook-style echo of the first five candidates
# Output recorded in the original write-up (element order may differ):
# [frozenset({'E'}),
#  frozenset({'D'}),
#  frozenset({'C'}),
#  frozenset({'A'}),
#  frozenset({'B'})]

# 2. Filter by condition to generate the frequent 1-itemsets
def scanD(dataSet, Ck, min_support):
    """Keep the candidate itemsets whose support reaches ``min_support``.

    Support of an itemset is the fraction of transactions that contain
    every item of that itemset.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable items.
        Ck: Iterable of candidate itemsets of k items (``frozenset`` objects,
            since they are used as dictionary keys).
        min_support: Minimum support, as a fraction of the transaction count,
            that an itemset needs in order to be kept.

    Returns:
        A dict mapping each retained itemset to its support. Candidates that
        occur in no transaction are never included.
    """
    # Materialise both inputs once: this allows one-shot iterables and avoids
    # rebuilding a set from the same transaction for every candidate.
    transactions = [set(transaction) for transaction in dataSet]
    candidates = list(Ck)

    # Count, for every candidate, the transactions that contain it.
    counts = {}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                counts[candidate] = counts.get(candidate, 0) + 1

    # counts is empty whenever there are no transactions, so the division
    # below is never evaluated with total == 0.
    total = len(transactions)
    frequent = {}
    for itemset, count in counts.items():
        support = count / total
        if support >= min_support:
            frequent[itemset] = support
    return frequent


# Test
min_support = 0.4 L1 = scanD(d1