我用 Python 实现的关联规则挖掘算法(Apriori)
2009年04月22日 09:12

空间不能上传文件,只能贴代码了

# Minimum support threshold: 0.4
# Minimum support count: 5 transactions * 0.4 = 2

# Universe of items that may appear in a transaction.
C = ['A', 'B', 'C', 'D', 'E']
# Current candidate itemsets; starts out as the very same list object as C
# and is rebound on every pass of the main loop further down.
t = C
# Transaction database: each transaction is a comma-separated item string.
T = [
    'A,B,C,D',
    'B,C,E',
    'A,B,C,E',
    'B,D,E',
    'A,B,C,D',
]
# Collects the frequent itemsets found by the main loop.
frequent_itemset = []

def apriori_gen(inputlist, C):
    """Build the next level of candidate itemsets.

    Every itemset in *inputlist* (a comma-separated item string) is extended
    by each item of *C* it does not already contain. The extended itemset is
    sorted, re-joined with commas, and kept only the first time it appears.

    Args:
        inputlist: Itemsets to extend, each a comma-separated string.
        C: Items that may be added to an itemset.

    Returns:
        List of unique extended itemsets, in order of first appearance.
    """
    candidates = []
    for itemset in inputlist:
        for extra in C:
            members = itemset.split(',')
            if extra in members:
                continue
            # Sorting gives one canonical spelling per itemset, so the same
            # combination reached via different parents is detected as a dup.
            candidate = ",".join(sorted(members + [extra]))
            if candidate not in candidates:
                candidates.append(candidate)
    return candidates

def have_infrequent_subset():
    """Stub that always returns 1.

    Presumably reserved for Apriori's infrequent-subset pruning step
    (TODO confirm); it takes no arguments and nothing in the visible code
    calls it.
    """
    return 1

def is_subset(A,B):
    """Return True when every item of A also occurs in B.

    Both arguments are comma-separated item strings. Items are compared as
    whole tokens, so ``'AB'`` is not contained in ``'A,B'``.
    """
    wanted = set(A.split(','))
    available = B.split(',')
    return wanted.issubset(available)

def is_frequest_subset(itemset, T, min_count=2):
    """Tell whether *itemset* reaches the minimum support count in *T*.

    The function name (including its spelling) is kept so existing callers
    keep working.

    Args:
        itemset: Comma-separated item string, e.g. ``'A,B'``.
        T: Iterable of transactions, each a comma-separated item string.
        min_count: Minimum number of transactions that must contain every
            item of *itemset*. Defaults to 2, the threshold that used to be
            hard-coded in this function.

    Returns:
        True if at least *min_count* transactions contain all items of
        *itemset*, otherwise False.
    """
    # Split the itemset once instead of once per transaction.
    wanted = set(itemset.split(','))
    occur_count = sum(
        1 for transaction in T if wanted.issubset(transaction.split(','))
    )
    return occur_count >= min_count

def generate_frequent(C):
    """Keep only the candidates in C that are frequent.

    Each candidate is checked with ``is_frequest_subset`` against the
    module-level transaction list ``T``; the input order is preserved.
    """
    return [candidate for candidate in C if is_frequest_subset(candidate, T)]


# Level-wise search: pass k filters the size-k candidates down to the frequent
# ones, then extends the survivors by one item to form the size-(k+1)
# candidates. The upper bound is len(C) + 1 so that the itemset made of all
# len(C) items is tested as well; range(1, len(C)) stopped one level short.
for k in range(1, len(C) + 1):
    t = generate_frequent(t)
    if not t:
        # No frequent itemset of this size, so no larger one can be built.
        break
    # Size-1 itemsets are not recorded; only itemsets of two or more items
    # are added to frequent_itemset.
    if k != 1:
        frequent_itemset = frequent_itemset + t
    t = apriori_gen(t, C)

print(frequent_itemset)

# Reduce frequent_itemset to its maximal members: the itemsets that are not
# contained in any itemset appearing later in the list.
frequent_itemset_len = len(frequent_itemset)

# NOTE: despite its name, this holds the 1-based positions of the itemsets
# that ARE contained in a later itemset, i.e. the non-maximal ones. The last
# itemset has nothing after it, so it is never examined here.
max_frequent_itemset_index = [
    position
    for position in range(1, frequent_itemset_len)
    if any(
        is_subset(frequent_itemset[position - 1], later)
        for later in frequent_itemset[position:]
    )
]

# Everything whose position was not flagged above is maximal.
max_frequent_itemset = [
    itemset
    for position, itemset in enumerate(frequent_itemset, start=1)
    if position not in max_frequent_itemset_index
]


print(max_frequent_itemset)

最小支持度为 0.4(共 5 条事务,对应的最小支持计数为 2)。

C 是所有项(item)的全集。

T 是事务集,每条事务是一个以逗号分隔的项字符串。

输出的