改了下他的小数据集…只用了MovieLens的大数据集:

from math import sqrt

# Small sample data set: maps each user name to a dict of
# {book title: float rating}. Not every user has rated every title
# (e.g. the third user has only four ratings).
critics = {
    '用户A': {
        '集体智慧编程': 2.5,
        '机器学习': 3.5,
        '数据之美': 3.0,
        '数据挖掘导论': 3.5,
        '深入浅出数据挖掘': 2.5,
        '数据挖掘实战': 3.0
    },
    '用户B': {
        '集体智慧编程': 3.0,
        '机器学习': 3.5,
        '数据之美': 1.5,
        '数据挖掘导论': 5.0,
        '深入浅出数据挖掘': 3.5,
        '数据挖掘实战': 3.0
    },
    '用户C': {
        '集体智慧编程': 2.5,
        '机器学习': 3.0,
        '数据挖掘导论': 3.5,
        '深入浅出数据挖掘': 4.0
    },
    '用户D': {
        '集体智慧编程': 3.5,
        '数据之美': 3.0,
        '数据挖掘导论': 4.0,
        '深入浅出数据挖掘': 4.0,
        '数据挖掘实战': 2.5
    },
    '用户E': {
        '集体智慧编程': 3.0,
        '机器学习': 4.0,
        '数据之美': 2.0,
        '数据挖掘导论': 3.0,
        '深入浅出数据挖掘': 3.0,
        '数据挖掘实战': 2.5
    },
    '用户F': {
        '集体智慧编程': 3.0,
        '机器学习': 4.0,
        '数据挖掘导论': 5.0,
        '深入浅出数据挖掘': 3.0,
        '数据挖掘实战': 2.0
    },
    '用户G': {
        '集体智慧编程': 4.5,
        '数据挖掘导论': 4.0,
        '深入浅出数据挖掘': 1.0
    },
}

# Return the best-matching entries for `person` using the given similarity function
def top_matches(prefs, person, n=5, similarity=sim_pearson):
    """Rank every other key of ``prefs`` by similarity to ``person``.

    Args:
        prefs: Mapping whose keys are the entries to compare; it is passed
            unchanged to ``similarity``.
        person: Key to compare the others against; it is excluded from
            the result.
        n: Maximum number of matches to return.
        similarity: Callable invoked as ``similarity(prefs, person, other)``.
            The default, ``sim_pearson``, is not defined in this section and
            must already be in scope when this ``def`` is executed.

    Returns:
        A list of at most ``n`` ``(score, other)`` tuples, highest first.
    """
    ranked = []
    for other in prefs:
        if other == person:
            continue
        ranked.append((similarity(prefs, person, other), other))

    # Ascending sort, then flip so the highest score comes first.
    ranked.sort()
    return ranked[::-1][:n]


# Recommend items to a user via a similarity-weighted average of other users' ratings
def get_recommendations(prefs, person, similarity=sim_pearson):
    """Predict ratings for items ``person`` has not rated yet.

    Args:
        prefs: Mapping of ``{user: {item: rating}}``.
        person: The user to produce recommendations for.
        similarity: Callable invoked as ``similarity(prefs, person, other)``.
            The default, ``sim_pearson``, is not defined in this section and
            must already be in scope when this ``def`` is executed.

    Returns:
        A list of ``(predicted_rating, item)`` tuples, highest first. Only
        items that ``person`` has not rated (or has rated 0) are included.
    """
    weighted_totals = {}
    weight_sums = {}

    for other in prefs:
        # Never compare the user with themselves.
        if other == person:
            continue

        weight = similarity(prefs, person, other)

        # Users with zero or negative similarity contribute nothing.
        if weight <= 0:
            continue

        for item, rating in prefs[other].items():
            if item in prefs[person] and prefs[person][item] != 0:
                continue  # already rated by `person`
            # Running sum of rating * similarity
            weighted_totals[item] = weighted_totals.get(item, 0) + rating * weight
            # Running sum of similarities, used to normalise below
            weight_sums[item] = weight_sums.get(item, 0) + weight

    # Normalise each total by the similarity mass that produced it.
    ranking = sorted(
        (total / weight_sums[item], item)
        for item, total in weighted_totals.items()
    )
    return ranking[::-1]


# Swap people and items
def transform_prefs(prefs):
    """Invert a nested ratings mapping.

    Args:
        prefs: Mapping of ``{person: {item: rating}}``.

    Returns:
        A new dict of ``{item: {person: rating}}`` holding the same ratings.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, score in ratings.items():
            # Create the per-item dict on first sight, then record the rating.
            flipped.setdefault(item, {})[person] = score
    return flipped


# Build the table of similar items
def calc_similar_items(prefs, n=10):
    """Return, for every item, the list of items most similar to it.

    Args:
        prefs: Mapping of ``{person: {item: rating}}``.
        n: Maximum number of similar items to keep per item.

    Returns:
        A dict mapping each item to the ``(score, other_item)`` list produced
        by ``top_matches`` with ``sim_distance`` as the similarity function.
    """
    # Invert the ratings so that items become the outer keys.
    item_prefs = transform_prefs(prefs)
    total = len(item_prefs)

    result = {}
    for count, item in enumerate(item_prefs, start=1):
        # Progress report every 100 items.
        if count % 100 == 0:
            print(str(count) + " / " + str(total))
        # Forward `n`; previously it was ignored and top_matches always
        # fell back to its own default of 5.
        result[item] = top_matches(item_prefs, item, n=n, similarity=sim_distance)

    return result


# Produce recommendations from a precomputed table of similar items
def get_recommended_items(prefs, itemMatch, user):
    """Predict ratings for items ``user`` has not rated yet.

    Args:
        prefs: Mapping of ``{user: {item: rating}}``.
        itemMatch: Mapping of ``{item: [(similarity, other_item), ...]}``.
        user: Key into ``prefs`` to produce recommendations for.

    Returns:
        A list of ``(predicted_rating, item)`` tuples, highest first. Each
        prediction is the similarity-weighted average of the user's ratings
        for the items related to the candidate.

    Raises:
        KeyError: If ``user`` is not in ``prefs`` or one of the user's rated
            items is missing from ``itemMatch``.
    """
    user_ratings = prefs[user]
    weighted_sums = {}
    similarity_sums = {}

    for item, rating in user_ratings.items():
        for similarity, candidate in itemMatch[item]:
            # Skip items the user has already rated.
            if candidate in user_ratings:
                continue

            # Running sum of similarity * rating
            weighted_sums[candidate] = weighted_sums.get(candidate, 0) + similarity * rating
            # Running sum of similarities, used to normalise below
            similarity_sums[candidate] = similarity_sums.get(candidate, 0) + similarity

    # A candidate whose similarities add up to zero has no usable weight;
    # dividing by it would raise ZeroDivisionError, so leave it out.
    rankings = [
        (weighted_sums[candidate] / total, candidate)
        for candidate, total in similarity_sums.items()
        if total != 0
    ]

    rankings.sort(reverse=True)
    return rankings


# Load the MovieLens data set
def load_movielens():
    """Load MovieLens ratings keyed by user and movie title.

    Reads ``./movieLens/u.item`` (``|``-separated; the first two fields are
    the movie id and title) and ``./movieLens/u.data`` (tab-separated:
    user, movie id, rating, timestamp), both relative to the current
    working directory.

    Returns:
        A dict of ``{user: {movie title: float rating}}``. Ratings are keyed
        by title, so two movie ids sharing a title end up in the same entry.

    Raises:
        ValueError: If a line does not have the expected number of fields.
        KeyError: If ``u.data`` references a movie id missing from ``u.item``.
    """
    # NOTE(review): both files are decoded with the platform default
    # encoding; if the titles fail to decode, an explicit encoding may be
    # needed -- confirm against the data set's README.
    titles = {}
    with open('./movieLens/u.item') as item_file:
        for line in item_file:
            movie_id, title = line.split('|')[0:2]
            titles[movie_id] = title

    prefs = {}
    with open('./movieLens/u.data') as data_file:
        for line in data_file:
            # The timestamp is unpacked only to validate the field count.
            user, movie_id, rating, _timestamp = line.split('\t')
            prefs.setdefault(user, {})[titles[movie_id]] = float(rating)

    return prefs

MovieLens数据文件:movielens

几种相似度算法参见上一篇:几种相似度算法的Python实现