Getting Started

We’re working on documentation!

For now, this example computes nDCG for an item-based k-NN collaborative filter, evaluated with 5-fold cross-validation over users:

import pandas as pd
from lenskit import batch, topn
from lenskit import crossfold as xf
from lenskit.algorithms import item_knn as knn

# load the MovieLens 100K ratings (tab-separated, no header row)
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
        names=['user', 'item', 'rating', 'timestamp'])

# item-item k-NN with a neighborhood size of 30
algo = knn.ItemItem(30)

def evaluate(train, test):
    # train the algorithm on this partition's training ratings
    model = algo.train(train)
    users = test.user.unique()
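    # generate top-100 recommendations for each test user, restricted to
    # items they have not rated in the training data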
    recs = batch.recommend(algo, model, users, 100,
            topn.UnratedCandidates(train))
    # combine with test ratings for relevance data
    res = pd.merge(recs, test, how='left',
                on=('user', 'item'))
    # recommended items missing from the test set are treated as irrelevant (rating 0)
    res.loc[res.rating.isna(), 'rating'] = 0
    return res

# run the evaluation: 5 partitions of users, with 20% of each test user's ratings held out
splits = xf.partition_users(ratings, 5,
        xf.SampleFrac(0.2))
recs = pd.concat((evaluate(train, test)
                for (train, test) in splits))

# compute nDCG for each user's recommendation list
ndcg = recs.groupby('user').rating.apply(topn.ndcg)
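
The last line produces a pandas Series of per-user nDCG scores, so ordinary pandas operations work on it. For example, a minimal way to summarize the results (plain pandas, nothing LensKit-specific):

# average nDCG across all test users
print(ndcg.mean())

# or inspect the whole distribution of per-user scores
print(ndcg.describe())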