
about Tech, Computer vision and Machine learning

perl de 集合知プログラミング(6.5)

2.7 アイテムベースのフィルタリング




# Build, for every item, the list of items that are most similar to it.
#
# Arguments:
#   $prefs      - user-based preference data; handed to transformPrefs()
#   $n          - number of matches requested per item; handed to topMatches()
#   $similarity - code reference of the similarity function; handed to
#                 topMatches()
#
# Returns a hash reference that maps each item to an array reference holding
# the list returned by topMatches() for that item.
sub calculateSimilarItems {
    my ($prefs, $n, $similarity) = @_;

    # Swap the user-based data around so that it is keyed by item.
    my %itemPrefs = transformPrefs($prefs);

    # The item count never changes inside the loop, so compute it once.
    my $total = scalar(keys %itemPrefs);

    # Build the result list.
    my %result = ();
    my $c = 0;
    foreach my $item (keys %itemPrefs) {
        # Show progress for large data sets.
        $c += 1;
        if( ($c % 100) == 0 ) {
            printf("%d / %d\n", $c, $total);
        }

        # Find the items that are most similar to this one.
        my @scores = topMatches(\%itemPrefs, $item, $n, $similarity);
        $result{$item} = \@scores;
    }

    return \%result;
}


# Driver: load the critics data, ask how many similar items to list, then
# print the similar-item table for every item.
my $json_file = $ARGV[0] || 'critics.json';
my $critics = conv_json2perl($json_file);

print "Enter num of match items ==> ";
my $n = <STDIN>;

# <STDIN> yields undef on EOF and otherwise keeps the trailing newline;
# normalise the input before using it as a number.
die "No number of match items was entered\n" unless defined $n;
chomp $n;
$n =~ s/\A\s+|\s+\z//g;
die "Number of match items must be a positive integer\n"
    unless $n =~ /\A[0-9]+\z/ && $n > 0;

my $itemSim = calculateSimilarItems($critics, $n, \&sim_distance);

print "Similar items:\n";
foreach my $item (keys %{ $itemSim }) {
    print $item . ":\n";

    # Print at most $n entries, but never index past the end of the list:
    # dereferencing a missing element would autovivify an empty hash.
    my @matches = @{ $itemSim->{$item} };
    my $last = $n < scalar(@matches) ? $n - 1 : $#matches;

    foreach my $match (@matches[0 .. $last]) {
        # Each entry is used as a hash reference of similar item => score.
        foreach my $similar (keys %{ $match }) {
            print "\t" . $similar . " " . $match->{$similar} . "\n";
        }
    }
}


Enter num of match items ==> 5
Similar items:
Just My Luck:
	Lady in the Water 0.222222222222222
	You, Me and Dupree 0.181818181818182
	The Night Listener 0.153846153846154
	Snakes on a Plane 0.105263157894737
	Superman Returns 0.0645161290322581
Snakes on a Plane:
	Lady in the Water 0.222222222222222
	The Night Listener 0.181818181818182
	Superman Returns 0.166666666666667
	Just My Luck 0.105263157894737
	You, Me and Dupree 0.0512820512820513
Lady in the Water:
	You, Me and Dupree 0.4
	The Night Listener 0.285714285714286
	Just My Luck 0.222222222222222
	Snakes on a Plane 0.222222222222222
	Superman Returns 0.0909090909090909
Superman Returns:
	Snakes on a Plane 0.166666666666667
	The Night Listener 0.102564102564103
	Lady in the Water 0.0909090909090909
	Just My Luck 0.0645161290322581
	You, Me and Dupree 0.0533333333333333
The Night Listener:
	Lady in the Water 0.285714285714286
	Snakes on a Plane 0.181818181818182
	Just My Luck 0.153846153846154
	You, Me and Dupree 0.148148148148148
	Superman Returns 0.102564102564103
You, Me and Dupree:
	Lady in the Water 0.4
	Just My Luck 0.181818181818182
	The Night Listener 0.148148148148148
	Superman Returns 0.0533333333333333
	Snakes on a Plane 0.0512820512820513
