Experiment3: IR Evaluation

  • 实现代码如下
def generate_tweetid_gain(file_path):
    """Parse a qrels-style file into per-query lists of ``[doc_id, gain]``.

    Every non-blank line must hold at least four whitespace-separated
    fields: field 0 is used as the query id, field 2 as the document id and
    field 3 as an integer relevance label. Labels that are zero or negative
    are stored as a gain of 0. Entries keep the order in which they appear
    in the file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each query id (str) to a list of ``[doc_id, gain]``
        pairs.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the fourth field is not an integer.
    """
    data_dict = {}
    with open(file_path, 'r', errors='ignore') as f:
        for line in f:
            # split() without an argument copes with tabs and runs of
            # spaces; split(' ') would yield empty or shifted fields there.
            fields = line.split()
            if not fields:
                continue  # blank line, e.g. a trailing newline at EOF
            query_id, docu_id, rel = fields[0], fields[2], int(fields[3])
            data_dict.setdefault(query_id, []).append([docu_id, max(rel, 0)])
    return data_dict

Experiment3: IR Evaluation
import pandas as pd
import math
import numpy as np

def generate_tweetid_gain(file_path):
    """Parse a qrels-style file into per-query lists of ``[doc_id, gain]``.

    Every non-blank line must hold at least four whitespace-separated
    fields: field 0 is used as the query id, field 2 as the document id and
    field 3 as an integer relevance label. Labels that are zero or negative
    are stored as a gain of 0. Entries keep the order in which they appear
    in the file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each query id (str) to a list of ``[doc_id, gain]``
        pairs.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the fourth field is not an integer.
    """
    data_dict = {}
    with open(file_path, 'r', errors='ignore') as f:
        for line in f:
            # split() without an argument copes with tabs and runs of
            # spaces; split(' ') would yield empty or shifted fields there.
            fields = line.split()
            if not fields:
                continue  # blank line, e.g. a trailing newline at EOF
            query_id, docu_id, rel = fields[0], fields[2], int(fields[3])
            data_dict.setdefault(query_id, []).append([docu_id, max(rel, 0)])
    return data_dict

def MAP_eval(data_dict):
    """Compute Mean Average Precision (MAP) over all queries.

    For each query the entries are taken in list order as a ranking
    (rank 1 first). An entry counts as relevant when its gain (index 1) is
    greater than 0. Average precision is the mean of precision@rank taken at
    every relevant entry.

    Args:
        data_dict: Mapping of query id to a list of ``[doc_id, gain]``
            entries.

    Returns:
        The mean of the per-query average precision values. A query with no
        relevant entry contributes 0, and an empty ``data_dict`` yields 0.0,
        instead of raising ZeroDivisionError.
    """
    if not data_dict:
        return 0.0

    total_ap = 0.0
    for data in data_dict.values():
        hits = 0
        precision_sum = 0.0
        # enumerate gives the true position of each entry; looking the entry
        # up by value would return the first equal entry and mis-rank
        # duplicated [doc_id, gain] pairs.
        for rank, entry in enumerate(data, start=1):
            if entry[1] > 0:
                hits += 1
                precision_sum += hits / rank
        if hits:
            total_ap += precision_sum / hits
    return total_ap / len(data_dict)

def MRR_eval(data_dict):
    """Compute Mean Reciprocal Rank (MRR) over all queries.

    For each query the entries are taken in list order as a ranking
    (rank 1 first). The reciprocal rank is ``1 / rank`` of the first entry
    whose gain (index 1) is greater than 0; a query without such an entry
    contributes 0.

    Args:
        data_dict: Mapping of query id to a list of ``[doc_id, gain]``
            entries.

    Returns:
        The mean reciprocal rank across all queries, or 0.0 when
        ``data_dict`` is empty (instead of raising ZeroDivisionError).
    """
    if not data_dict:
        return 0.0

    total_rr = 0.0
    for data in data_dict.values():
        for rank, entry in enumerate(data, start=1):
            if entry[1] > 0:
                total_rr += 1 / rank
                break  # only the first relevant entry matters
    return total_rr / len(data_dict)

def _dcg(gains):
    """Return ``gains[0] + sum(gains[i-1] / log2(i) for i >= 2)``."""
    total = 0
    for position, gain in enumerate(gains, start=1):
        # Position 1 is not discounted (log2(1) == 0 would divide by zero).
        total += gain if position == 1 else gain / math.log2(position)
    return total


def NDCG_eval(data_dict):
    """Compute the mean NDCG over all queries.

    For each query, DCG is computed over the gains (index 1 of every entry)
    in list order, and IDCG over the same gains sorted in descending order.
    The query's NDCG is ``DCG / IDCG``.

    Args:
        data_dict: Mapping of query id to a list of ``[doc_id, gain]``
            entries.

    Returns:
        The mean NDCG across all queries. A query whose IDCG is 0 (no
        positive gain) contributes 0, and an empty ``data_dict`` yields 0.0,
        instead of raising ZeroDivisionError.
    """
    if not data_dict:
        return 0.0

    total_ndcg = 0.0
    for data in data_dict.values():
        gains = [entry[1] for entry in data]
        ideal_dcg = _dcg(sorted(gains, reverse=True))
        if ideal_dcg:
            total_ndcg += _dcg(gains) / ideal_dcg
    return total_ndcg / len(data_dict)

def evaluation(file_path='./qrels.txt'):
    """Load a qrels file and print MAP, MRR and NDCG rounded to 5 decimals.

    Args:
        file_path: Path of the qrels file to evaluate. Defaults to
            ``'./qrels.txt'``, the location that was previously hard-coded.

    Returns:
        None. Each metric is printed on its own line as ``NAME = value``.
    """
    data_dict = generate_tweetid_gain(file_path)
    metrics = (('MAP', MAP_eval), ('MRR', MRR_eval), ('NDCG', NDCG_eval))
    # Each metric is computed and printed in turn, in this fixed order.
    for label, metric in metrics:
        print(label, ' = ', round(metric(data_dict), 5), sep='')

# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    evaluation()

Original: https://blog.csdn.net/Hzh130727/article/details/121652920
Author: AI-NuanBaobao
Title: Experiment3: IR Evaluation

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/700693/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

亲爱的 Coder【最近整理,可免费获取】👉 最新必读书单  | 👏 面试题下载  | 🌎 免费的AI知识星球