| """Mean average precision metric""" |
|
|
| import evaluate |
| import datasets |
| import json |
| from ranx import Qrels, Run |
| from ranx import evaluate as ran_evaluate |
|
|
|
|
_CITATION = """\
@inproceedings{ranx,
  author    = {Elias Bassani},
  title     = {ranx: {A} Blazing-Fast Python Library for Ranking Evaluation and Comparison},
  booktitle = {{ECIR} {(2)}},
  series    = {Lecture Notes in Computer Science},
  volume    = {13186},
  pages     = {259--264},
  publisher = {Springer},
  year      = {2022},
  doi       = {10.1007/978-3-030-99739-7\_30}
}
"""

_DESCRIPTION = """\
This is the mean average precision (MAP) metric for retrieval systems.
For each query, average precision is the mean of the precision values computed each time a relevant document is retrieved; MAP is the mean of those per-query scores.
You can refer to [the ranx documentation](https://amenra.github.io/ranx/metrics/#mean-average-precision) for details.
"""

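# A minimal illustrative sketch, not used by the metric below: average
# precision (AP) for a single query under binary relevance, where any qrel
# score > 0 counts as relevant. `ranked_doc_ids` must be sorted by descending
# model score. MAP is the mean of this value over all queries.
def _average_precision_sketch(ranked_doc_ids, relevant_doc_ids):
    hits = 0
    precision_sum = 0.0
    for rank, doc_id in enumerate(ranked_doc_ids, start=1):
        if doc_id in relevant_doc_ids:
            hits += 1
            precision_sum += hits / rank  # precision@rank at each relevant hit
    return precision_sum / len(relevant_doc_ids) if relevant_doc_ids else 0.0

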
_KWARGS_DESCRIPTION = """
Args:
    predictions: list of JSON-encoded strings, one per query. Each string decodes to a
        dictionary mapping a query id to the document relevancy scores produced by the
        model for that query, e.g. '{"q_1": {"d_1": 0.8, "d_2": 0.9}}'.
    references: list of JSON-encoded strings, one per query. Each string decodes to a
        dictionary mapping a query id to its relevant documents and their relevance
        grades, e.g. '{"q_1": {"d_1": 1, "d_2": 2}}'.
    k: `int`, optional, defaults to None. If set, map@k is computed instead.
Returns:
    map (`float`): mean average precision score. Minimum possible value is 0.
        Maximum possible value is 1.0.
Examples:

    >>> import json
    >>> my_new_module = evaluate.load("map")
    >>> references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
    ...               json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
    >>> predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
    ...                json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
    >>> results = my_new_module.compute(references=references, predictions=predictions)
    >>> print(round(results["map"], 4))
    0.9028
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Map(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Both inputs are JSON-encoded strings, one entry per query.
            features=datasets.Features({
                "predictions": datasets.Value("string"),
                "references": datasets.Value("string"),
            }),
            reference_urls=["https://amenra.github.io/ranx/"],
        )

    def _compute(self, predictions, references, k=None):
        """Returns the MAP (or MAP@k) score."""
        preds = {}
        refs = {}
        # Each entry is a JSON-encoded, single-query dictionary; merge them
        # into one run/qrels mapping keyed by query id.
        for pred in predictions:
            preds.update(json.loads(pred))
        for ref in references:
            refs.update(json.loads(ref))

        run = Run(preds)
        qrels = Qrels(refs)
        metric = "map" if k is None else f"map@{k}"
        map_score = ran_evaluate(qrels, run, metric)
        return {
            "map": map_score,
        }
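

# Minimal smoke test using ranx directly (a sketch that bypasses
# `evaluate.load`): both relevant documents are ranked first, so MAP is 1.0.
# The second line shows the cutoff metric string used when `k` is given.
if __name__ == "__main__":
    qrels = Qrels({"q_1": {"d_1": 1, "d_2": 2}})
    run = Run({"q_1": {"d_1": 0.8, "d_2": 0.9}})
    print(ran_evaluate(qrels, run, "map"))    # expected: 1.0
    print(ran_evaluate(qrels, run, "map@1"))  # map@k via the "map@{k}" string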