cbrkit.eval.retrieval
1from collections.abc import Sequence 2from typing import Any, Literal 3 4from ..helpers import unpack_float 5from ..retrieval import Result, ResultStep 6from ..typing import EvalMetricFunc, Float, QueryCaseMatrix 7from .common import DEFAULT_METRICS, compute, similarities_to_qrels 8 9 10def retrieval_step[Q, C, S: Float]( 11 qrels: QueryCaseMatrix[Q, C, int], 12 step: ResultStep[Q, C, Any, S], 13 metrics: Sequence[str] = DEFAULT_METRICS, 14 metric_funcs: dict[str, EvalMetricFunc] | None = None, 15) -> dict[str, float]: 16 """Evaluate a single retrieval step against relevance judgments.""" 17 return compute( 18 qrels, 19 {query: entry.similarities for query, entry in step.queries.items()}, 20 metrics, 21 metric_funcs, 22 ) 23 24 25def retrieval[Q, C, S: Float]( 26 qrels: QueryCaseMatrix[Q, C, int], 27 result: Result[Q, C, Any, S], 28 metrics: Sequence[str] = DEFAULT_METRICS, 29 metric_funcs: dict[str, EvalMetricFunc] | None = None, 30) -> list[dict[str, float]]: 31 """Evaluate all retrieval steps against relevance judgments.""" 32 return [ 33 retrieval_step( 34 qrels, 35 step, 36 metrics, 37 metric_funcs, 38 ) 39 for step in result.steps 40 ] 41 42 43def retrieval_step_to_qrels[Q, C, S: Float]( 44 result: ResultStep[Q, C, Any, S], 45 max_qrel: int | None = None, 46 min_qrel: int = 1, 47 round_mode: Literal["floor", "ceil", "nearest"] = "nearest", 48 auto_scale: bool = True, 49) -> QueryCaseMatrix[Q, C, int]: 50 """Convert a retrieval step's similarities to integer relevance judgments.""" 51 unpacked_sims = { 52 query: {case: unpack_float(value) for case, value in entry.similarities.items()} 53 for query, entry in result.queries.items() 54 } 55 return similarities_to_qrels( 56 unpacked_sims, 57 max_qrel, 58 min_qrel, 59 round_mode, 60 auto_scale, 61 ) 62 63 64def retrieval_to_qrels[Q, C, S: Float]( 65 result: Result[Q, C, Any, S], 66 max_qrel: int = 5, 67 min_qrel: int = 1, 68 round_mode: Literal["floor", "ceil", "nearest"] = "nearest", 69 auto_scale: bool = True, 
70) -> list[QueryCaseMatrix[Q, C, int]]: 71 """Convert all retrieval steps' similarities to integer relevance judgments.""" 72 return [ 73 retrieval_step_to_qrels( 74 step, 75 max_qrel, 76 min_qrel, 77 round_mode, 78 auto_scale, 79 ) 80 for step in result.steps 81 ]
def
retrieval_step( qrels: QueryCaseMatrix[Q, C, int], step: cbrkit.model.ResultStep[Q, C, Any, S], metrics: Sequence[str] = ('precision', 'recall', 'f1', 'map', 'ndcg', 'correctness', 'completeness'), metric_funcs: dict[str, cbrkit.typing.EvalMetricFunc] | None = None) -> dict[str, float]:
11def retrieval_step[Q, C, S: Float]( 12 qrels: QueryCaseMatrix[Q, C, int], 13 step: ResultStep[Q, C, Any, S], 14 metrics: Sequence[str] = DEFAULT_METRICS, 15 metric_funcs: dict[str, EvalMetricFunc] | None = None, 16) -> dict[str, float]: 17 """Evaluate a single retrieval step against relevance judgments.""" 18 return compute( 19 qrels, 20 {query: entry.similarities for query, entry in step.queries.items()}, 21 metrics, 22 metric_funcs, 23 )
Evaluate a single retrieval step against relevance judgments.
def
retrieval( qrels: QueryCaseMatrix[Q, C, int], result: cbrkit.model.Result[Q, C, Any, S], metrics: Sequence[str] = ('precision', 'recall', 'f1', 'map', 'ndcg', 'correctness', 'completeness'), metric_funcs: dict[str, cbrkit.typing.EvalMetricFunc] | None = None) -> list[dict[str, float]]:
26def retrieval[Q, C, S: Float]( 27 qrels: QueryCaseMatrix[Q, C, int], 28 result: Result[Q, C, Any, S], 29 metrics: Sequence[str] = DEFAULT_METRICS, 30 metric_funcs: dict[str, EvalMetricFunc] | None = None, 31) -> list[dict[str, float]]: 32 """Evaluate all retrieval steps against relevance judgments.""" 33 return [ 34 retrieval_step( 35 qrels, 36 step, 37 metrics, 38 metric_funcs, 39 ) 40 for step in result.steps 41 ]
Evaluate all retrieval steps against relevance judgments.
def
retrieval_step_to_qrels( result: cbrkit.model.ResultStep[Q, C, Any, S], max_qrel: int | None = None, min_qrel: int = 1, round_mode: Literal['floor', 'ceil', 'nearest'] = 'nearest', auto_scale: bool = True) -> QueryCaseMatrix[Q, C, int]:
44def retrieval_step_to_qrels[Q, C, S: Float]( 45 result: ResultStep[Q, C, Any, S], 46 max_qrel: int | None = None, 47 min_qrel: int = 1, 48 round_mode: Literal["floor", "ceil", "nearest"] = "nearest", 49 auto_scale: bool = True, 50) -> QueryCaseMatrix[Q, C, int]: 51 """Convert a retrieval step's similarities to integer relevance judgments.""" 52 unpacked_sims = { 53 query: {case: unpack_float(value) for case, value in entry.similarities.items()} 54 for query, entry in result.queries.items() 55 } 56 return similarities_to_qrels( 57 unpacked_sims, 58 max_qrel, 59 min_qrel, 60 round_mode, 61 auto_scale, 62 )
Convert a retrieval step's similarities to integer relevance judgments.
def
retrieval_to_qrels( result: cbrkit.model.Result[Q, C, Any, S], max_qrel: int = 5, min_qrel: int = 1, round_mode: Literal['floor', 'ceil', 'nearest'] = 'nearest', auto_scale: bool = True) -> list[QueryCaseMatrix[Q, C, int]]:
65def retrieval_to_qrels[Q, C, S: Float]( 66 result: Result[Q, C, Any, S], 67 max_qrel: int = 5, 68 min_qrel: int = 1, 69 round_mode: Literal["floor", "ceil", "nearest"] = "nearest", 70 auto_scale: bool = True, 71) -> list[QueryCaseMatrix[Q, C, int]]: 72 """Convert all retrieval steps' similarities to integer relevance judgments.""" 73 return [ 74 retrieval_step_to_qrels( 75 step, 76 max_qrel, 77 min_qrel, 78 round_mode, 79 auto_scale, 80 ) 81 for step in result.steps 82 ]
Convert all retrieval steps' similarities to integer relevance judgments.