cbrkit.eval.retrieval

 1from collections.abc import Sequence
 2from typing import Any, Literal
 3
 4from ..helpers import unpack_float
 5from ..retrieval import Result, ResultStep
 6from ..typing import EvalMetricFunc, Float, QueryCaseMatrix
 7from .common import DEFAULT_METRICS, compute, similarities_to_qrels
 8
 9
10def retrieval_step[Q, C, S: Float](
11    qrels: QueryCaseMatrix[Q, C, int],
12    step: ResultStep[Q, C, Any, S],
13    metrics: Sequence[str] = DEFAULT_METRICS,
14    metric_funcs: dict[str, EvalMetricFunc] | None = None,
15) -> dict[str, float]:
16    """Evaluate a single retrieval step against relevance judgments."""
17    return compute(
18        qrels,
19        {query: entry.similarities for query, entry in step.queries.items()},
20        metrics,
21        metric_funcs,
22    )
23
24
25def retrieval[Q, C, S: Float](
26    qrels: QueryCaseMatrix[Q, C, int],
27    result: Result[Q, C, Any, S],
28    metrics: Sequence[str] = DEFAULT_METRICS,
29    metric_funcs: dict[str, EvalMetricFunc] | None = None,
30) -> list[dict[str, float]]:
31    """Evaluate all retrieval steps against relevance judgments."""
32    return [
33        retrieval_step(
34            qrels,
35            step,
36            metrics,
37            metric_funcs,
38        )
39        for step in result.steps
40    ]
41
42
43def retrieval_step_to_qrels[Q, C, S: Float](
44    result: ResultStep[Q, C, Any, S],
45    max_qrel: int | None = None,
46    min_qrel: int = 1,
47    round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
48    auto_scale: bool = True,
49) -> QueryCaseMatrix[Q, C, int]:
50    """Convert a retrieval step's similarities to integer relevance judgments."""
51    unpacked_sims = {
52        query: {case: unpack_float(value) for case, value in entry.similarities.items()}
53        for query, entry in result.queries.items()
54    }
55    return similarities_to_qrels(
56        unpacked_sims,
57        max_qrel,
58        min_qrel,
59        round_mode,
60        auto_scale,
61    )
62
63
64def retrieval_to_qrels[Q, C, S: Float](
65    result: Result[Q, C, Any, S],
66    max_qrel: int = 5,
67    min_qrel: int = 1,
68    round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
69    auto_scale: bool = True,
70) -> list[QueryCaseMatrix[Q, C, int]]:
71    """Convert all retrieval steps' similarities to integer relevance judgments."""
72    return [
73        retrieval_step_to_qrels(
74            step,
75            max_qrel,
76            min_qrel,
77            round_mode,
78            auto_scale,
79        )
80        for step in result.steps
81    ]
def retrieval_step(qrels: QueryCaseMatrix[Q, C, int], step: cbrkit.model.ResultStep[Q, C, Any, S], metrics: Sequence[str] = ('precision', 'recall', 'f1', 'map', 'ndcg', 'correctness', 'completeness'), metric_funcs: dict[str, cbrkit.typing.EvalMetricFunc] | None = None) -> dict[str, float]:
11def retrieval_step[Q, C, S: Float](
12    qrels: QueryCaseMatrix[Q, C, int],
13    step: ResultStep[Q, C, Any, S],
14    metrics: Sequence[str] = DEFAULT_METRICS,
15    metric_funcs: dict[str, EvalMetricFunc] | None = None,
16) -> dict[str, float]:
17    """Evaluate a single retrieval step against relevance judgments."""
18    return compute(
19        qrels,
20        {query: entry.similarities for query, entry in step.queries.items()},
21        metrics,
22        metric_funcs,
23    )

Evaluate a single retrieval step against relevance judgments.

def retrieval(qrels: QueryCaseMatrix[Q, C, int], result: cbrkit.model.Result[Q, C, Any, S], metrics: Sequence[str] = ('precision', 'recall', 'f1', 'map', 'ndcg', 'correctness', 'completeness'), metric_funcs: dict[str, cbrkit.typing.EvalMetricFunc] | None = None) -> list[dict[str, float]]:
26def retrieval[Q, C, S: Float](
27    qrels: QueryCaseMatrix[Q, C, int],
28    result: Result[Q, C, Any, S],
29    metrics: Sequence[str] = DEFAULT_METRICS,
30    metric_funcs: dict[str, EvalMetricFunc] | None = None,
31) -> list[dict[str, float]]:
32    """Evaluate all retrieval steps against relevance judgments."""
33    return [
34        retrieval_step(
35            qrels,
36            step,
37            metrics,
38            metric_funcs,
39        )
40        for step in result.steps
41    ]

Evaluate all retrieval steps against relevance judgments.

def retrieval_step_to_qrels(result: cbrkit.model.ResultStep[Q, C, Any, S], max_qrel: int | None = None, min_qrel: int = 1, round_mode: Literal['floor', 'ceil', 'nearest'] = 'nearest', auto_scale: bool = True) -> QueryCaseMatrix[Q, C, int]:
44def retrieval_step_to_qrels[Q, C, S: Float](
45    result: ResultStep[Q, C, Any, S],
46    max_qrel: int | None = None,
47    min_qrel: int = 1,
48    round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
49    auto_scale: bool = True,
50) -> QueryCaseMatrix[Q, C, int]:
51    """Convert a retrieval step's similarities to integer relevance judgments."""
52    unpacked_sims = {
53        query: {case: unpack_float(value) for case, value in entry.similarities.items()}
54        for query, entry in result.queries.items()
55    }
56    return similarities_to_qrels(
57        unpacked_sims,
58        max_qrel,
59        min_qrel,
60        round_mode,
61        auto_scale,
62    )

Convert a retrieval step's similarities to integer relevance judgments.

def retrieval_to_qrels(result: cbrkit.model.Result[Q, C, Any, S], max_qrel: int = 5, min_qrel: int = 1, round_mode: Literal['floor', 'ceil', 'nearest'] = 'nearest', auto_scale: bool = True) -> list[QueryCaseMatrix[Q, C, int]]:
65def retrieval_to_qrels[Q, C, S: Float](
66    result: Result[Q, C, Any, S],
67    max_qrel: int = 5,
68    min_qrel: int = 1,
69    round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
70    auto_scale: bool = True,
71) -> list[QueryCaseMatrix[Q, C, int]]:
72    """Convert all retrieval steps' similarities to integer relevance judgments."""
73    return [
74        retrieval_step_to_qrels(
75            step,
76            max_qrel,
77            min_qrel,
78            round_mode,
79            auto_scale,
80        )
81        for step in result.steps
82    ]

Convert all retrieval steps' similarities to integer relevance judgments.