cbrkit.synthesis.prompts

  1from collections.abc import Sequence
  2from dataclasses import dataclass, field
  3from typing import Any
  4
  5from ..dumpers import markdown
  6from ..helpers import get_value, sim_map2ranking, unpack_float, unpack_value
  7from ..typing import (
  8    Casebase,
  9    ConversionFunc,
 10    Float,
 11    JsonEntry,
 12    SimMap,
 13    StructuredValue,
 14    SynthesizerPromptFunc,
 15)
 16
 17__all__ = [
 18    "concat",
 19    "transpose",
 20    "transpose_value",
 21    "default",
 22    "pooling",
 23]
 24
 25
 26@dataclass(slots=True, frozen=True)
 27class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]):
 28    """Concatenates multiple prompt functions into a single prompt."""
 29
 30    prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str]
 31    separator: str = "\n\n"
 32
 33    def __call__(
 34        self,
 35        casebase: Casebase[K, V],
 36        query: V | None,
 37        similarities: SimMap[K, S] | None,
 38    ) -> str:
 39        return self.separator.join(
 40            prompt if isinstance(prompt, str) else prompt(casebase, query, similarities)
 41            for prompt in self.prompts
 42        )
 43
 44
 45@dataclass(slots=True, frozen=True)
 46class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]):
 47    """Converts input values before passing them to a prompt function."""
 48
 49    prompt_func: SynthesizerPromptFunc[P, K, V2, S]
 50    conversion_func: ConversionFunc[V1, V2]
 51
 52    def __call__(
 53        self,
 54        casebase: Casebase[K, V1],
 55        query: V1 | None,
 56        similarities: SimMap[K, S] | None,
 57    ) -> P:
 58        return self.prompt_func(
 59            {key: self.conversion_func(value) for key, value in casebase.items()},
 60            self.conversion_func(query) if query is not None else None,
 61            similarities,
 62        )
 63
 64
 65def transpose_value[P, K, V, S: Float](
 66    func: SynthesizerPromptFunc[P, K, V, S],
 67) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]:
 68    """Wrap a prompt function to extract values from structured value inputs."""
 69    return transpose(func, get_value)
 70
 71
 72def encode[T](value: T, encoder: ConversionFunc[T, str]) -> str:
 73    """Encode a value to a string, using the encoder for non-primitive types."""
 74    if value is None:
 75        return ""
 76    elif isinstance(value, str):
 77        return value
 78    elif isinstance(value, int | float | bool):
 79        return str(value)
 80
 81    return encoder(value)
 82
 83
 84@dataclass(slots=True, frozen=True)
 85class default[V](SynthesizerPromptFunc[str, Any, V, Float]):
 86    """Produces an LLM input which provides context for the LLM to be able to perform instructions.
 87
 88    Args:
 89        instructions: Instructions for the LLM to execute on the input.
 90        encoder: Encoder function to convert the a case or query to a string.
 91        metadata: Optional metadata to include in the prompt.
 92
 93    Returns:
 94        A string to be used as an LLM input.
 95
 96    Examples:
 97        >>> prompt = default("Give me a summary of the found cars.")
 98        >>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5})
 99        >>> "my query" in result
100        True
101    """
102
103    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
104    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
105    metadata: JsonEntry | None = None
106
107    def __call__(
108        self,
109        casebase: Casebase[Any, V],
110        query: V | None,
111        similarities: SimMap[Any, Float] | None,
112    ) -> str:
113        result = ""
114
115        if isinstance(self.instructions, str):
116            result += self.instructions
117        elif self.instructions is not None:
118            result += self.instructions(casebase, query, similarities)
119
120        if query is not None:
121            result += f"""
122## Query
123
124{encode(query, self.encoder)}
125"""
126
127        result += """
128## Documents Collection
129"""
130
131        ranking = (
132            sim_map2ranking(similarities)
133            if similarities is not None
134            else list(casebase.keys())
135        )
136
137        for rank, key in enumerate(ranking, start=1):
138            if similarities is not None:
139                result += f"""
140### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f})
141"""
142            else:
143                result += f"""
144### {key}
145"""
146
147            result += f"""
148{encode(casebase[key], self.encoder)}
149"""
150
151        if self.metadata is not None:
152            result += f"""
153## Metadata
154
155{encode(self.metadata, self.encoder)}
156"""
157
158        return result
159
160
161@dataclass(slots=True, frozen=True)
162class pooling[V](ConversionFunc[Sequence[V], str]):
163    """
164    Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
165
166    Args:
167        instructions: Instructions for the LLM to execute on the input.
168        encoder: Encoder function to convert the a case or query to a string.
169        metadata: Optional metadata to include in the prompt.
170
171    Examples:
172        >>> prompt = pooling("Please find the best match from the following partial results.")
173        >>> result = prompt(["result A", "result B", "result C"])
174        >>> "result A" in result
175        True
176    """
177
178    instructions: str | ConversionFunc[Sequence[V], str] | None = None
179    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
180    metadata: JsonEntry | None = None
181    unpack: bool = True
182
183    def __call__(
184        self,
185        values: Sequence[V],
186    ) -> str:
187        result = ""
188
189        if isinstance(self.instructions, str):
190            result += self.instructions
191        elif self.instructions is not None:
192            result += self.instructions(values)
193
194        result += """
195## Partial Results
196"""
197
198        for idx, value in enumerate(values, start=1):
199            if self.unpack:
200                value = unpack_value(value)
201
202            result += f"""
203### Result {idx}
204
205{encode(value, self.encoder)}
206"""
207
208        if self.metadata is not None:
209            result += f"""
210## Metadata
211
212{encode(self.metadata, self.encoder)}
213"""
214
215        return result
@dataclass(slots=True, frozen=True)
class concat(cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], typing.Generic[K, V, S]):
27@dataclass(slots=True, frozen=True)
28class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]):
29    """Concatenates multiple prompt functions into a single prompt."""
30
31    prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str]
32    separator: str = "\n\n"
33
34    def __call__(
35        self,
36        casebase: Casebase[K, V],
37        query: V | None,
38        similarities: SimMap[K, S] | None,
39    ) -> str:
40        return self.separator.join(
41            prompt if isinstance(prompt, str) else prompt(casebase, query, similarities)
42            for prompt in self.prompts
43        )

Concatenates multiple prompt functions into a single prompt.

concat( prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]], separator: str = '\n\n')
prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]]
separator: str
@dataclass(slots=True, frozen=True)
class transpose(cbrkit.typing.SynthesizerPromptFunc[P, K, V1, S], typing.Generic[P, K, V1, V2, S]):
46@dataclass(slots=True, frozen=True)
47class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]):
48    """Converts input values before passing them to a prompt function."""
49
50    prompt_func: SynthesizerPromptFunc[P, K, V2, S]
51    conversion_func: ConversionFunc[V1, V2]
52
53    def __call__(
54        self,
55        casebase: Casebase[K, V1],
56        query: V1 | None,
57        similarities: SimMap[K, S] | None,
58    ) -> P:
59        return self.prompt_func(
60            {key: self.conversion_func(value) for key, value in casebase.items()},
61            self.conversion_func(query) if query is not None else None,
62            similarities,
63        )

Converts input values before passing them to a prompt function.

transpose( prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S], conversion_func: cbrkit.typing.ConversionFunc[V1, V2])
prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S]
conversion_func: cbrkit.typing.ConversionFunc[V1, V2]
def transpose_value( func: cbrkit.typing.SynthesizerPromptFunc[P, K, V, S]) -> cbrkit.typing.SynthesizerPromptFunc[P, K, cbrkit.typing.StructuredValue[V], S]:
66def transpose_value[P, K, V, S: Float](
67    func: SynthesizerPromptFunc[P, K, V, S],
68) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]:
69    """Wrap a prompt function to extract values from structured value inputs."""
70    return transpose(func, get_value)

Wrap a prompt function to extract values from structured value inputs.

@dataclass(slots=True, frozen=True)
class default(cbrkit.typing.SynthesizerPromptFunc[str, typing.Any, V, Float], typing.Generic[V]):
 85@dataclass(slots=True, frozen=True)
 86class default[V](SynthesizerPromptFunc[str, Any, V, Float]):
 87    """Produces an LLM input which provides context for the LLM to be able to perform instructions.
 88
 89    Args:
 90        instructions: Instructions for the LLM to execute on the input.
 91        encoder: Encoder function to convert a case or query to a string.
 92        metadata: Optional metadata to include in the prompt.
 93
 94    Returns:
 95        A string to be used as an LLM input.
 96
 97    Examples:
 98        >>> prompt = default("Give me a summary of the found cars.")
 99        >>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5})
100        >>> "my query" in result
101        True
102    """
103
104    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
105    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
106    metadata: JsonEntry | None = None
107
108    def __call__(
109        self,
110        casebase: Casebase[Any, V],
111        query: V | None,
112        similarities: SimMap[Any, Float] | None,
113    ) -> str:
114        result = ""
115
116        if isinstance(self.instructions, str):
117            result += self.instructions
118        elif self.instructions is not None:
119            result += self.instructions(casebase, query, similarities)
120
121        if query is not None:
122            result += f"""
123## Query
124
125{encode(query, self.encoder)}
126"""
127
128        result += """
129## Documents Collection
130"""
131
132        ranking = (
133            sim_map2ranking(similarities)
134            if similarities is not None
135            else list(casebase.keys())
136        )
137
138        for rank, key in enumerate(ranking, start=1):
139            if similarities is not None:
140                result += f"""
141### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f})
142"""
143            else:
144                result += f"""
145### {key}
146"""
147
148            result += f"""
149{encode(casebase[key], self.encoder)}
150"""
151
152        if self.metadata is not None:
153            result += f"""
154## Metadata
155
156{encode(self.metadata, self.encoder)}
157"""
158
159        return result

Produces an LLM input which provides context for the LLM to be able to perform instructions.

Arguments:
  • instructions: Instructions for the LLM to execute on the input.
  • encoder: Encoder function to convert a case or query to a string.
  • metadata: Optional metadata to include in the prompt.
Returns:

A string to be used as an LLM input.

Examples:
>>> prompt = default("Give me a summary of the found cars.")
>>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5})
>>> "my query" in result
True
default( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
metadata: JsonEntry | None
@dataclass(slots=True, frozen=True)
class pooling(cbrkit.typing.ConversionFunc[collections.abc.Sequence[V], str], typing.Generic[V]):
162@dataclass(slots=True, frozen=True)
163class pooling[V](ConversionFunc[Sequence[V], str]):
164    """
165    Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
166
167    Args:
168        instructions: Instructions for the LLM to execute on the input.
 169        encoder: Encoder function to convert a case or query to a string.
170        metadata: Optional metadata to include in the prompt.
171
172    Examples:
173        >>> prompt = pooling("Please find the best match from the following partial results.")
174        >>> result = prompt(["result A", "result B", "result C"])
175        >>> "result A" in result
176        True
177    """
178
179    instructions: str | ConversionFunc[Sequence[V], str] | None = None
180    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
181    metadata: JsonEntry | None = None
182    unpack: bool = True
183
184    def __call__(
185        self,
186        values: Sequence[V],
187    ) -> str:
188        result = ""
189
190        if isinstance(self.instructions, str):
191            result += self.instructions
192        elif self.instructions is not None:
193            result += self.instructions(values)
194
195        result += """
196## Partial Results
197"""
198
199        for idx, value in enumerate(values, start=1):
200            if self.unpack:
201                value = unpack_value(value)
202
203            result += f"""
204### Result {idx}
205
206{encode(value, self.encoder)}
207"""
208
209        if self.metadata is not None:
210            result += f"""
211## Metadata
212
213{encode(self.metadata, self.encoder)}
214"""
215
216        return result

Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.

Arguments:
  • instructions: Instructions for the LLM to execute on the input.
  • encoder: Encoder function to convert a case or query to a string.
  • metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = pooling("Please find the best match from the following partial results.")
>>> result = prompt(["result A", "result B", "result C"])
>>> "result A" in result
True
pooling( instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None, unpack: bool = True)
instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
metadata: JsonEntry | None
unpack: bool