cbrkit.synthesis.prompts
1from collections.abc import Sequence 2from dataclasses import dataclass, field 3from typing import Any 4 5from ..dumpers import markdown 6from ..helpers import get_value, sim_map2ranking, unpack_float, unpack_value 7from ..typing import ( 8 Casebase, 9 ConversionFunc, 10 Float, 11 JsonEntry, 12 SimMap, 13 StructuredValue, 14 SynthesizerPromptFunc, 15) 16 17__all__ = [ 18 "concat", 19 "transpose", 20 "transpose_value", 21 "default", 22 "pooling", 23] 24 25 26@dataclass(slots=True, frozen=True) 27class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]): 28 """Concatenates multiple prompt functions into a single prompt.""" 29 30 prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str] 31 separator: str = "\n\n" 32 33 def __call__( 34 self, 35 casebase: Casebase[K, V], 36 query: V | None, 37 similarities: SimMap[K, S] | None, 38 ) -> str: 39 return self.separator.join( 40 prompt if isinstance(prompt, str) else prompt(casebase, query, similarities) 41 for prompt in self.prompts 42 ) 43 44 45@dataclass(slots=True, frozen=True) 46class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]): 47 """Converts input values before passing them to a prompt function.""" 48 49 prompt_func: SynthesizerPromptFunc[P, K, V2, S] 50 conversion_func: ConversionFunc[V1, V2] 51 52 def __call__( 53 self, 54 casebase: Casebase[K, V1], 55 query: V1 | None, 56 similarities: SimMap[K, S] | None, 57 ) -> P: 58 return self.prompt_func( 59 {key: self.conversion_func(value) for key, value in casebase.items()}, 60 self.conversion_func(query) if query is not None else None, 61 similarities, 62 ) 63 64 65def transpose_value[P, K, V, S: Float]( 66 func: SynthesizerPromptFunc[P, K, V, S], 67) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]: 68 """Wrap a prompt function to extract values from structured value inputs.""" 69 return transpose(func, get_value) 70 71 72def encode[T](value: T, encoder: ConversionFunc[T, str]) -> str: 73 """Encode a value to a string, using 
the encoder for non-primitive types.""" 74 if value is None: 75 return "" 76 elif isinstance(value, str): 77 return value 78 elif isinstance(value, int | float | bool): 79 return str(value) 80 81 return encoder(value) 82 83 84@dataclass(slots=True, frozen=True) 85class default[V](SynthesizerPromptFunc[str, Any, V, Float]): 86 """Produces an LLM input which provides context for the LLM to be able to perform instructions. 87 88 Args: 89 instructions: Instructions for the LLM to execute on the input. 90 encoder: Encoder function to convert the a case or query to a string. 91 metadata: Optional metadata to include in the prompt. 92 93 Returns: 94 A string to be used as an LLM input. 95 96 Examples: 97 >>> prompt = default("Give me a summary of the found cars.") 98 >>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5}) 99 >>> "my query" in result 100 True 101 """ 102 103 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 104 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 105 metadata: JsonEntry | None = None 106 107 def __call__( 108 self, 109 casebase: Casebase[Any, V], 110 query: V | None, 111 similarities: SimMap[Any, Float] | None, 112 ) -> str: 113 result = "" 114 115 if isinstance(self.instructions, str): 116 result += self.instructions 117 elif self.instructions is not None: 118 result += self.instructions(casebase, query, similarities) 119 120 if query is not None: 121 result += f""" 122## Query 123 124{encode(query, self.encoder)} 125""" 126 127 result += """ 128## Documents Collection 129""" 130 131 ranking = ( 132 sim_map2ranking(similarities) 133 if similarities is not None 134 else list(casebase.keys()) 135 ) 136 137 for rank, key in enumerate(ranking, start=1): 138 if similarities is not None: 139 result += f""" 140### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f}) 141""" 142 else: 143 result += f""" 144### {key} 145""" 146 147 result += f""" 
148{encode(casebase[key], self.encoder)} 149""" 150 151 if self.metadata is not None: 152 result += f""" 153## Metadata 154 155{encode(self.metadata, self.encoder)} 156""" 157 158 return result 159 160 161@dataclass(slots=True, frozen=True) 162class pooling[V](ConversionFunc[Sequence[V], str]): 163 """ 164 Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result. 165 166 Args: 167 instructions: Instructions for the LLM to execute on the input. 168 encoder: Encoder function to convert the a case or query to a string. 169 metadata: Optional metadata to include in the prompt. 170 171 Examples: 172 >>> prompt = pooling("Please find the best match from the following partial results.") 173 >>> result = prompt(["result A", "result B", "result C"]) 174 >>> "result A" in result 175 True 176 """ 177 178 instructions: str | ConversionFunc[Sequence[V], str] | None = None 179 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 180 metadata: JsonEntry | None = None 181 unpack: bool = True 182 183 def __call__( 184 self, 185 values: Sequence[V], 186 ) -> str: 187 result = "" 188 189 if isinstance(self.instructions, str): 190 result += self.instructions 191 elif self.instructions is not None: 192 result += self.instructions(values) 193 194 result += """ 195## Partial Results 196""" 197 198 for idx, value in enumerate(values, start=1): 199 if self.unpack: 200 value = unpack_value(value) 201 202 result += f""" 203### Result {idx} 204 205{encode(value, self.encoder)} 206""" 207 208 if self.metadata is not None: 209 result += f""" 210## Metadata 211 212{encode(self.metadata, self.encoder)} 213""" 214 215 return result
@dataclass(slots=True, frozen=True)
class
concat27@dataclass(slots=True, frozen=True) 28class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]): 29 """Concatenates multiple prompt functions into a single prompt.""" 30 31 prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str] 32 separator: str = "\n\n" 33 34 def __call__( 35 self, 36 casebase: Casebase[K, V], 37 query: V | None, 38 similarities: SimMap[K, S] | None, 39 ) -> str: 40 return self.separator.join( 41 prompt if isinstance(prompt, str) else prompt(casebase, query, similarities) 42 for prompt in self.prompts 43 )
Concatenates multiple prompt functions into a single prompt.
concat( prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]], separator: str = '\n\n')
prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]]
@dataclass(slots=True, frozen=True)
class
transpose46@dataclass(slots=True, frozen=True) 47class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]): 48 """Converts input values before passing them to a prompt function.""" 49 50 prompt_func: SynthesizerPromptFunc[P, K, V2, S] 51 conversion_func: ConversionFunc[V1, V2] 52 53 def __call__( 54 self, 55 casebase: Casebase[K, V1], 56 query: V1 | None, 57 similarities: SimMap[K, S] | None, 58 ) -> P: 59 return self.prompt_func( 60 {key: self.conversion_func(value) for key, value in casebase.items()}, 61 self.conversion_func(query) if query is not None else None, 62 similarities, 63 )
Converts input values before passing them to a prompt function.
transpose( prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S], conversion_func: cbrkit.typing.ConversionFunc[V1, V2])
prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S]
conversion_func: cbrkit.typing.ConversionFunc[V1, V2]
def
transpose_value( func: cbrkit.typing.SynthesizerPromptFunc[P, K, V, S]) -> cbrkit.typing.SynthesizerPromptFunc[P, K, cbrkit.typing.StructuredValue[V], S]:
66def transpose_value[P, K, V, S: Float]( 67 func: SynthesizerPromptFunc[P, K, V, S], 68) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]: 69 """Wrap a prompt function to extract values from structured value inputs.""" 70 return transpose(func, get_value)
Wrap a prompt function to extract values from structured value inputs.
@dataclass(slots=True, frozen=True)
class
default85@dataclass(slots=True, frozen=True) 86class default[V](SynthesizerPromptFunc[str, Any, V, Float]): 87 """Produces an LLM input which provides context for the LLM to be able to perform instructions. 88 89 Args: 90 instructions: Instructions for the LLM to execute on the input. 91 encoder: Encoder function to convert the a case or query to a string. 92 metadata: Optional metadata to include in the prompt. 93 94 Returns: 95 A string to be used as an LLM input. 96 97 Examples: 98 >>> prompt = default("Give me a summary of the found cars.") 99 >>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5}) 100 >>> "my query" in result 101 True 102 """ 103 104 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 105 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 106 metadata: JsonEntry | None = None 107 108 def __call__( 109 self, 110 casebase: Casebase[Any, V], 111 query: V | None, 112 similarities: SimMap[Any, Float] | None, 113 ) -> str: 114 result = "" 115 116 if isinstance(self.instructions, str): 117 result += self.instructions 118 elif self.instructions is not None: 119 result += self.instructions(casebase, query, similarities) 120 121 if query is not None: 122 result += f""" 123## Query 124 125{encode(query, self.encoder)} 126""" 127 128 result += """ 129## Documents Collection 130""" 131 132 ranking = ( 133 sim_map2ranking(similarities) 134 if similarities is not None 135 else list(casebase.keys()) 136 ) 137 138 for rank, key in enumerate(ranking, start=1): 139 if similarities is not None: 140 result += f""" 141### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f}) 142""" 143 else: 144 result += f""" 145### {key} 146""" 147 148 result += f""" 149{encode(casebase[key], self.encoder)} 150""" 151 152 if self.metadata is not None: 153 result += f""" 154## Metadata 155 156{encode(self.metadata, self.encoder)} 157""" 158 159 return result
Produces an LLM input which provides context for the LLM to be able to perform instructions.
Arguments:
- instructions: Instructions for the LLM to execute on the input.
- encoder: Encoder function to convert a case or query to a string.
- metadata: Optional metadata to include in the prompt.
Returns:
A string to be used as an LLM input.
Examples:
>>> prompt = default("Give me a summary of the found cars.") >>> result = prompt({"c1": "car A", "c2": "car B"}, "my query", {"c1": 0.9, "c2": 0.5}) >>> "my query" in result True
default( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
@dataclass(slots=True, frozen=True)
class
pooling162@dataclass(slots=True, frozen=True) 163class pooling[V](ConversionFunc[Sequence[V], str]): 164 """ 165 Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result. 166 167 Args: 168 instructions: Instructions for the LLM to execute on the input. 169 encoder: Encoder function to convert the a case or query to a string. 170 metadata: Optional metadata to include in the prompt. 171 172 Examples: 173 >>> prompt = pooling("Please find the best match from the following partial results.") 174 >>> result = prompt(["result A", "result B", "result C"]) 175 >>> "result A" in result 176 True 177 """ 178 179 instructions: str | ConversionFunc[Sequence[V], str] | None = None 180 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 181 metadata: JsonEntry | None = None 182 unpack: bool = True 183 184 def __call__( 185 self, 186 values: Sequence[V], 187 ) -> str: 188 result = "" 189 190 if isinstance(self.instructions, str): 191 result += self.instructions 192 elif self.instructions is not None: 193 result += self.instructions(values) 194 195 result += """ 196## Partial Results 197""" 198 199 for idx, value in enumerate(values, start=1): 200 if self.unpack: 201 value = unpack_value(value) 202 203 result += f""" 204### Result {idx} 205 206{encode(value, self.encoder)} 207""" 208 209 if self.metadata is not None: 210 result += f""" 211## Metadata 212 213{encode(self.metadata, self.encoder)} 214""" 215 216 return result
Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
Arguments:
- instructions: Instructions for the LLM to execute on the input.
- encoder: Encoder function to convert a case or query to a string.
- metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = pooling("Please find the best match from the following partial results.") >>> result = prompt(["result A", "result B", "result C"]) >>> "result A" in result True
pooling( instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None, unpack: bool = True)
instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]