cbrkit.synthesis.prompts
1from collections.abc import Callable, Sequence 2from dataclasses import dataclass, field 3from typing import Any 4 5from ..dumpers import markdown 6from ..helpers import get_value, sim_map2ranking, unpack_float, unpack_value 7from ..typing import ( 8 Casebase, 9 ConversionFunc, 10 Float, 11 JsonEntry, 12 SimMap, 13 StructuredValue, 14 SynthesizerPromptFunc, 15) 16from .providers.model import DocumentsPrompt 17 18__all__ = [ 19 "concat", 20 "transpose", 21 "transpose_value", 22 "default", 23 "documents_aware", 24 "pooling", 25] 26 27 28@dataclass(slots=True, frozen=True) 29class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]): 30 prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str] 31 separator: str = "\n\n" 32 33 def __call__( 34 self, 35 casebase: Casebase[K, V], 36 query: V | None, 37 similarities: SimMap[K, S] | None, 38 ) -> str: 39 return self.separator.join( 40 prompt if isinstance(prompt, str) else prompt(casebase, query, similarities) 41 for prompt in self.prompts 42 ) 43 44 45@dataclass(slots=True, frozen=True) 46class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]): 47 prompt_func: SynthesizerPromptFunc[P, K, V2, S] 48 conversion_func: ConversionFunc[V1, V2] 49 50 def __call__( 51 self, 52 casebase: Casebase[K, V1], 53 query: V1 | None, 54 similarities: SimMap[K, S] | None, 55 ) -> P: 56 return self.prompt_func( 57 {key: self.conversion_func(value) for key, value in casebase.items()}, 58 self.conversion_func(query) if query is not None else None, 59 similarities, 60 ) 61 62 63def transpose_value[P, K, V, S: Float]( 64 func: SynthesizerPromptFunc[P, K, V, S], 65) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]: 66 return transpose(func, get_value) 67 68 69def encode[T](value: T, encoder: ConversionFunc[T, str]) -> str: 70 if value is None: 71 return "" 72 elif isinstance(value, str): 73 return value 74 elif isinstance(value, int | float | bool): 75 return str(value) 76 77 return encoder(value) 78 79 
80@dataclass(slots=True, frozen=True) 81class default[V](SynthesizerPromptFunc[str, Any, V, Float]): 82 """Produces an LLM input which provides context for the LLM to be able to perform instructions. 83 84 Args: 85 instructions: Instructions for the LLM to execute on the input. 86 encoder: Encoder function to convert the a case or query to a string. 87 metadata: Optional metadata to include in the prompt. 88 89 Returns: 90 A string to be used as an LLM input. 91 92 Examples: 93 >>> prompt = default("Give me a summary of the found cars.") 94 >>> prompt(casebase, query, similarities) # doctest: +SKIP 95 """ 96 97 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 98 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 99 metadata: JsonEntry | None = None 100 101 def __call__( 102 self, 103 casebase: Casebase[Any, V], 104 query: V | None, 105 similarities: SimMap[Any, Float] | None, 106 ) -> str: 107 result = "" 108 109 if isinstance(self.instructions, Callable): 110 result += self.instructions(casebase, query, similarities) 111 elif isinstance(self.instructions, str): 112 result += self.instructions 113 114 if query is not None: 115 result += f""" 116## Query 117 118{encode(query, self.encoder)} 119""" 120 121 result += """ 122## Documents Collection 123""" 124 125 ranking = ( 126 sim_map2ranking(similarities) 127 if similarities is not None 128 else list(casebase.keys()) 129 ) 130 131 for rank, key in enumerate(ranking, start=1): 132 if similarities is not None: 133 result += f""" 134### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f}) 135""" 136 else: 137 result += f""" 138### {key} 139""" 140 141 result += f""" 142{encode(casebase[key], self.encoder)} 143""" 144 145 if self.metadata is not None: 146 result += f""" 147## Metadata 148 149{encode(self.metadata, self.encoder)} 150""" 151 152 return result 153 154 155@dataclass(slots=True, frozen=True) 156class 
documents_aware[V](SynthesizerPromptFunc[DocumentsPrompt[str], Any, V, Any]): 157 """ 158 Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions. 159 160 Args: 161 instructions: Instructions for the LLM to execute on the input. 162 encoder: Encoder function to convert the a case or query to a string. 163 metadata: Optional metadata to include in the prompt. 164 165 Examples: 166 >>> prompt = documents_aware("Give me a summary of the found cars.") 167 >>> prompt(casebase, query, similarities) # doctest: +SKIP 168 """ 169 170 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 171 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 172 metadata: JsonEntry | None = None 173 174 def __call__( 175 self, 176 casebase: Casebase[Any, V], 177 query: V | None, 178 similarities: SimMap[Any, Float] | None, 179 ) -> DocumentsPrompt: 180 result = "" 181 182 if isinstance(self.instructions, Callable): 183 result += self.instructions(casebase, query, similarities) 184 elif isinstance(self.instructions, str): 185 result += self.instructions 186 187 if query is not None: 188 result += f""" 189## Query 190 191{encode(query, self.encoder)} 192""" 193 194 if self.metadata is not None: 195 result += f""" 196## Metadata 197 198{encode(self.metadata, self.encoder)} 199""" 200 201 ranking = ( 202 sim_map2ranking(similarities) 203 if similarities is not None 204 else list(casebase.keys()) 205 ) 206 207 return DocumentsPrompt( 208 result, 209 { 210 key: { 211 "text": encode(casebase[key], self.encoder), 212 "similarity": f"{unpack_float(similarities[key]):.3f}", 213 "rank": str(rank), 214 } 215 if similarities is not None 216 else { 217 "text": encode(casebase[key], self.encoder), 218 } 219 for rank, key in enumerate(ranking) 220 }, 221 ) 222 223 224@dataclass(slots=True, frozen=True) 225class pooling[V](ConversionFunc[Sequence[V], str]): 226 """ 227 Produces an 
LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result. 228 229 Args: 230 instructions: Instructions for the LLM to execute on the input. 231 encoder: Encoder function to convert the a case or query to a string. 232 metadata: Optional metadata to include in the prompt. 233 234 Examples: 235 >>> prompt = pooling("Please find the best match from the following partial results.") 236 >>> prompt([partial1, partial2, partial3]) # doctest: +SKIP 237 """ 238 239 instructions: str | ConversionFunc[Sequence[V], str] | None = None 240 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 241 metadata: JsonEntry | None = None 242 unpack: bool = True 243 244 def __call__( 245 self, 246 values: Sequence[V], 247 ) -> str: 248 result = "" 249 250 if isinstance(self.instructions, Callable): 251 result += self.instructions(values) 252 elif isinstance(self.instructions, str): 253 result += self.instructions 254 255 result += """ 256## Partial Results 257""" 258 259 for idx, value in enumerate(values, start=1): 260 if self.unpack: 261 value = unpack_value(value) 262 263 result += f""" 264### Result {idx} 265 266{encode(value, self.encoder)} 267""" 268 269 if self.metadata is not None: 270 result += f""" 271## Metadata 272 273{encode(self.metadata, self.encoder)} 274""" 275 276 return result
@dataclass(slots=True, frozen=True)
class
concat29@dataclass(slots=True, frozen=True) 30class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]): 31 prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str] 32 separator: str = "\n\n" 33 34 def __call__( 35 self, 36 casebase: Casebase[K, V], 37 query: V | None, 38 similarities: SimMap[K, S] | None, 39 ) -> str: 40 return self.separator.join( 41 prompt if isinstance(prompt, str) else prompt(casebase, query, similarities) 42 for prompt in self.prompts 43 )
concat( prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]], separator: str = '\n\n')
prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]]
@dataclass(slots=True, frozen=True)
class
transpose46@dataclass(slots=True, frozen=True) 47class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]): 48 prompt_func: SynthesizerPromptFunc[P, K, V2, S] 49 conversion_func: ConversionFunc[V1, V2] 50 51 def __call__( 52 self, 53 casebase: Casebase[K, V1], 54 query: V1 | None, 55 similarities: SimMap[K, S] | None, 56 ) -> P: 57 return self.prompt_func( 58 {key: self.conversion_func(value) for key, value in casebase.items()}, 59 self.conversion_func(query) if query is not None else None, 60 similarities, 61 )
transpose( prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S], conversion_func: cbrkit.typing.ConversionFunc[V1, V2])
prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S]
conversion_func: cbrkit.typing.ConversionFunc[V1, V2]
def
transpose_value( func: cbrkit.typing.SynthesizerPromptFunc[P, K, V, S]) -> cbrkit.typing.SynthesizerPromptFunc[P, K, cbrkit.typing.StructuredValue[V], S]:
@dataclass(slots=True, frozen=True)
class
default81@dataclass(slots=True, frozen=True) 82class default[V](SynthesizerPromptFunc[str, Any, V, Float]): 83 """Produces an LLM input which provides context for the LLM to be able to perform instructions. 84 85 Args: 86 instructions: Instructions for the LLM to execute on the input. 87 encoder: Encoder function to convert the a case or query to a string. 88 metadata: Optional metadata to include in the prompt. 89 90 Returns: 91 A string to be used as an LLM input. 92 93 Examples: 94 >>> prompt = default("Give me a summary of the found cars.") 95 >>> prompt(casebase, query, similarities) # doctest: +SKIP 96 """ 97 98 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 99 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 100 metadata: JsonEntry | None = None 101 102 def __call__( 103 self, 104 casebase: Casebase[Any, V], 105 query: V | None, 106 similarities: SimMap[Any, Float] | None, 107 ) -> str: 108 result = "" 109 110 if isinstance(self.instructions, Callable): 111 result += self.instructions(casebase, query, similarities) 112 elif isinstance(self.instructions, str): 113 result += self.instructions 114 115 if query is not None: 116 result += f""" 117## Query 118 119{encode(query, self.encoder)} 120""" 121 122 result += """ 123## Documents Collection 124""" 125 126 ranking = ( 127 sim_map2ranking(similarities) 128 if similarities is not None 129 else list(casebase.keys()) 130 ) 131 132 for rank, key in enumerate(ranking, start=1): 133 if similarities is not None: 134 result += f""" 135### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f}) 136""" 137 else: 138 result += f""" 139### {key} 140""" 141 142 result += f""" 143{encode(casebase[key], self.encoder)} 144""" 145 146 if self.metadata is not None: 147 result += f""" 148## Metadata 149 150{encode(self.metadata, self.encoder)} 151""" 152 153 return result
Produces an LLM input which provides context for the LLM to be able to perform instructions.
Arguments:
- instructions: Instructions for the LLM to execute on the input.
- encoder: Encoder function to convert a case or query to a string.
- metadata: Optional metadata to include in the prompt.
Returns:
A string to be used as an LLM input.
Examples:
>>> prompt = default("Give me a summary of the found cars.") >>> prompt(casebase, query, similarities) # doctest: +SKIP
default( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
@dataclass(slots=True, frozen=True)
class
documents_aware156@dataclass(slots=True, frozen=True) 157class documents_aware[V](SynthesizerPromptFunc[DocumentsPrompt[str], Any, V, Any]): 158 """ 159 Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions. 160 161 Args: 162 instructions: Instructions for the LLM to execute on the input. 163 encoder: Encoder function to convert the a case or query to a string. 164 metadata: Optional metadata to include in the prompt. 165 166 Examples: 167 >>> prompt = documents_aware("Give me a summary of the found cars.") 168 >>> prompt(casebase, query, similarities) # doctest: +SKIP 169 """ 170 171 instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None 172 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 173 metadata: JsonEntry | None = None 174 175 def __call__( 176 self, 177 casebase: Casebase[Any, V], 178 query: V | None, 179 similarities: SimMap[Any, Float] | None, 180 ) -> DocumentsPrompt: 181 result = "" 182 183 if isinstance(self.instructions, Callable): 184 result += self.instructions(casebase, query, similarities) 185 elif isinstance(self.instructions, str): 186 result += self.instructions 187 188 if query is not None: 189 result += f""" 190## Query 191 192{encode(query, self.encoder)} 193""" 194 195 if self.metadata is not None: 196 result += f""" 197## Metadata 198 199{encode(self.metadata, self.encoder)} 200""" 201 202 ranking = ( 203 sim_map2ranking(similarities) 204 if similarities is not None 205 else list(casebase.keys()) 206 ) 207 208 return DocumentsPrompt( 209 result, 210 { 211 key: { 212 "text": encode(casebase[key], self.encoder), 213 "similarity": f"{unpack_float(similarities[key]):.3f}", 214 "rank": str(rank), 215 } 216 if similarities is not None 217 else { 218 "text": encode(casebase[key], self.encoder), 219 } 220 for rank, key in enumerate(ranking) 221 }, 222 )
Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions.
Arguments:
- instructions: Instructions for the LLM to execute on the input.
- encoder: Encoder function to convert a case or query to a string.
- metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = documents_aware("Give me a summary of the found cars.") >>> prompt(casebase, query, similarities) # doctest: +SKIP
documents_aware( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
@dataclass(slots=True, frozen=True)
class
pooling225@dataclass(slots=True, frozen=True) 226class pooling[V](ConversionFunc[Sequence[V], str]): 227 """ 228 Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result. 229 230 Args: 231 instructions: Instructions for the LLM to execute on the input. 232 encoder: Encoder function to convert the a case or query to a string. 233 metadata: Optional metadata to include in the prompt. 234 235 Examples: 236 >>> prompt = pooling("Please find the best match from the following partial results.") 237 >>> prompt([partial1, partial2, partial3]) # doctest: +SKIP 238 """ 239 240 instructions: str | ConversionFunc[Sequence[V], str] | None = None 241 encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown) 242 metadata: JsonEntry | None = None 243 unpack: bool = True 244 245 def __call__( 246 self, 247 values: Sequence[V], 248 ) -> str: 249 result = "" 250 251 if isinstance(self.instructions, Callable): 252 result += self.instructions(values) 253 elif isinstance(self.instructions, str): 254 result += self.instructions 255 256 result += """ 257## Partial Results 258""" 259 260 for idx, value in enumerate(values, start=1): 261 if self.unpack: 262 value = unpack_value(value) 263 264 result += f""" 265### Result {idx} 266 267{encode(value, self.encoder)} 268""" 269 270 if self.metadata is not None: 271 result += f""" 272## Metadata 273 274{encode(self.metadata, self.encoder)} 275""" 276 277 return result
Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
Arguments:
- instructions: Instructions for the LLM to execute on the input.
- encoder: Encoder function to convert a case or query to a string.
- metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = pooling("Please find the best match from the following partial results.") >>> prompt([partial1, partial2, partial3]) # doctest: +SKIP
pooling( instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None, unpack: bool = True)
instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]