cbrkit.synthesis.prompts

  1from collections.abc import Callable, Sequence
  2from dataclasses import dataclass, field
  3from typing import Any
  4
  5from ..dumpers import markdown
  6from ..helpers import get_value, sim_map2ranking, unpack_float, unpack_value
  7from ..typing import (
  8    Casebase,
  9    ConversionFunc,
 10    Float,
 11    JsonEntry,
 12    SimMap,
 13    StructuredValue,
 14    SynthesizerPromptFunc,
 15)
 16from .providers.model import DocumentsPrompt
 17
 18__all__ = [
 19    "concat",
 20    "transpose",
 21    "transpose_value",
 22    "default",
 23    "documents_aware",
 24    "pooling",
 25]
 26
 27
 28@dataclass(slots=True, frozen=True)
 29class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]):
 30    prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str]
 31    separator: str = "\n\n"
 32
 33    def __call__(
 34        self,
 35        casebase: Casebase[K, V],
 36        query: V | None,
 37        similarities: SimMap[K, S] | None,
 38    ) -> str:
 39        return self.separator.join(
 40            prompt if isinstance(prompt, str) else prompt(casebase, query, similarities)
 41            for prompt in self.prompts
 42        )
 43
 44
 45@dataclass(slots=True, frozen=True)
 46class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]):
 47    prompt_func: SynthesizerPromptFunc[P, K, V2, S]
 48    conversion_func: ConversionFunc[V1, V2]
 49
 50    def __call__(
 51        self,
 52        casebase: Casebase[K, V1],
 53        query: V1 | None,
 54        similarities: SimMap[K, S] | None,
 55    ) -> P:
 56        return self.prompt_func(
 57            {key: self.conversion_func(value) for key, value in casebase.items()},
 58            self.conversion_func(query) if query is not None else None,
 59            similarities,
 60        )
 61
 62
 63def transpose_value[P, K, V, S: Float](
 64    func: SynthesizerPromptFunc[P, K, V, S],
 65) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]:
 66    return transpose(func, get_value)
 67
 68
 69def encode[T](value: T, encoder: ConversionFunc[T, str]) -> str:
 70    if value is None:
 71        return ""
 72    elif isinstance(value, str):
 73        return value
 74    elif isinstance(value, int | float | bool):
 75        return str(value)
 76
 77    return encoder(value)
 78
 79
 80@dataclass(slots=True, frozen=True)
 81class default[V](SynthesizerPromptFunc[str, Any, V, Float]):
 82    """Produces an LLM input which provides context for the LLM to be able to perform instructions.
 83
 84    Args:
 85        instructions: Instructions for the LLM to execute on the input.
 86        encoder: Encoder function to convert the a case or query to a string.
 87        metadata: Optional metadata to include in the prompt.
 88
 89    Returns:
 90        A string to be used as an LLM input.
 91
 92    Examples:
 93        >>> prompt = default("Give me a summary of the found cars.")
 94        >>> prompt(casebase, query, similarities) # doctest: +SKIP
 95    """
 96
 97    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
 98    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
 99    metadata: JsonEntry | None = None
100
101    def __call__(
102        self,
103        casebase: Casebase[Any, V],
104        query: V | None,
105        similarities: SimMap[Any, Float] | None,
106    ) -> str:
107        result = ""
108
109        if isinstance(self.instructions, Callable):
110            result += self.instructions(casebase, query, similarities)
111        elif isinstance(self.instructions, str):
112            result += self.instructions
113
114        if query is not None:
115            result += f"""
116## Query
117
118{encode(query, self.encoder)}
119"""
120
121        result += """
122## Documents Collection
123"""
124
125        ranking = (
126            sim_map2ranking(similarities)
127            if similarities is not None
128            else list(casebase.keys())
129        )
130
131        for rank, key in enumerate(ranking, start=1):
132            if similarities is not None:
133                result += f"""
134### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f})
135"""
136            else:
137                result += f"""
138### {key}
139"""
140
141            result += f"""
142{encode(casebase[key], self.encoder)}
143"""
144
145        if self.metadata is not None:
146            result += f"""
147## Metadata
148
149{encode(self.metadata, self.encoder)}
150"""
151
152        return result
153
154
155@dataclass(slots=True, frozen=True)
156class documents_aware[V](SynthesizerPromptFunc[DocumentsPrompt[str], Any, V, Any]):
157    """
158    Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions.
159
160    Args:
161        instructions: Instructions for the LLM to execute on the input.
162        encoder: Encoder function to convert the a case or query to a string.
163        metadata: Optional metadata to include in the prompt.
164
165    Examples:
166        >>> prompt = documents_aware("Give me a summary of the found cars.")
167        >>> prompt(casebase, query, similarities) # doctest: +SKIP
168    """
169
170    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
171    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
172    metadata: JsonEntry | None = None
173
174    def __call__(
175        self,
176        casebase: Casebase[Any, V],
177        query: V | None,
178        similarities: SimMap[Any, Float] | None,
179    ) -> DocumentsPrompt:
180        result = ""
181
182        if isinstance(self.instructions, Callable):
183            result += self.instructions(casebase, query, similarities)
184        elif isinstance(self.instructions, str):
185            result += self.instructions
186
187        if query is not None:
188            result += f"""
189## Query
190
191{encode(query, self.encoder)}
192"""
193
194        if self.metadata is not None:
195            result += f"""
196## Metadata
197
198{encode(self.metadata, self.encoder)}
199"""
200
201        ranking = (
202            sim_map2ranking(similarities)
203            if similarities is not None
204            else list(casebase.keys())
205        )
206
207        return DocumentsPrompt(
208            result,
209            {
210                key: {
211                    "text": encode(casebase[key], self.encoder),
212                    "similarity": f"{unpack_float(similarities[key]):.3f}",
213                    "rank": str(rank),
214                }
215                if similarities is not None
216                else {
217                    "text": encode(casebase[key], self.encoder),
218                }
219                for rank, key in enumerate(ranking)
220            },
221        )
222
223
224@dataclass(slots=True, frozen=True)
225class pooling[V](ConversionFunc[Sequence[V], str]):
226    """
227    Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
228
229    Args:
230        instructions: Instructions for the LLM to execute on the input.
231        encoder: Encoder function to convert the a case or query to a string.
232        metadata: Optional metadata to include in the prompt.
233
234    Examples:
235        >>> prompt = pooling("Please find the best match from the following partial results.")
236        >>> prompt([partial1, partial2, partial3]) # doctest: +SKIP
237    """
238
239    instructions: str | ConversionFunc[Sequence[V], str] | None = None
240    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
241    metadata: JsonEntry | None = None
242    unpack: bool = True
243
244    def __call__(
245        self,
246        values: Sequence[V],
247    ) -> str:
248        result = ""
249
250        if isinstance(self.instructions, Callable):
251            result += self.instructions(values)
252        elif isinstance(self.instructions, str):
253            result += self.instructions
254
255        result += """
256## Partial Results
257"""
258
259        for idx, value in enumerate(values, start=1):
260            if self.unpack:
261                value = unpack_value(value)
262
263            result += f"""
264### Result {idx}
265
266{encode(value, self.encoder)}
267"""
268
269        if self.metadata is not None:
270            result += f"""
271## Metadata
272
273{encode(self.metadata, self.encoder)}
274"""
275
276        return result
@dataclass(slots=True, frozen=True)
class concat(cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], typing.Generic[K, V, S]):
29@dataclass(slots=True, frozen=True)
30class concat[K, V, S: Float](SynthesizerPromptFunc[str, K, V, S]):
31    prompts: Sequence[SynthesizerPromptFunc[str, K, V, S] | str]
32    separator: str = "\n\n"
33
34    def __call__(
35        self,
36        casebase: Casebase[K, V],
37        query: V | None,
38        similarities: SimMap[K, S] | None,
39    ) -> str:
40        return self.separator.join(
41            prompt if isinstance(prompt, str) else prompt(casebase, query, similarities)
42            for prompt in self.prompts
43        )
concat( prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]], separator: str = '\n\n')
prompts: Sequence[typing.Union[cbrkit.typing.SynthesizerPromptFunc[str, K, V, S], str]]
separator: str
@dataclass(slots=True, frozen=True)
class transpose(cbrkit.typing.SynthesizerPromptFunc[P, K, V1, S], typing.Generic[P, K, V1, V2, S]):
46@dataclass(slots=True, frozen=True)
47class transpose[P, K, V1, V2, S: Float](SynthesizerPromptFunc[P, K, V1, S]):
48    prompt_func: SynthesizerPromptFunc[P, K, V2, S]
49    conversion_func: ConversionFunc[V1, V2]
50
51    def __call__(
52        self,
53        casebase: Casebase[K, V1],
54        query: V1 | None,
55        similarities: SimMap[K, S] | None,
56    ) -> P:
57        return self.prompt_func(
58            {key: self.conversion_func(value) for key, value in casebase.items()},
59            self.conversion_func(query) if query is not None else None,
60            similarities,
61        )
transpose( prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S], conversion_func: cbrkit.typing.ConversionFunc[V1, V2])
prompt_func: cbrkit.typing.SynthesizerPromptFunc[P, K, V2, S]
conversion_func: cbrkit.typing.ConversionFunc[V1, V2]
def transpose_value( func: cbrkit.typing.SynthesizerPromptFunc[P, K, V, S]) -> cbrkit.typing.SynthesizerPromptFunc[P, K, cbrkit.typing.StructuredValue[V], S]:
64def transpose_value[P, K, V, S: Float](
65    func: SynthesizerPromptFunc[P, K, V, S],
66) -> SynthesizerPromptFunc[P, K, StructuredValue[V], S]:
67    return transpose(func, get_value)
@dataclass(slots=True, frozen=True)
class default(cbrkit.typing.SynthesizerPromptFunc[str, typing.Any, V, Float], typing.Generic[V]):
 81@dataclass(slots=True, frozen=True)
 82class default[V](SynthesizerPromptFunc[str, Any, V, Float]):
 83    """Produces an LLM input which provides context for the LLM to be able to perform instructions.
 84
 85    Args:
 86        instructions: Instructions for the LLM to execute on the input.
 86        encoder: Encoder function to convert a case or query to a string.
 88        metadata: Optional metadata to include in the prompt.
 89
 90    Returns:
 91        A string to be used as an LLM input.
 92
 93    Examples:
 94        >>> prompt = default("Give me a summary of the found cars.")
 95        >>> prompt(casebase, query, similarities) # doctest: +SKIP
 96    """
 97
 98    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
 99    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
100    metadata: JsonEntry | None = None
101
102    def __call__(
103        self,
104        casebase: Casebase[Any, V],
105        query: V | None,
106        similarities: SimMap[Any, Float] | None,
107    ) -> str:
108        result = ""
109
110        if isinstance(self.instructions, Callable):
111            result += self.instructions(casebase, query, similarities)
112        elif isinstance(self.instructions, str):
113            result += self.instructions
114
115        if query is not None:
116            result += f"""
117## Query
118
119{encode(query, self.encoder)}
120"""
121
122        result += """
123## Documents Collection
124"""
125
126        ranking = (
127            sim_map2ranking(similarities)
128            if similarities is not None
129            else list(casebase.keys())
130        )
131
132        for rank, key in enumerate(ranking, start=1):
133            if similarities is not None:
134                result += f"""
135### {key} (Rank: {rank}, Similarity: {unpack_float(similarities[key]):.3f})
136"""
137            else:
138                result += f"""
139### {key}
140"""
141
142            result += f"""
143{encode(casebase[key], self.encoder)}
144"""
145
146        if self.metadata is not None:
147            result += f"""
148## Metadata
149
150{encode(self.metadata, self.encoder)}
151"""
152
153        return result

Produces an LLM input which provides context for the LLM to be able to perform instructions.

Arguments:
  • instructions: Instructions for the LLM to execute on the input.
  • encoder: Encoder function to convert a case or query to a string.
  • metadata: Optional metadata to include in the prompt.
Returns:

A string to be used as an LLM input.

Examples:
>>> prompt = default("Give me a summary of the found cars.")
>>> prompt(casebase, query, similarities) # doctest: +SKIP
default( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
metadata: JsonEntry | None
@dataclass(slots=True, frozen=True)
class documents_aware(cbrkit.typing.SynthesizerPromptFunc[cbrkit.synthesis.providers.model.DocumentsPrompt[str], typing.Any, V, typing.Any], typing.Generic[V]):
156@dataclass(slots=True, frozen=True)
157class documents_aware[V](SynthesizerPromptFunc[DocumentsPrompt[str], Any, V, Any]):
158    """
159    Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions.
160
161    Args:
162        instructions: Instructions for the LLM to execute on the input.
162        encoder: Encoder function to convert a case or query to a string.
164        metadata: Optional metadata to include in the prompt.
165
166    Examples:
167        >>> prompt = documents_aware("Give me a summary of the found cars.")
168        >>> prompt(casebase, query, similarities) # doctest: +SKIP
169    """
170
171    instructions: str | SynthesizerPromptFunc[str, Any, V, Float] | None = None
172    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
173    metadata: JsonEntry | None = None
174
175    def __call__(
176        self,
177        casebase: Casebase[Any, V],
178        query: V | None,
179        similarities: SimMap[Any, Float] | None,
180    ) -> DocumentsPrompt:
181        result = ""
182
183        if isinstance(self.instructions, Callable):
184            result += self.instructions(casebase, query, similarities)
185        elif isinstance(self.instructions, str):
186            result += self.instructions
187
188        if query is not None:
189            result += f"""
190## Query
191
192{encode(query, self.encoder)}
193"""
194
195        if self.metadata is not None:
196            result += f"""
197## Metadata
198
199{encode(self.metadata, self.encoder)}
200"""
201
202        ranking = (
203            sim_map2ranking(similarities)
204            if similarities is not None
205            else list(casebase.keys())
206        )
207
208        return DocumentsPrompt(
209            result,
210            {
211                key: {
212                    "text": encode(casebase[key], self.encoder),
213                    "similarity": f"{unpack_float(similarities[key]):.3f}",
214                    "rank": str(rank),
215                }
216                if similarities is not None
217                else {
218                    "text": encode(casebase[key], self.encoder),
219                }
220                for rank, key in enumerate(ranking)
221            },
222        )

Produces a structured LLM input (as of now: exclusive for cohere) which provides context for the LLM to be able to perform instructions.

Arguments:
  • instructions: Instructions for the LLM to execute on the input.
  • encoder: Encoder function to convert a case or query to a string.
  • metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = documents_aware("Give me a summary of the found cars.")
>>> prompt(casebase, query, similarities) # doctest: +SKIP
documents_aware( instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None)
instructions: Union[str, cbrkit.typing.SynthesizerPromptFunc[str, Any, V, Float], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
metadata: JsonEntry | None
@dataclass(slots=True, frozen=True)
class pooling(cbrkit.typing.ConversionFunc[collections.abc.Sequence[V], str], typing.Generic[V]):
225@dataclass(slots=True, frozen=True)
226class pooling[V](ConversionFunc[Sequence[V], str]):
227    """
228    Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.
229
230    Args:
231        instructions: Instructions for the LLM to execute on the input.
232        encoder: Encoder function to convert a case or query to a string.
233        metadata: Optional metadata to include in the prompt.
234
235    Examples:
236        >>> prompt = pooling("Please find the best match from the following partial results.")
237        >>> prompt([partial1, partial2, partial3]) # doctest: +SKIP
238    """
239
240    instructions: str | ConversionFunc[Sequence[V], str] | None = None
241    encoder: ConversionFunc[V | JsonEntry, str] = field(default_factory=markdown)
242    metadata: JsonEntry | None = None
243    unpack: bool = True
244
245    def __call__(
246        self,
247        values: Sequence[V],
248    ) -> str:
249        result = ""
250
251        if isinstance(self.instructions, Callable):
252            result += self.instructions(values)
253        elif isinstance(self.instructions, str):
254            result += self.instructions
255
256        result += """
257## Partial Results
258"""
259
260        for idx, value in enumerate(values, start=1):
261            if self.unpack:
262                value = unpack_value(value)
263
264            result += f"""
265### Result {idx}
266
267{encode(value, self.encoder)}
268"""
269
270        if self.metadata is not None:
271            result += f"""
272## Metadata
273
274{encode(self.metadata, self.encoder)}
275"""
276
277        return result

Produces an LLM input to aggregate partial results (i.e., the LLM output for single chunks) to a final, global result.

Arguments:
  • instructions: Instructions for the LLM to execute on the input.
  • encoder: Encoder function to convert a case or query to a string.
  • metadata: Optional metadata to include in the prompt.
Examples:
>>> prompt = pooling("Please find the best match from the following partial results.")
>>> prompt([partial1, partial2, partial3]) # doctest: +SKIP
pooling( instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType] = None, encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str] = <factory>, metadata: JsonEntry | None = None, unpack: bool = True)
instructions: Union[str, cbrkit.typing.ConversionFunc[Sequence[V], str], NoneType]
encoder: cbrkit.typing.ConversionFunc[typing.Union[V, JsonEntry], str]
metadata: JsonEntry | None
unpack: bool