cbrkit.adapt.strings
import re
import sys
from collections.abc import Callable
from dataclasses import dataclass
from typing import override

from ..helpers import HasMetadata, get_metadata
from ..typing import AdaptationFunc, JsonDict

__all__ = [
    "regex",
]


@dataclass(slots=True)
class regex(AdaptationFunc[str], HasMetadata):
    """Adapt a case string by regex substitution, gated on a query match.

    The query is searched for ``query_pattern`` first. Only when that search
    succeeds are the matches of ``case_pattern`` in the case substituted;
    otherwise the case is returned unchanged.

    Args:
        case_pattern: Pattern (string or compiled) whose matches in the case
            are replaced.
        query_pattern: Pattern (string or compiled) that must be found in the
            query for any replacement to happen.
        replacement: Either a fixed string used as the replacement text, or a
            callable receiving the case match and the query match (in that
            order) and returning the replacement text.
        count: Maximum number of replacements to make in the case. It is
            forwarded to ``re.Pattern.sub``, where ``0`` means "replace all".
        pos: Position in the query at which the search starts.
        endpos: Position in the query at which the search stops.

    Returns:
        The modified string.

    Examples:
        >>> func = regex("25", "30", "NUMBER")
        >>> func("Alice is 25 years old.", "Peter is 30 years old.")
        'Alice is NUMBER years old.'
    """

    case_pattern: re.Pattern[str]
    query_pattern: re.Pattern[str]
    replacement: Callable[[re.Match[str], re.Match[str]], str]
    count: int
    pos: int
    endpos: int
    _metadata: JsonDict

    def __init__(
        self,
        case_pattern: str | re.Pattern[str],
        query_pattern: str | re.Pattern[str],
        replacement: str | Callable[[re.Match[str], re.Match[str]], str],
        count: int = 0,
        pos: int = 0,
        endpos: int = sys.maxsize,
    ):
        """Normalize the arguments and record them as metadata."""
        # String patterns are compiled once here; compiled ones are kept as-is.
        # The query pattern is handled first, then the case pattern.
        self.query_pattern = (
            re.compile(query_pattern)
            if isinstance(query_pattern, str)
            else query_pattern
        )
        self.case_pattern = (
            re.compile(case_pattern)
            if isinstance(case_pattern, str)
            else case_pattern
        )

        # A plain string is wrapped so that `self.replacement` is always a
        # two-argument callable; the metadata keeps the string itself.
        if isinstance(replacement, str):
            self.replacement = lambda case_match, query_match: replacement
            replacement_info = replacement
        else:
            self.replacement = replacement
            replacement_info = get_metadata(replacement)

        self.count, self.pos, self.endpos = count, pos, endpos

        # The patterns are recorded via str() of the arguments as passed in.
        self._metadata = {
            "query_pattern": str(query_pattern),
            "case_pattern": str(case_pattern),
            "replacement": replacement_info,
            "count": count,
            "pos": pos,
            "endpos": endpos,
        }

    @property
    @override
    def metadata(self) -> JsonDict:
        """Return the metadata dictionary built in ``__init__``."""
        return self._metadata

    @override
    def __call__(self, case: str, query: str) -> str:
        """Return ``case`` with its matches replaced if the query matches.

        Args:
            case: String to adapt.
            query: String searched for ``query_pattern`` within
                ``[pos, endpos)``.

        Returns:
            The adapted case, or ``case`` unchanged when the query does not
            match.
        """
        found_in_query = self.query_pattern.search(query, self.pos, self.endpos)

        # Without a query match there is nothing to adapt.
        if found_in_query is None:
            return case

        def substitute(found_in_case: re.Match[str]) -> str:
            return self.replacement(found_in_case, found_in_query)

        return self.case_pattern.sub(substitute, case, count=self.count)
@dataclass(slots=True)
class regex(AdaptationFunc[str], HasMetadata):
    """Adapt a case string by regex substitution, gated on a query match.

    The query is searched for ``query_pattern`` first. Only when that search
    succeeds are the matches of ``case_pattern`` in the case substituted;
    otherwise the case is returned unchanged.

    Args:
        case_pattern: Pattern (string or compiled) whose matches in the case
            are replaced.
        query_pattern: Pattern (string or compiled) that must be found in the
            query for any replacement to happen.
        replacement: Either a fixed string used as the replacement text, or a
            callable receiving the case match and the query match (in that
            order) and returning the replacement text.
        count: Maximum number of replacements to make in the case. It is
            forwarded to ``re.Pattern.sub``, where ``0`` means "replace all".
        pos: Position in the query at which the search starts.
        endpos: Position in the query at which the search stops.

    Returns:
        The modified string.

    Examples:
        >>> func = regex("25", "30", "NUMBER")
        >>> func("Alice is 25 years old.", "Peter is 30 years old.")
        'Alice is NUMBER years old.'
    """

    case_pattern: re.Pattern[str]
    query_pattern: re.Pattern[str]
    replacement: Callable[[re.Match[str], re.Match[str]], str]
    count: int
    pos: int
    endpos: int
    _metadata: JsonDict

    def __init__(
        self,
        case_pattern: str | re.Pattern[str],
        query_pattern: str | re.Pattern[str],
        replacement: str | Callable[[re.Match[str], re.Match[str]], str],
        count: int = 0,
        pos: int = 0,
        endpos: int = sys.maxsize,
    ):
        """Normalize the arguments and record them as metadata."""
        # String patterns are compiled once here; compiled ones are kept as-is.
        # The query pattern is handled first, then the case pattern.
        self.query_pattern = (
            re.compile(query_pattern)
            if isinstance(query_pattern, str)
            else query_pattern
        )
        self.case_pattern = (
            re.compile(case_pattern)
            if isinstance(case_pattern, str)
            else case_pattern
        )

        # A plain string is wrapped so that `self.replacement` is always a
        # two-argument callable; the metadata keeps the string itself.
        if isinstance(replacement, str):
            self.replacement = lambda case_match, query_match: replacement
            replacement_info = replacement
        else:
            self.replacement = replacement
            replacement_info = get_metadata(replacement)

        self.count, self.pos, self.endpos = count, pos, endpos

        # The patterns are recorded via str() of the arguments as passed in.
        self._metadata = {
            "query_pattern": str(query_pattern),
            "case_pattern": str(case_pattern),
            "replacement": replacement_info,
            "count": count,
            "pos": pos,
            "endpos": endpos,
        }

    @property
    @override
    def metadata(self) -> JsonDict:
        """Return the metadata dictionary built in ``__init__``."""
        return self._metadata

    @override
    def __call__(self, case: str, query: str) -> str:
        """Return ``case`` with its matches replaced if the query matches.

        Args:
            case: String to adapt.
            query: String searched for ``query_pattern`` within
                ``[pos, endpos)``.

        Returns:
            The adapted case, or ``case`` unchanged when the query does not
            match.
        """
        found_in_query = self.query_pattern.search(query, self.pos, self.endpos)

        # Without a query match there is nothing to adapt.
        if found_in_query is None:
            return case

        def substitute(found_in_case: re.Match[str]) -> str:
            return self.replacement(found_in_case, found_in_query)

        return self.case_pattern.sub(substitute, case, count=self.count)
Replace parts of a string using regular expressions.
Arguments:
- case_pattern: Regular expression pattern to match against the case.
- query_pattern: Regular expression pattern to match against the query.
- replacement: Replacement string or function that takes two match objects.
- count: Maximum number of replacements to make.
- pos: Position in the query string at which to start searching.
- endpos: Position in the query string at which to stop searching.
Returns:
The modified string.
Examples:
>>> func = regex("25", "30", "NUMBER")
>>> func("Alice is 25 years old.", "Peter is 30 years old.")
'Alice is NUMBER years old.'
regex( case_pattern: str | re.Pattern[str], query_pattern: str | re.Pattern[str], replacement: str | Callable[[re.Match[str], re.Match[str]], str], count: int = 0, pos: int = 0, endpos: int = 9223372036854775807)
def __init__(
    self,
    case_pattern: str | re.Pattern[str],
    query_pattern: str | re.Pattern[str],
    replacement: str | Callable[[re.Match[str], re.Match[str]], str],
    count: int = 0,
    pos: int = 0,
    endpos: int = sys.maxsize,
):
    """Normalize the arguments and record them as metadata.

    Args:
        case_pattern: Pattern (string or compiled) stored as
            ``self.case_pattern``; strings are compiled.
        query_pattern: Pattern (string or compiled) stored as
            ``self.query_pattern``; strings are compiled.
        replacement: A fixed replacement string, or a callable taking the
            case match and the query match and returning a string.
        count: Stored as ``self.count``.
        pos: Stored as ``self.pos``.
        endpos: Stored as ``self.endpos``.
    """
    # String patterns are compiled once here; compiled ones are kept as-is.
    # The query pattern is handled first, then the case pattern.
    self.query_pattern = (
        re.compile(query_pattern)
        if isinstance(query_pattern, str)
        else query_pattern
    )
    self.case_pattern = (
        re.compile(case_pattern)
        if isinstance(case_pattern, str)
        else case_pattern
    )

    # A plain string is wrapped so that `self.replacement` is always a
    # two-argument callable; the metadata keeps the string itself.
    if isinstance(replacement, str):
        self.replacement = lambda case_match, query_match: replacement
        replacement_info = replacement
    else:
        self.replacement = replacement
        replacement_info = get_metadata(replacement)

    self.count, self.pos, self.endpos = count, pos, endpos

    # The patterns are recorded via str() of the arguments as passed in.
    self._metadata = {
        "query_pattern": str(query_pattern),
        "case_pattern": str(case_pattern),
        "replacement": replacement_info,
        "count": count,
        "pos": pos,
        "endpos": endpos,
    }