cbrkit.adapt.strings

  1import re
  2import sys
  3from collections.abc import Callable
  4from dataclasses import dataclass
  5from typing import override
  6
  7from ..helpers import HasMetadata, get_metadata
  8from ..typing import AdaptationFunc, JsonDict
  9
 10__all__ = [
 11    "regex",
 12]
 13
 14
 15@dataclass(slots=True)
 16class regex(AdaptationFunc[str], HasMetadata):
 17    """Replace parts of a string using regular expressions.
 18
 19    Args:
 20        case_pattern: Regular expression pattern to match against the case.
 21        query_pattern: Regular expression pattern to match against the query.
 22        replacement: Replacement string or function that takes two match objects.
 23        count: Maximum number of replacements to make.
 24        pos: Position in the string to start searching.
 25        endpos: Position in the string to stop searching.
 26
 27    Returns:
 28        The modified string.
 29
 30    Examples:
 31        >>> func = regex("25", "30", "NUMBER")
 32        >>> func("Alice is 25 years old.", "Peter is 30 years old.")
 33        'Alice is NUMBER years old.'
 34    """
 35
 36    case_pattern: re.Pattern[str]
 37    query_pattern: re.Pattern[str]
 38    replacement: Callable[[re.Match[str], re.Match[str]], str]
 39    count: int
 40    pos: int
 41    endpos: int
 42    _metadata: JsonDict
 43
 44    def __init__(
 45        self,
 46        case_pattern: str | re.Pattern[str],
 47        query_pattern: str | re.Pattern[str],
 48        replacement: str | Callable[[re.Match[str], re.Match[str]], str],
 49        count: int = 0,
 50        pos: int = 0,
 51        endpos: int = sys.maxsize,
 52    ):
 53        if isinstance(query_pattern, str):
 54            self.query_pattern = re.compile(query_pattern)
 55        else:
 56            self.query_pattern = query_pattern
 57
 58        if isinstance(case_pattern, str):
 59            self.case_pattern = re.compile(case_pattern)
 60        else:
 61            self.case_pattern = case_pattern
 62
 63        if isinstance(replacement, str):
 64            self.replacement = lambda case_match, query_match: replacement
 65        else:
 66            self.replacement = replacement
 67
 68        self.count = count
 69        self.pos = pos
 70        self.endpos = endpos
 71
 72        self._metadata = {
 73            "query_pattern": str(query_pattern),
 74            "case_pattern": str(case_pattern),
 75            "replacement": replacement
 76            if isinstance(replacement, str)
 77            else get_metadata(replacement),
 78            "count": count,
 79            "pos": pos,
 80            "endpos": endpos,
 81        }
 82
 83    @property
 84    @override
 85    def metadata(self) -> JsonDict:
 86        return self._metadata
 87
 88    @override
 89    def __call__(self, case: str, query: str) -> str:
 90        query_match = self.query_pattern.search(
 91            query,
 92            self.pos,
 93            self.endpos,
 94        )
 95
 96        if query_match is not None:
 97            return self.case_pattern.sub(
 98                lambda case_match: self.replacement(case_match, query_match),
 99                case,
100                self.count,
101            )
102
103        return case
@dataclass(slots=True)
class regex(cbrkit.typing.AdaptationFunc[str], cbrkit.typing.HasMetadata):
 16@dataclass(slots=True)
 17class regex(AdaptationFunc[str], HasMetadata):
 18    """Replace parts of a string using regular expressions.
 19
 20    Args:
 21        case_pattern: Regular expression pattern to match against the case.
 22        query_pattern: Regular expression pattern to match against the query.
 23        replacement: Replacement string or function that takes two match objects.
 24        count: Maximum number of replacements to make.
 25        pos: Position in the string to start searching.
 26        endpos: Position in the string to stop searching.
 27
 28    Returns:
 29        The modified string.
 30
 31    Examples:
 32        >>> func = regex("25", "30", "NUMBER")
 33        >>> func("Alice is 25 years old.", "Peter is 30 years old.")
 34        'Alice is NUMBER years old.'
 35    """
 36
 37    case_pattern: re.Pattern[str]
 38    query_pattern: re.Pattern[str]
 39    replacement: Callable[[re.Match[str], re.Match[str]], str]
 40    count: int
 41    pos: int
 42    endpos: int
 43    _metadata: JsonDict
 44
 45    def __init__(
 46        self,
 47        case_pattern: str | re.Pattern[str],
 48        query_pattern: str | re.Pattern[str],
 49        replacement: str | Callable[[re.Match[str], re.Match[str]], str],
 50        count: int = 0,
 51        pos: int = 0,
 52        endpos: int = sys.maxsize,
 53    ):
 54        if isinstance(query_pattern, str):
 55            self.query_pattern = re.compile(query_pattern)
 56        else:
 57            self.query_pattern = query_pattern
 58
 59        if isinstance(case_pattern, str):
 60            self.case_pattern = re.compile(case_pattern)
 61        else:
 62            self.case_pattern = case_pattern
 63
 64        if isinstance(replacement, str):
 65            self.replacement = lambda case_match, query_match: replacement
 66        else:
 67            self.replacement = replacement
 68
 69        self.count = count
 70        self.pos = pos
 71        self.endpos = endpos
 72
 73        self._metadata = {
 74            "query_pattern": str(query_pattern),
 75            "case_pattern": str(case_pattern),
 76            "replacement": replacement
 77            if isinstance(replacement, str)
 78            else get_metadata(replacement),
 79            "count": count,
 80            "pos": pos,
 81            "endpos": endpos,
 82        }
 83
 84    @property
 85    @override
 86    def metadata(self) -> JsonDict:
 87        return self._metadata
 88
 89    @override
 90    def __call__(self, case: str, query: str) -> str:
 91        query_match = self.query_pattern.search(
 92            query,
 93            self.pos,
 94            self.endpos,
 95        )
 96
 97        if query_match is not None:
 98            return self.case_pattern.sub(
 99                lambda case_match: self.replacement(case_match, query_match),
100                case,
101                self.count,
102            )
103
104        return case

Replace parts of a string using regular expressions.

Arguments:
  • case_pattern: Regular expression pattern to match against the case.
  • query_pattern: Regular expression pattern to match against the query.
  • replacement: Replacement string or function that takes two match objects.
  • count: Maximum number of replacements to make.
  • pos: Position in the string to start searching.
  • endpos: Position in the string to stop searching.
Returns:

The modified string.

Examples:
>>> func = regex("25", "30", "NUMBER")
>>> func("Alice is 25 years old.", "Peter is 30 years old.")
'Alice is NUMBER years old.'
regex( case_pattern: str | re.Pattern[str], query_pattern: str | re.Pattern[str], replacement: str | Callable[[re.Match[str], re.Match[str]], str], count: int = 0, pos: int = 0, endpos: int = 9223372036854775807)
45    def __init__(
46        self,
47        case_pattern: str | re.Pattern[str],
48        query_pattern: str | re.Pattern[str],
49        replacement: str | Callable[[re.Match[str], re.Match[str]], str],
50        count: int = 0,
51        pos: int = 0,
52        endpos: int = sys.maxsize,
53    ):
54        if isinstance(query_pattern, str):
55            self.query_pattern = re.compile(query_pattern)
56        else:
57            self.query_pattern = query_pattern
58
59        if isinstance(case_pattern, str):
60            self.case_pattern = re.compile(case_pattern)
61        else:
62            self.case_pattern = case_pattern
63
64        if isinstance(replacement, str):
65            self.replacement = lambda case_match, query_match: replacement
66        else:
67            self.replacement = replacement
68
69        self.count = count
70        self.pos = pos
71        self.endpos = endpos
72
73        self._metadata = {
74            "query_pattern": str(query_pattern),
75            "case_pattern": str(case_pattern),
76            "replacement": replacement
77            if isinstance(replacement, str)
78            else get_metadata(replacement),
79            "count": count,
80            "pos": pos,
81            "endpos": endpos,
82        }
case_pattern: re.Pattern[str]
query_pattern: re.Pattern[str]
replacement: Callable[[re.Match[str], re.Match[str]], str]
count: int
pos: int
endpos: int