# actions.py
from __future__ import annotations

from typing import Union, Callable, Any

from .exceptions import ParseException
from .util import col, replaced_by_pep8
from .results import ParseResults


# A parse action may accept 0, 1, 2 or 3 of the (string, location, tokens)
# arguments, always keeping the trailing ones.
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]


class OnlyOnce:
    """
    Wrapper for parse actions, to ensure they are only called once.
    Note: parse action signature must include all 3 arguments.

    The first call is forwarded to the wrapped callable and its result is
    returned. Any later call raises :class:`ParseException` until
    :meth:`reset` is called.
    """

    def __init__(self, method_call: Callable[[str, int, ParseResults], Any]):
        # Imported locally rather than at module level
        # (NOTE(review): presumably to avoid a circular import with .core).
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s: str, l: int, t: ParseResults) -> Any:
        if not self.called:
            results = self.callable(s, l, t)
            # Only mark as called once the wrapped action has returned; if it
            # raises, the wrapper stays armed.
            self.called = True
            return results
        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")

    def reset(self) -> None:
        """
        Allow the associated parse action to be called once more.
        """
        self.called = False


def match_only_at_col(n: int) -> ParseAction:
    """
    Helper method for defining parse actions that require matching at
    a specific column in the input text.

    The returned parse action raises :class:`ParseException` when the
    column computed by ``col(locn, strg)`` differs from ``n``.
    """

    def verify_col(strg: str, locn: int, toks: ParseResults) -> None:
        if col(locn, strg) != n:
            raise ParseException(strg, locn, f"matched token not at column {n}")

    return verify_col


def replace_with(repl_str: Any) -> ParseAction:
    """
    Helper method for common parse actions that simply return
    a literal value.  Especially useful when used with
    :meth:`ParserElement.transform_string`.

    The returned parse action ignores its arguments and returns
    ``[repl_str]``.

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda s, l, t: [repl_str]


def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
    """
    Helper parse action for removing quotation marks from parsed
    quoted strings.

    Returns the first token with its first and last characters dropped.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    return t[0][1:-1]


def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
    """
    Helper to create a validating parse action to be used with start
    tags created with :class:`make_xml_tags` or
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``with_attribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

    - keyword arguments, as in ``(align="right")``, or
    - as an explicit dict with ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    The forms may be combined: positional name-value tuples are checked
    first, followed by the keyword attributes.

    For attribute names with a namespace prefix, you must use the second
    or third form, since such names are not valid Python keywords.
    Attribute names are matched insensitive to upper/lower case
    (NOTE(review): this function looks the name up in the parsed tokens
    exactly as given, so this presumably relies on the tag expression
    normalizing attribute names - confirm against ``make_html_tags``).

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To verify that the attribute exists, but without specifying a value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when a
    required attribute is missing or has a different value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Honor both positional tuples and keyword attributes; previously the
    # keyword attributes were silently dropped whenever any positional
    # tuple was supplied.
    attrs_list: list[tuple[str, str]] = [*args, *attr_dict.items()]

    def pa(s: str, l: int, tokens: ParseResults) -> None:
        for attrName, attrValue in attrs_list:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            # ANY_VALUE only requires the attribute to be present.
            if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:  # type: ignore [attr-defined]
                raise ParseException(
                    s,
                    l,
                    f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
                )

    return pa


# Sentinel meaning "attribute must be present, any value accepted".
with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]


def with_class(classname: str, namespace: str = "") -> ParseAction:
    """
    Simplified version of :class:`with_attribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    When ``namespace`` is non-empty, the attribute tested is
    ``"<namespace>:class"``; otherwise it is ``"class"``.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    classattr = f"{namespace}:class" if namespace else "class"
    return with_attribute(**{classattr: classname})


# Compatibility synonyms
# fmt: off
replaceWith = replaced_by_pep8("replaceWith", replace_with)
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
withClass = replaced_by_pep8("withClass", with_class)
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
# fmt: on