12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760 |
- # results.py
- from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
- import pprint
- from weakref import ref as wkref
- from typing import Tuple, Any
# Types that are treated as a single atomic token instead of being
# iterated over when building results.
str_type: Tuple[type, ...] = (str, bytes)

# Concrete type of a generator object, captured from a throwaway (empty)
# generator expression so it can be used in isinstance() checks.
_generator_type = type(_ for _ in ())
- class _ParseResultsWithOffset:
- __slots__ = ["tup"]
- def __init__(self, p1, p2):
- self.tup = (p1, p2)
- def __getitem__(self, i):
- return self.tup[i]
- def __getstate__(self):
- return self.tup
- def __setstate__(self, *args):
- self.tup = args[0]
class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by results name used as a key (``results["<results_name>"]``)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print("{} -> {}".format(s, fn(eval(s))))
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: 31
        - month: 12
        - year: 1999
    """

    # A ``toklist`` equal to any of these is treated as "no value" by
    # ``__init__`` and is not stored under the results name.
    _null_values: Tuple[Any, ...] = (None, [], "", ())

    __slots__ = [
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
        "__weakref__",
    ]

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`:

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.delimited_list(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints:

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            """Create the list, rejecting any ``contained`` that is not a ``list``.

            Raises:
                TypeError: if ``contained`` is given and is not a ``list``.
            """
            if contained is None:
                contained = []

            if not isinstance(contained, list):
                raise TypeError(
                    "{} may only be constructed with a list,"
                    " not {}".format(cls.__name__, type(contained).__name__)
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        """Allocate a result object and populate its token list.

        If ``toklist`` is already a ``ParseResults`` it is returned as-is
        (no new object is created). Otherwise:

        - ``None`` gives an empty token list;
        - a ``ParseResults.List`` is kept intact as a single token (copied);
        - any other ``list`` or a generator is expanded into the token list;
        - anything else becomes a one-element token list.
        """
        if isinstance(toklist, ParseResults):
            return toklist

        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            if isinstance(toklist, ParseResults.List):
                # keep the wrapped list as one token rather than flattening it
                self._toklist = [toklist[:]]
            else:
                self._toklist = list(toklist)
        else:
            self._toklist = [toklist]

        self._tokdict = dict()
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        """Record the results name (if any) for the tokens set up by ``__new__``.

        Args:
            toklist: the token(s) being wrapped.
            name: results name to register; ``None`` or ``""`` means no name.
                An ``int`` name is converted to ``str``.
            asList: when true, the named value is stored as a nested
                ``ParseResults``; otherwise the first token (or ``toklist``
                itself if it cannot be indexed) is stored.
            modal: when false, ``name`` is added to the set of names for which
                ``results[name]`` returns every stored value instead of only
                the last one.
            isinstance: the builtin, bound as a default argument (see the
                performance note above); not meant to be supplied by callers.
        """
        self._modal = modal

        if name is None or name == "":
            return

        if isinstance(name, int):
            name = str(name)
        if not modal:
            self._all_names = {name}
        self._name = name

        if toklist in self._null_values:
            return

        if isinstance(toklist, (str_type, type)):
            toklist = [toklist]

        if asList:
            if isinstance(toklist, ParseResults):
                self[name] = _ParseResultsWithOffset(
                    ParseResults(toklist._toklist), 0
                )
            else:
                self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
            self[name]._name = name
        else:
            try:
                self[name] = toklist[0]
            except (KeyError, TypeError, IndexError):
                if toklist is not self:
                    self[name] = toklist
                else:
                    self._name = name

    def __getitem__(self, i):
        """Return a token (``int``/``slice`` index) or a named result (any other key).

        For a name, the most recently stored value is returned, unless the
        name was registered as non-modal, in which case a new
        ``ParseResults`` holding all stored values is returned.

        Raises:
            IndexError: for an out-of-range integer index.
            KeyError: for an unknown results name.
        """
        if isinstance(i, (int, slice)):
            return self._toklist[i]

        if i not in self._all_names:
            return self._tokdict[i][-1][0]
        return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Assign a token (``int``/``slice`` key) or add a named result.

        A ``_ParseResultsWithOffset`` value is always stored under ``k`` as a
        named result, keeping its offset; any other value stored under a
        non-index key is recorded with offset 0. If the stored value is a
        ``ParseResults``, its parent is set to a weak reference to ``self``.
        """
        if isinstance(v, _ParseResultsWithOffset):
            # build a new list (rather than appending in place) so that
            # occurrence lists shared with shallow copies are not modified
            self._tokdict[k] = self._tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, list()) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v

        if isinstance(sub, ParseResults):
            sub._parent = wkref(self)

    def __delitem__(self, i):
        """Delete token(s) by ``int``/``slice`` index, or a named result by key.

        When tokens are removed, the offsets recorded for named results
        located after the removed positions are shifted down accordingly.
        """
        if not isinstance(i, (int, slice)):
            del self._tokdict[i]
            return

        mylen = len(self._toklist)
        del self._toklist[i]

        # convert int to slice
        if isinstance(i, int):
            if i < 0:
                i += mylen
            i = slice(i, i + 1)

        # get removed indices, highest first, so that each adjustment is
        # made against positions not yet shifted by a lower removal
        removed = list(range(*i.indices(mylen)))
        removed.reverse()

        # fixup indices in token dictionary
        for occurrences in self._tokdict.values():
            for j in removed:
                for k, (value, position) in enumerate(occurrences):
                    occurrences[k] = _ParseResultsWithOffset(
                        value, position - (position > j)
                    )

    def __contains__(self, k) -> bool:
        """Return whether ``k`` is a defined results name."""
        return k in self._tokdict

    def __len__(self) -> int:
        """Return the number of tokens in the list form of the results."""
        return len(self._toklist)

    def __bool__(self) -> bool:
        """Return true if there is at least one token or one results name."""
        return bool(self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        """Iterate over the tokens (not the results names)."""
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        """Iterate over a reversed copy of the token list."""
        return iter(self._toklist[::-1])

    def keys(self):
        """Return an iterator over the defined results names."""
        return iter(self._tokdict)

    def values(self):
        """Return a generator over the value of each results name."""
        return (self[k] for k in self.keys())

    def items(self):
        """Return a generator of ``(name, value)`` pairs for the results names."""
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return bool(self._tokdict)

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``.

        Raises:
            TypeError: if a keyword argument other than ``default`` is given.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: AAB

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == "default":
                args = (args[0], v)
            else:
                raise TypeError(
                    "pop() got an unexpected keyword argument {!r}".format(k)
                )

        # With a single argument a missing name is allowed to raise, exactly
        # like dict.pop(key) without a default.
        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
            index = args[0]
            ret = self[index]
            del self[index]
            return ret

        defaultvalue = args[1]
        return defaultvalue

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        self._toklist.insert(index, ins_string)
        # fixup indices in token dictionary
        for occurrences in self._tokdict.values():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position + (position > index)
                )

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.
        If ``itemseq`` is itself a ``ParseResults``, its results names are
        merged in as well (same as ``+=``).

        Example::

            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        """Resolve an unknown attribute as a results name.

        Returns the named result if ``name`` is defined, otherwise ``""``.
        Names starting with ``"__"`` are never defaulted to ``""``; they
        raise ``AttributeError`` when not defined as results names.
        """
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__(self, other) -> "ParseResults":
        """Return a copy of ``self`` with ``other`` merged in (see ``+=``)."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other) -> "ParseResults":
        """Merge the tokens and results names of ``other`` into ``self``.

        Offsets of the named results taken from ``other`` are shifted by the
        current token count of ``self``; a negative offset is replaced by
        that count.
        """
        if other._tokdict:
            offset = len(self._toklist)

            def shifted(position):
                return offset if position < 0 else position + offset

            # Materialise the new entries before storing any of them, so the
            # dictionary being read is not modified mid-iteration when
            # ``other`` is ``self``.
            otherdictitems = [
                (k, _ParseResultsWithOffset(v[0], shifted(v[1])))
                for k, vlist in other._tokdict.items()
                for v in vlist
            ]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0]._parent = wkref(self)

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        """Support ``0 + results`` so that ``sum()`` can merge many results.

        For any other left operand ``NotImplemented`` is returned, which makes
        Python raise ``TypeError``. (Evaluating ``other + self`` here would
        only re-enter this method, because ``__radd__`` is reached after the
        left operand has already declined the addition.)
        """
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        return NotImplemented

    def __repr__(self) -> str:
        """Return ``ClassName(<token list repr>, <as_dict()>)``."""
        return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())

    def __str__(self) -> str:
        """Return a list-like rendering: nested results via ``str``, tokens via ``repr``."""
        rendered = [
            str(tok) if isinstance(tok, ParseResults) else repr(tok)
            for tok in self._toklist
        ]
        return "[" + ", ".join(rendered) + "]"

    def _asStringList(self, sep=""):
        """Return the tokens, flattened through nested results, as strings.

        A non-empty ``sep`` is inserted between top-level items; it is not
        passed down to nested results.
        """
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens. Nested
        ``ParseResults`` are converted to lists recursively; all other tokens
        are returned unchanged.

        Example::

            patt = OneOrMore(Word(alphas))
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometimes you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """

        def to_item(obj):
            # nested results become a dict if they have names, else a list
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            return obj

        return {k: to_item(v) for k, v in self.items()}

    def copy(self) -> "ParseResults":
        """
        Returns a new copy of a :class:`ParseResults` object.

        The token list is copied and the name dictionary is copied
        shallowly (the per-name occurrence lists are shared with the
        original).
        """
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def get_name(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name

        if self._parent:
            # dereference the weak reference; may yield None if the parent is gone
            par = self._parent()
            if not par:
                return None
            # look for the name under which this very object is stored in the parent
            return next(
                (
                    k
                    for k, vlist in par._tokdict.items()
                    for v, loc in vlist
                    if self is v
                ),
                None,
            )

        if (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            # a single token with a single name recorded at offset 0 or -1
            return next(iter(self._tokdict.keys()))

        return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        Args:
            indent: prefix written at the start of each emitted line.
            full: when true, also list results names and nested results.
            include_list: when true, start with the ``as_list()`` rendering.
            _depth: nesting level used for recursive calls.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(result.dump())

        prints::

            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        NL = "\n"
        out = [indent + str(self.as_list()) if include_list else ""]

        if full:
            if self.haskeys():
                items = sorted((str(k), v) for k, v in self.items())
                for k, v in items:
                    if out:
                        out.append(NL)
                    out.append("{}{}- {}: ".format(indent, (" " * _depth), k))
                    if isinstance(v, ParseResults):
                        if v:
                            out.append(
                                v.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                )
                            )
                        else:
                            out.append(str(v))
                    else:
                        out.append(repr(v))

            # list items by position only if at least one of them is nested
            if any(isinstance(vv, ParseResults) for vv in self):
                for i, vv in enumerate(self):
                    if isinstance(vv, ParseResults):
                        rendered = vv.dump(
                            indent=indent,
                            full=full,
                            include_list=include_list,
                            _depth=_depth + 1,
                        )
                    else:
                        rendered = str(vv)
                    out.append(
                        "\n{}{}[{}]:\n{}{}{}".format(
                            indent,
                            (" " * (_depth)),
                            i,
                            indent,
                            (" " * (_depth + 1)),
                            rendered,
                        )
                    )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimited_list(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return ``(tokens, (names dict copy, parent, all-values names, name))``.

        The parent is dereferenced from its weak reference; it is ``None``
        if there is no parent or the parent no longer exists.
        """
        # explicit conditional instead of ``a and b or None`` so that a live
        # parent that is merely empty (falsy) is not discarded
        parent = self._parent() if self._parent is not None else None
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                parent,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        """Restore the state produced by ``__getstate__``."""
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        self._parent = wkref(par) if par is not None else None

    def __getnewargs__(self):
        """Return the ``(toklist, name)`` arguments passed to ``__new__`` on unpickling."""
        return self._toklist, self._name

    def __dir__(self):
        """List the class attributes plus the defined results names."""
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            # strings/bytes are iterable but are treated as scalars here
            try:
                iter(obj)
            except Exception:
                return False
            else:
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    # camelCase aliases for the snake_case methods above
    asList = as_list
    asDict = as_dict
    getName = get_name
# Register ParseResults as a virtual subclass of both ABCs, so that
# isinstance()/issubclass() checks against them succeed without
# ParseResults inheriting from either one.
MutableSequence.register(ParseResults)
MutableMapping.register(ParseResults)
|