diff options
author | robot-piglet <[email protected]> | 2025-04-07 12:29:31 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-04-07 12:41:45 +0300 |
commit | 4013e613e103e8c9d9a22206a7e1cdb5de185f72 (patch) | |
tree | 6d91c4661d6a7d0521bc4122e4292a2f4fc276c1 /contrib/python/pyparsing | |
parent | 3f113ee06ed713e51ace6c73c0bc8ba91def65ad (diff) |
Intermediate changes
commit_hash:32ce16d8158d416147fcd7231a39bd5b3e9fa50f
Diffstat (limited to 'contrib/python/pyparsing')
-rw-r--r-- | contrib/python/pyparsing/py3/.dist-info/METADATA | 7 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/README.rst | 4 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/__init__.py | 4 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/actions.py | 2 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/core.py | 323 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py | 18 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/exceptions.py | 11 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/helpers.py | 44 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/results.py | 2 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/tools/__init__.py | 0 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/tools/cvt_pyparsing_pep8_names.py | 116 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/pyparsing/util.py | 60 | ||||
-rw-r--r-- | contrib/python/pyparsing/py3/ya.make | 4 |
13 files changed, 427 insertions, 168 deletions
diff --git a/contrib/python/pyparsing/py3/.dist-info/METADATA b/contrib/python/pyparsing/py3/.dist-info/METADATA index 6b5fbefef60..ed52278486a 100644 --- a/contrib/python/pyparsing/py3/.dist-info/METADATA +++ b/contrib/python/pyparsing/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: pyparsing -Version: 3.2.1 +Version: 3.2.2 Summary: pyparsing module - Classes and methods to define and execute parsing grammars Author-email: Paul McGuire <[email protected]> Requires-Python: >=3.9 @@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 Classifier: Programming Language :: Python :: 3 :: Only Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy @@ -56,7 +57,7 @@ Here is a program to parse ``"Hello, World!"`` (or any greeting of the form from pyparsing import Word, alphas greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" - print(hello, "->", greet.parseString(hello)) + print(hello, "->", greet.parse_string(hello)) The program outputs the following:: @@ -66,7 +67,7 @@ The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operator definitions. -The parsed results returned from ``parseString()`` is a collection of type +The parsed results returned from ``parse_string()`` is a collection of type ``ParseResults``, which can be accessed as a nested list, a dictionary, or an object with named attributes. diff --git a/contrib/python/pyparsing/py3/README.rst b/contrib/python/pyparsing/py3/README.rst index 24d603c7bc4..cfb9889f854 100644 --- a/contrib/python/pyparsing/py3/README.rst +++ b/contrib/python/pyparsing/py3/README.rst @@ -26,7 +26,7 @@ Here is a program to parse ``"Hello, World!"`` (or any greeting of the form from pyparsing import Word, alphas greet = Word(alphas) + "," + Word(alphas) + "!" hello = "Hello, World!" - print(hello, "->", greet.parseString(hello)) + print(hello, "->", greet.parse_string(hello)) The program outputs the following:: @@ -36,7 +36,7 @@ The Python representation of the grammar is quite readable, owing to the self-explanatory class names, and the use of '+', '|' and '^' operator definitions. -The parsed results returned from ``parseString()`` is a collection of type +The parsed results returned from ``parse_string()`` is a collection of type ``ParseResults``, which can be accessed as a nested list, a dictionary, or an object with named attributes. diff --git a/contrib/python/pyparsing/py3/pyparsing/__init__.py b/contrib/python/pyparsing/py3/pyparsing/__init__.py index 726c76cb244..fa1f2abe67e 100644 --- a/contrib/python/pyparsing/py3/pyparsing/__init__.py +++ b/contrib/python/pyparsing/py3/pyparsing/__init__.py @@ -120,8 +120,8 @@ class version_info(NamedTuple): return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 2, 1, "final", 1) -__version_time__ = "31 Dec 2024 20:41 UTC" +__version_info__ = version_info(3, 2, 2, "final", 1) +__version_time__ = "22 Mar 2025 22:09 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <[email protected]>" diff --git a/contrib/python/pyparsing/py3/pyparsing/actions.py b/contrib/python/pyparsing/py3/pyparsing/actions.py index f491aab986e..0153cc7132a 100644 --- a/contrib/python/pyparsing/py3/pyparsing/actions.py +++ b/contrib/python/pyparsing/py3/pyparsing/actions.py @@ -22,7 +22,7 @@ class OnlyOnce: Note: parse action signature must include all 3 arguments. """ - def __init__(self, method_call: Callable[[str, int, ParseResults], Any]): + def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None: from .core import _trim_arity self.callable = _trim_arity(method_call) diff --git a/contrib/python/pyparsing/py3/pyparsing/core.py b/contrib/python/pyparsing/py3/pyparsing/core.py index b884e2d4a40..86be949ad47 100644 --- a/contrib/python/pyparsing/py3/pyparsing/core.py +++ b/contrib/python/pyparsing/py3/pyparsing/core.py @@ -38,7 +38,6 @@ from .util import ( __config_flags, _collapse_string_to_ranges, _escape_regex_range_chars, - _bslash, _flatten, LRUMemo as _LRUMemo, UnboundedMemo as _UnboundedMemo, @@ -246,7 +245,7 @@ class _ParseActionIndexError(Exception): ParserElement parseImpl methods. """ - def __init__(self, msg: str, exc: BaseException): + def __init__(self, msg: str, exc: BaseException) -> None: self.msg: str = msg self.exc: BaseException = exc @@ -355,7 +354,7 @@ def _default_start_debug_action( ( f"{cache_hit_str}Match {expr} at loc {loc}({lineno(loc, instring)},{col(loc, instring)})\n" f" {line(loc, instring)}\n" - f" {' ' * (col(loc, instring) - 1)}^" + f" {'^':>{col(loc, instring)}}" ) ) @@ -454,7 +453,7 @@ class ParserElement(ABC): debug_match: typing.Optional[DebugSuccessAction] debug_fail: typing.Optional[DebugExceptionAction] - def __init__(self, savelist: bool = False): + def __init__(self, savelist: bool = False) -> None: self.parseAction: list[ParseAction] = list() self.failAction: typing.Optional[ParseFailAction] = None self.customName: str = None # type: ignore[assignment] @@ -465,7 +464,7 @@ class ParserElement(ABC): self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) self.copyDefaultWhiteChars = True # used when checking for left-recursion - self.mayReturnEmpty = False + self._may_return_empty = False self.keepTabs = False self.ignoreExprs: list[ParserElement] = list() self.debug = False @@ -483,6 +482,14 @@ class ParserElement(ABC): self.suppress_warnings_: list[Diagnostics] = [] self.show_in_diagram = True + @property + def mayReturnEmpty(self): + return self._may_return_empty + + @mayReturnEmpty.setter + def mayReturnEmpty(self, value): + self._may_return_empty = value + def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: """ Suppress warnings emitted for a particular diagnostic on this expression. @@ -2264,6 +2271,7 @@ class ParserElement(ABC): show_results_names: bool = False, show_groups: bool = False, embed: bool = False, + show_hidden: bool = False, **kwargs, ) -> None: """ @@ -2278,6 +2286,7 @@ class ParserElement(ABC): - ``show_results_names`` - bool flag whether diagram should show annotations for defined results names - ``show_groups`` - bool flag whether groups should be highlighted with an unlabeled surrounding box + - ``show_hidden`` - bool flag to show diagram elements for internal elements that are usually hidden - ``embed`` - bool flag whether generated HTML should omit <HEAD>, <BODY>, and <DOCTYPE> tags to embed the resulting HTML in an enclosing HTML source - ``head`` - str containing additional HTML to insert into the <HEAD> section of the generated code; @@ -2303,6 +2312,7 @@ class ParserElement(ABC): vertical=vertical, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, diagram_kwargs=kwargs, ) if not isinstance(output_html, (str, Path)): @@ -2352,7 +2362,7 @@ class ParserElement(ABC): class _PendingSkip(ParserElement): # internal placeholder class to hold a place were '...' is added to a parser element, # once another ParserElement is added, this placeholder will be replaced with a SkipTo - def __init__(self, expr: ParserElement, must_skip: bool = False): + def __init__(self, expr: ParserElement, must_skip: bool = False) -> None: super().__init__() self.anchor = expr self.must_skip = must_skip @@ -2395,7 +2405,7 @@ class Token(ParserElement): matching patterns. """ - def __init__(self): + def __init__(self) -> None: super().__init__(savelist=False) def _generateDefaultName(self) -> str: @@ -2407,9 +2417,9 @@ class NoMatch(Token): A token that will never match. """ - def __init__(self): + def __init__(self) -> None: super().__init__() - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False self.errmsg = "Unmatchable token" @@ -2449,14 +2459,14 @@ class Literal(Token): def __getnewargs__(self): return (self.match,) - def __init__(self, match_string: str = "", *, matchString: str = ""): + def __init__(self, match_string: str = "", *, matchString: str = "") -> None: super().__init__() match_string = matchString or match_string self.match = match_string self.matchLen = len(match_string) self.firstMatchChar = match_string[:1] self.errmsg = f"Expected {self.name}" - self.mayReturnEmpty = False + self._may_return_empty = False self.mayIndexError = False def _generateDefaultName(self) -> str: @@ -2475,9 +2485,9 @@ class Empty(Literal): An empty token, will always match. """ - def __init__(self, match_string="", *, matchString=""): + def __init__(self, match_string="", *, matchString="") -> None: super().__init__("") - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False def _generateDefaultName(self) -> str: @@ -2534,7 +2544,7 @@ class Keyword(Token): *, matchString: str = "", identChars: typing.Optional[str] = None, - ): + ) -> None: super().__init__() identChars = identChars or ident_chars if identChars is None: @@ -2546,7 +2556,7 @@ class Keyword(Token): if not self.firstMatchChar: raise ValueError("null string passed to Keyword; use Empty() instead") self.errmsg = f"Expected {type(self).__name__} {self.name}" - self.mayReturnEmpty = False + self._may_return_empty = False self.mayIndexError = False self.caseless = caseless if caseless: @@ -2628,7 +2638,7 @@ class CaselessLiteral(Literal): (Contrast with example for :class:`CaselessKeyword`.) """ - def __init__(self, match_string: str = "", *, matchString: str = ""): + def __init__(self, match_string: str = "", *, matchString: str = "") -> None: match_string = matchString or match_string super().__init__(match_string.upper()) # Preserve the defining literal. @@ -2660,7 +2670,7 @@ class CaselessKeyword(Keyword): *, matchString: str = "", identChars: typing.Optional[str] = None, - ): + ) -> None: identChars = identChars or ident_chars match_string = matchString or match_string super().__init__(match_string, identChars, caseless=True) @@ -2708,7 +2718,7 @@ class CloseMatch(Token): *, maxMismatches: int = 1, caseless=False, - ): + ) -> None: maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches super().__init__() self.match_string = match_string @@ -2716,7 +2726,7 @@ class CloseMatch(Token): self.errmsg = f"Expected {self.match_string!r} (with up to {self.maxMismatches} mismatches)" self.caseless = caseless self.mayIndexError = False - self.mayReturnEmpty = False + self._may_return_empty = False def _generateDefaultName(self) -> str: return f"{type(self).__name__}:{self.match_string!r}" @@ -2834,7 +2844,7 @@ class Word(Token): bodyChars: typing.Optional[str] = None, asKeyword: bool = False, excludeChars: typing.Optional[str] = None, - ): + ) -> None: initChars = initChars or init_chars bodyChars = bodyChars or body_chars asKeyword = asKeyword or as_keyword @@ -3018,7 +3028,7 @@ class Char(Word): *, asKeyword: bool = False, excludeChars: typing.Optional[str] = None, - ): + ) -> None: asKeyword = asKeyword or as_keyword excludeChars = excludeChars or exclude_chars super().__init__( @@ -3060,7 +3070,7 @@ class Regex(Token): *, asGroupList: bool = False, asMatch: bool = False, - ): + ) -> None: """The parameters ``pattern`` and ``flags`` are passed to the ``re.compile()`` function as-is. See the Python `re module <https://docs.python.org/3/library/re.html>`_ module for an @@ -3075,15 +3085,18 @@ class Regex(Token): raise ValueError("null string passed to Regex; use Empty() instead") self._re = None + self._may_return_empty = None # type: ignore [assignment] self.reString = self.pattern = pattern elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): self._re = pattern + self._may_return_empty = None # type: ignore [assignment] self.pattern = self.reString = pattern.pattern elif callable(pattern): # defer creating this pattern until we really need it self.pattern = pattern + self._may_return_empty = None # type: ignore [assignment] self._re = None else: @@ -3120,23 +3133,38 @@ class Regex(Token): try: self._re = re.compile(self.pattern, self.flags) - return self._re except re.error: raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex") + else: + self._may_return_empty = self.re.match("", pos=0) is not None + return self._re @cached_property def re_match(self) -> Callable[[str, int], Any]: return self.re.match - @cached_property - def mayReturnEmpty(self) -> bool: # type: ignore[override] - return self.re_match("", 0) is not None + @property + def mayReturnEmpty(self): + if self._may_return_empty is None: + # force compile of regex pattern, to set may_return_empty flag + self.re # noqa + return self._may_return_empty + + @mayReturnEmpty.setter + def mayReturnEmpty(self, value): + self._may_return_empty = value def _generateDefaultName(self) -> str: unescaped = repr(self.pattern).replace("\\\\", "\\") return f"Re:({unescaped})" def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: + # explicit check for matching past the length of the string; + # this is done because the re module will not complain about + # a match with `pos > len(instring)`, it will just return "" + if loc > len(instring) and self.mayReturnEmpty: + raise ParseException(instring, loc, self.errmsg, self) + result = self.re_match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -3151,6 +3179,9 @@ class Regex(Token): return loc, ret def parseImplAsGroupList(self, instring, loc, do_actions=True): + if loc > len(instring) and self.mayReturnEmpty: + raise ParseException(instring, loc, self.errmsg, self) + result = self.re_match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -3160,6 +3191,9 @@ class Regex(Token): return loc, ret def parseImplAsMatch(self, instring, loc, do_actions=True): + if loc > len(instring) and self.mayReturnEmpty: + raise ParseException(instring, loc, self.errmsg, self) + result = self.re_match(instring, loc) if not result: raise ParseException(instring, loc, self.errmsg, self) @@ -3258,7 +3292,7 @@ class QuotedString(Token): unquoteResults: bool = True, endQuoteChar: typing.Optional[str] = None, convertWhitespaceEscapes: bool = True, - ): + ) -> None: super().__init__() esc_char = escChar or esc_char esc_quote = escQuote or esc_quote @@ -3362,7 +3396,7 @@ class QuotedString(Token): self.errmsg = f"Expected {self.name}" self.mayIndexError = False - self.mayReturnEmpty = True + self._may_return_empty = True def _generateDefaultName(self) -> str: if self.quote_char == self.end_quote_char and isinstance( @@ -3465,7 +3499,7 @@ class CharsNotIn(Token): exact: int = 0, *, notChars: str = "", - ): + ) -> None: super().__init__() self.skipWhitespace = False self.notChars = not_chars or notChars @@ -3489,7 +3523,7 @@ class CharsNotIn(Token): self.minLen = exact self.errmsg = f"Expected {self.name}" - self.mayReturnEmpty = self.minLen == 0 + self._may_return_empty = self.minLen == 0 self.mayIndexError = False def _generateDefaultName(self) -> str: @@ -3552,7 +3586,9 @@ class White(Token): "\u3000": "<IDEOGRAPHIC_SPACE>", } - def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): + def __init__( + self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0 + ) -> None: super().__init__() self.matchWhite = ws self.set_whitespace_chars( @@ -3560,7 +3596,7 @@ class White(Token): copy_defaults=True, ) # self.leave_whitespace() - self.mayReturnEmpty = True + self._may_return_empty = True self.errmsg = f"Expected {self.name}" self.minLen = min @@ -3594,9 +3630,9 @@ class White(Token): class PositionToken(Token): - def __init__(self): + def __init__(self) -> None: super().__init__() - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False @@ -3605,7 +3641,7 @@ class GoToColumn(PositionToken): tabular report scraping. """ - def __init__(self, colno: int): + def __init__(self, colno: int) -> None: super().__init__() self.col = colno @@ -3657,7 +3693,7 @@ class LineStart(PositionToken): """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.leave_whitespace() self.orig_whiteChars = set() | self.whiteChars @@ -3688,7 +3724,7 @@ class LineEnd(PositionToken): parse string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.whiteChars.discard("\n") self.set_whitespace_chars(self.whiteChars, copy_defaults=False) @@ -3711,7 +3747,7 @@ class StringStart(PositionToken): string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.set_name("start of text") @@ -3728,7 +3764,7 @@ class StringEnd(PositionToken): Matches if current position is at the end of the parse string """ - def __init__(self): + def __init__(self) -> None: super().__init__() self.set_name("end of text") @@ -3753,7 +3789,9 @@ class WordStart(PositionToken): a line. """ - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + def __init__( + self, word_chars: str = printables, *, wordChars: str = printables + ) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3778,7 +3816,9 @@ class WordEnd(PositionToken): of a line. """ - def __init__(self, word_chars: str = printables, *, wordChars: str = printables): + def __init__( + self, word_chars: str = printables, *, wordChars: str = printables + ) -> None: wordChars = word_chars if wordChars == printables else wordChars super().__init__() self.wordChars = set(wordChars) @@ -3822,14 +3862,15 @@ class Tag(Token): - enthusiastic: True """ - def __init__(self, tag_name: str, value: Any = True): + def __init__(self, tag_name: str, value: Any = True) -> None: super().__init__() - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False self.leave_whitespace() self.tag_name = tag_name self.tag_value = value self.add_parse_action(self._add_tag) + self.show_in_diagram = False def _add_tag(self, tokens: ParseResults): tokens[self.tag_name] = self.tag_value @@ -3843,7 +3884,9 @@ class ParseExpression(ParserElement): post-processing parsed tokens. """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(savelist) self.exprs: list[ParserElement] if isinstance(exprs, _generatorType): @@ -3939,7 +3982,7 @@ class ParseExpression(ParserElement): ): self.exprs = other.exprs[:] + [self.exprs[1]] self._defaultName = None - self.mayReturnEmpty |= other.mayReturnEmpty + self._may_return_empty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError other = self.exprs[-1] @@ -3951,7 +3994,7 @@ class ParseExpression(ParserElement): ): self.exprs = self.exprs[:-1] + other.exprs[:] self._defaultName = None - self.mayReturnEmpty |= other.mayReturnEmpty + self._may_return_empty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError self.errmsg = f"Expected {self}" @@ -4028,7 +4071,7 @@ class And(ParseExpression): """ class _ErrorStop(Empty): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.leave_whitespace() @@ -4036,28 +4079,34 @@ class And(ParseExpression): return "-" def __init__( - self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True - ): - exprs: list[ParserElement] = list(exprs_arg) - if exprs and Ellipsis in exprs: - tmp: list[ParserElement] = [] - for i, expr in enumerate(exprs): - if expr is not Ellipsis: - tmp.append(expr) - continue + self, + exprs_arg: typing.Iterable[Union[ParserElement, str]], + savelist: bool = True, + ) -> None: + # instantiate exprs as a list, converting strs to ParserElements + exprs: list[ParserElement] = [ + self._literalStringClass(e) if isinstance(e, str) else e for e in exprs_arg + ] - if i < len(exprs) - 1: - skipto_arg: ParserElement = typing.cast( - ParseExpression, (Empty() + exprs[i + 1]) - ).exprs[-1] - tmp.append(SkipTo(skipto_arg)("_skipped*")) - continue + # convert any Ellipsis elements to SkipTo + if Ellipsis in exprs: + # Ellipsis cannot be the last element + if exprs[-1] is Ellipsis: raise Exception("cannot construct And with sequence ending in ...") - exprs[:] = tmp + + tmp: list[ParserElement] = [] + for cur_expr, next_expr in zip(exprs, exprs[1:]): + if cur_expr is Ellipsis: + tmp.append(SkipTo(next_expr)("_skipped*")) + else: + tmp.append(cur_expr) + + exprs[:-1] = tmp + super().__init__(exprs, savelist) if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs) if not isinstance(self.exprs[0], White): self.set_whitespace_chars( self.exprs[0].whiteChars, @@ -4067,7 +4116,7 @@ class And(ParseExpression): else: self.skipWhitespace = False else: - self.mayReturnEmpty = True + self._may_return_empty = True self.callPreparse = True def streamline(self) -> ParserElement: @@ -4117,7 +4166,7 @@ class And(ParseExpression): break cur = typing.cast(ParserElement, next_first) - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs) return self def parseImpl(self, instring, loc, do_actions=True): @@ -4189,18 +4238,20 @@ class Or(ParseExpression): [['123'], ['3.1416'], ['789']] """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(exprs, savelist) if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs) self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) else: - self.mayReturnEmpty = True + self._may_return_empty = True def streamline(self) -> ParserElement: super().streamline() if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs) self.saveAsList = any(e.saveAsList for e in self.exprs) self.skipWhitespace = all( e.skipWhitespace and not isinstance(e, White) for e in self.exprs @@ -4286,7 +4337,8 @@ class Or(ParseExpression): if maxException is not None: # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any single error message - if maxExcLoc == loc: + parse_start_loc = self.preParse(instring, loc) + if maxExcLoc == parse_start_loc: maxException.msg = self.errmsg or "" raise maxException @@ -4344,13 +4396,15 @@ class MatchFirst(ParseExpression): print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = False + ) -> None: super().__init__(exprs, savelist) if self.exprs: - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs) self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) else: - self.mayReturnEmpty = True + self._may_return_empty = True def streamline(self) -> ParserElement: if self.streamlined: @@ -4359,13 +4413,13 @@ class MatchFirst(ParseExpression): super().streamline() if self.exprs: self.saveAsList = any(e.saveAsList for e in self.exprs) - self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = any(e.mayReturnEmpty for e in self.exprs) self.skipWhitespace = all( e.skipWhitespace and not isinstance(e, White) for e in self.exprs ) else: self.saveAsList = False - self.mayReturnEmpty = True + self._may_return_empty = True return self def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: @@ -4393,7 +4447,8 @@ class MatchFirst(ParseExpression): if maxException is not None: # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any individual error message - if maxExcLoc == loc: + parse_start_loc = self.preParse(instring, loc) + if maxExcLoc == parse_start_loc: maxException.msg = self.errmsg or "" raise maxException @@ -4491,12 +4546,14 @@ class Each(ParseExpression): - size: 20 """ - def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = True): + def __init__( + self, exprs: typing.Iterable[ParserElement], savelist: bool = True + ) -> None: super().__init__(exprs, savelist) if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs) else: - self.mayReturnEmpty = True + self._may_return_empty = True self.skipWhitespace = True self.initExprGroups = True self.saveAsList = True @@ -4511,9 +4568,9 @@ class Each(ParseExpression): def streamline(self) -> ParserElement: super().streamline() if self.exprs: - self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self._may_return_empty = all(e.mayReturnEmpty for e in self.exprs) else: - self.mayReturnEmpty = True + self._may_return_empty = True return self def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: @@ -4612,7 +4669,7 @@ class ParseElementEnhance(ParserElement): post-processing parsed tokens. """ - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None: super().__init__(savelist) if isinstance(expr, str_type): expr_str = typing.cast(str, expr) @@ -4626,7 +4683,7 @@ class ParseElementEnhance(ParserElement): self.expr = expr if expr is not None: self.mayIndexError = expr.mayIndexError - self.mayReturnEmpty = expr.mayReturnEmpty + self._may_return_empty = expr.mayReturnEmpty self.set_whitespace_chars( expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars ) @@ -4724,20 +4781,20 @@ class IndentedBlock(ParseElementEnhance): """ class _Indent(Empty): - def __init__(self, ref_col: int): + def __init__(self, ref_col: int) -> None: super().__init__() self.errmsg = f"expected indent at column {ref_col}" self.add_condition(lambda s, l, t: col(l, s) == ref_col) class _IndentGreater(Empty): - def __init__(self, ref_col: int): + def __init__(self, ref_col: int) -> None: super().__init__() self.errmsg = f"expected indent at column greater than {ref_col}" self.add_condition(lambda s, l, t: col(l, s) > ref_col) def __init__( self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True - ): + ) -> None: super().__init__(expr, savelist=True) # if recursive: # raise NotImplementedError("IndentedBlock with recursive is not implemented") @@ -4792,7 +4849,7 @@ class AtStringStart(ParseElementEnhance): # raises ParseException """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) self.callPreparse = False @@ -4825,7 +4882,7 @@ class AtLineStart(ParseElementEnhance): """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) self.callPreparse = False @@ -4858,9 +4915,9 @@ class FollowedBy(ParseElementEnhance): [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) - self.mayReturnEmpty = True + self._may_return_empty = True def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: # by using self._expr.parse and deleting the contents of the returned ParseResults list @@ -4901,10 +4958,10 @@ class PrecededBy(ParseElementEnhance): """ - def __init__(self, expr: Union[ParserElement, str], retreat: int = 0): + def __init__(self, expr: Union[ParserElement, str], retreat: int = 0) -> None: super().__init__(expr) self.expr = self.expr().leave_whitespace() - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False self.exact = False if isinstance(expr, str_type): @@ -5019,13 +5076,13 @@ class NotAny(ParseElementEnhance): integer = Word(nums) + ~Char(".") """ - def __init__(self, expr: Union[ParserElement, str]): + def __init__(self, expr: Union[ParserElement, str]) -> None: super().__init__(expr) # do NOT use self.leave_whitespace(), don't want to propagate to exprs # self.leave_whitespace() self.skipWhitespace = False - self.mayReturnEmpty = True + self._may_return_empty = True self.errmsg = f"Found unwanted token, {self.expr}" def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: @@ -5044,7 +5101,7 @@ class _MultipleMatch(ParseElementEnhance): stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(expr) stopOn = stopOn or stop_on self.saveAsList = True @@ -5062,9 +5119,10 @@ class _MultipleMatch(ParseElementEnhance): def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: self_expr_parse = self.expr._parse self_skip_ignorables = self._skipIgnorables - check_ender = self.not_ender is not None - if check_ender: + check_ender = False + if self.not_ender is not None: try_not_ender = self.not_ender.try_parse + check_ender = True # must be at least one (but first see if we are the stopOn sentinel; # if so, fail) @@ -5165,9 +5223,9 @@ class ZeroOrMore(_MultipleMatch): stop_on: typing.Optional[Union[ParserElement, str]] = None, *, stopOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(expr, stopOn=stopOn or stop_on) - self.mayReturnEmpty = True + self._may_return_empty = True def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: try: @@ -5189,7 +5247,7 @@ class DelimitedList(ParseElementEnhance): max: typing.Optional[int] = None, *, allow_trailing_delim: bool = False, - ): + ) -> None: """Helper to define a delimited list of expressions - the delimiter defaults to ','. By default, the list elements and delimiters can have intervening whitespace, and comments, but this can be @@ -5296,11 +5354,11 @@ class Opt(ParseElementEnhance): def __init__( self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched - ): + ) -> None: super().__init__(expr, savelist=False) self.saveAsList = self.expr.saveAsList self.defaultValue = default - self.mayReturnEmpty = True + self._may_return_empty = True def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: self_expr = self.expr @@ -5401,11 +5459,11 @@ class SkipTo(ParseElementEnhance): fail_on: typing.Optional[Union[ParserElement, str]] = None, *, failOn: typing.Optional[Union[ParserElement, str]] = None, - ): + ) -> None: super().__init__(other) failOn = failOn or fail_on self.ignoreExpr = ignore - self.mayReturnEmpty = True + self._may_return_empty = True self.mayIndexError = False self.includeMatch = include self.saveAsList = False @@ -5512,7 +5570,9 @@ class Forward(ParseElementEnhance): parser created using ``Forward``. """ - def __init__(self, other: typing.Optional[Union[ParserElement, str]] = None): + def __init__( + self, other: typing.Optional[Union[ParserElement, str]] = None + ) -> None: self.caller_frame = traceback.extract_stack(limit=2)[0] super().__init__(other, savelist=False) # type: ignore[arg-type] self.lshift_line = None @@ -5529,7 +5589,7 @@ class Forward(ParseElementEnhance): self.expr = other self.streamlined = other.streamlined self.mayIndexError = self.expr.mayIndexError - self.mayReturnEmpty = self.expr.mayReturnEmpty + self._may_return_empty = self.expr.mayReturnEmpty self.set_whitespace_chars( self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars ) @@ -5648,7 +5708,7 @@ class Forward(ParseElementEnhance): try: new_loc, new_peek = super().parseImpl(instring, loc, False) except ParseException: - # we failed before getting any match – do not hide the error + # we failed before getting any match - do not hide the error if isinstance(prev_peek, Exception): raise new_loc, new_peek = prev_loc, prev_peek @@ -5703,17 +5763,20 @@ class Forward(ParseElementEnhance): def _generateDefaultName(self) -> str: # Avoid infinite recursion by setting a temporary _defaultName + save_default_name = self._defaultName self._defaultName = ": ..." # Use the string representation of main expression. - retString = "..." try: if self.expr is not None: - retString = str(self.expr)[:1000] + ret_string = str(self.expr)[:1000] else: - retString = "None" - finally: - return f"{type(self).__name__}: {retString}" + ret_string = "None" + except Exception: + ret_string = "..." + + self._defaultName = save_default_name + return f"{type(self).__name__}: {ret_string}" def copy(self) -> ParserElement: if self.expr is not None: @@ -5752,7 +5815,7 @@ class TokenConverter(ParseElementEnhance): Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results. """ - def __init__(self, expr: Union[ParserElement, str], savelist=False): + def __init__(self, expr: Union[ParserElement, str], savelist=False) -> None: super().__init__(expr) # , savelist) self.saveAsList = False @@ -5783,7 +5846,7 @@ class Combine(TokenConverter): adjacent: bool = True, *, joinString: typing.Optional[str] = None, - ): + ) -> None: super().__init__(expr) joinString = joinString if joinString is not None else join_string # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself @@ -5835,7 +5898,7 @@ class Group(TokenConverter): # -> ['fn', ['a', 'b', '100']] """ - def __init__(self, expr: ParserElement, aslist: bool = False): + def __init__(self, expr: ParserElement, aslist: bool = False) -> None: super().__init__(expr) self.saveAsList = True self._asPythonList = aslist @@ -5893,7 +5956,7 @@ class Dict(TokenConverter): See more examples at :class:`ParseResults` of accessing fields by results name. """ - def __init__(self, expr: ParserElement, asdict: bool = False): + def __init__(self, expr: ParserElement, asdict: bool = False) -> None: super().__init__(expr) self.saveAsList = True self._asPythonDict = asdict @@ -5969,7 +6032,7 @@ class Suppress(TokenConverter): (See also :class:`DelimitedList`.) """ - def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): + def __init__(self, expr: Union[ParserElement, str], savelist: bool = False) -> None: if expr is ...: expr = _PendingSkip(NoMatch()) super().__init__(expr) @@ -6094,13 +6157,17 @@ def srange(s: str) -> str: - any combination of the above (``'aeiouy'``, ``'a-zA-Z0-9_$'``, etc.) """ - _expanded = lambda p: ( - p - if not isinstance(p, ParseResults) - else "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) - ) + + def _expanded(p): + if isinstance(p, ParseResults): + yield from (chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) + else: + yield p + try: - return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) + return "".join( + [c for part in _reBracketExpr.parse_string(s).body for c in _expanded(part)] + ) except Exception as e: return "" @@ -6156,11 +6223,17 @@ def autoname_elements() -> None: Utility to simplify mass-naming of parser elements, for generating railroad diagram with named subdiagrams. """ - calling_frame = sys._getframe(1) + + # guard against _getframe not being implemented in the current Python + getframe_fn = getattr(sys, "_getframe", lambda _: None) + calling_frame = getframe_fn(1) if calling_frame is None: return + + # find all locals in the calling frame that are ParserElements calling_frame = typing.cast(types.FrameType, calling_frame) for name, var in calling_frame.f_locals.items(): + # if no custom name defined, set the name to the var name if isinstance(var, ParserElement) and not var.customName: var.set_name(name) diff --git a/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py b/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py index 56526b741b8..526cf3862a4 100644 --- a/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py +++ b/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py @@ -120,7 +120,7 @@ class EachItem(railroad.Group): all_label = "[ALL]" - def __init__(self, *items): + def __init__(self, *items) -> None: choice_item = railroad.Choice(len(items) - 1, *items) one_or_more_item = railroad.OneOrMore(item=choice_item) super().__init__(one_or_more_item, label=self.all_label) @@ -131,7 +131,7 @@ class AnnotatedItem(railroad.Group): Simple subclass of Group that creates an annotation label """ - def __init__(self, label: str, item): + def __init__(self, label: str, item) -> None: super().__init__(item=item, label=f"[{label}]" if label else "") @@ -144,7 +144,7 @@ class EditablePartial(Generic[T]): # We need this here because the railroad constructors actually transform the data, so can't be called until the # entire tree is assembled - def __init__(self, func: Callable[..., T], args: list, kwargs: dict): + def __init__(self, func: Callable[..., T], args: list, kwargs: dict) -> None: self.func = func self.args = args self.kwargs = kwargs @@ -226,6 +226,7 @@ def to_railroad( vertical: int = 3, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> list[NamedDiagram]: """ Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram @@ -238,6 +239,8 @@ def to_railroad( included in the diagram :param show_groups - bool to indicate whether groups should be highlighted with an unlabeled surrounding box + :param show_hidden - bool to indicate whether internal elements that are typically hidden + should be shown """ # Convert the whole tree underneath the root lookup = ConverterState(diagram_kwargs=diagram_kwargs or {}) @@ -248,6 +251,7 @@ def to_railroad( vertical=vertical, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) root_id = id(element) @@ -348,7 +352,7 @@ class ConverterState: Stores some state that persists between recursions into the element tree """ - def __init__(self, diagram_kwargs: typing.Optional[dict] = None): + def __init__(self, diagram_kwargs: typing.Optional[dict] = None) -> None: #: A dictionary mapping ParserElements to state relating to them self._element_diagram_states: dict[int, ElementState] = {} #: A dictionary mapping ParserElement IDs to subdiagrams generated from them @@ -453,6 +457,7 @@ def _apply_diagram_item_enhancements(fn): name_hint: str = None, show_results_names: bool = False, show_groups: bool = False, + show_hidden: bool = False, ) -> typing.Optional[EditablePartial]: ret = fn( element, @@ -463,6 +468,7 @@ def _apply_diagram_item_enhancements(fn): name_hint, show_results_names, show_groups, + show_hidden, ) # apply annotation for results name, if present @@ -555,6 +561,7 @@ def _to_diagram_element( name_hint=propagated_name, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) # If the element isn't worth extracting, we always treat it as the first time we say it @@ -641,6 +648,7 @@ def _to_diagram_element( name_hint, show_results_names, show_groups, + show_hidden, ] return _to_diagram_element( (~element.not_ender.expr + element.expr)[1, ...].set_name(element.name), @@ -657,6 +665,7 @@ def _to_diagram_element( name_hint, show_results_names, show_groups, + show_hidden, ] return _to_diagram_element( (~element.not_ender.expr + element.expr)[...].set_name(element.name), @@ -707,6 +716,7 @@ def _to_diagram_element( index=i, show_results_names=show_results_names, show_groups=show_groups, + show_hidden=show_hidden, ) # Some elements don't need to be shown in the diagram diff --git a/contrib/python/pyparsing/py3/pyparsing/exceptions.py b/contrib/python/pyparsing/py3/pyparsing/exceptions.py index 57a1579d121..fe07a855856 100644 --- a/contrib/python/pyparsing/py3/pyparsing/exceptions.py +++ b/contrib/python/pyparsing/py3/pyparsing/exceptions.py @@ -52,7 +52,7 @@ class ParseBaseException(Exception): loc: int = 0, msg: typing.Optional[str] = None, elem=None, - ): + ) -> None: if msg is None: msg, pstr = pstr, "" @@ -87,7 +87,7 @@ class ParseBaseException(Exception): ret: list[str] = [] if isinstance(exc, ParseBaseException): ret.append(exc.line) - ret.append(f"{' ' * (exc.column - 1)}^") + ret.append(f"{'^':>{exc.column}}") ret.append(f"{type(exc).__name__}: {exc}") if depth <= 0 or exc.__traceback__ is None: @@ -272,12 +272,11 @@ class ParseException(ParseBaseException): try: integer.parse_string("ABC") except ParseException as pe: - print(pe) - print(f"column: {pe.column}") + print(pe, f"column: {pe.column}") prints:: - Expected integer (at char 0), (line:1, col:1) column: 1 + Expected integer, found 'ABC' (at char 0), (line:1, col:1) column: 1 """ @@ -307,7 +306,7 @@ class RecursiveGrammarException(Exception): Deprecated: only used by deprecated method ParserElement.validate. """ - def __init__(self, parseElementList): + def __init__(self, parseElementList) -> None: self.parseElementTrace = parseElementList def __str__(self) -> str: diff --git a/contrib/python/pyparsing/py3/pyparsing/helpers.py b/contrib/python/pyparsing/py3/pyparsing/helpers.py index f781e871327..7f62df86374 100644 --- a/contrib/python/pyparsing/py3/pyparsing/helpers.py +++ b/contrib/python/pyparsing/py3/pyparsing/helpers.py @@ -208,11 +208,9 @@ def one_of( if caseless: is_equal = lambda a, b: a.upper() == b.upper() masks = lambda a, b: b.upper().startswith(a.upper()) - parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral else: is_equal = operator.eq masks = lambda a, b: b.startswith(a) - parse_element_class = Keyword if asKeyword else Literal symbols: list[str] if isinstance(strs, str_type): @@ -255,7 +253,8 @@ def one_of( if asKeyword: patt = rf"\b(?:{patt})\b" - ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) + ret = Regex(patt, flags=re_flags) + ret.set_name(" | ".join(re.escape(s) for s in symbols)) if caseless: # add parse action to return symbols as specified, not in random @@ -270,13 +269,21 @@ def one_of( "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 ) - # last resort, just use MatchFirst + # last resort, just use MatchFirst of Token class corresponding to caseless + # and asKeyword settings + CASELESS = KEYWORD = True + parse_element_class = { + (CASELESS, KEYWORD): CaselessKeyword, + (CASELESS, not KEYWORD): CaselessLiteral, + (not CASELESS, KEYWORD): Keyword, + (not CASELESS, not KEYWORD): Literal, + }[(caseless, asKeyword)] return MatchFirst(parse_element_class(sym) for sym in symbols).set_name( " | ".join(symbols) ) -def dict_of(key: ParserElement, value: ParserElement) -> ParserElement: +def dict_of(key: ParserElement, value: ParserElement) -> Dict: """Helper to easily and clearly define a dictionary by specifying the respective patterns for the key and value. Takes care of defining the :class:`Dict`, :class:`ZeroOrMore`, and @@ -411,13 +418,16 @@ def locatedExpr(expr: ParserElement) -> ParserElement: ) +_NO_IGNORE_EXPR_GIVEN = NoMatch() + + def nested_expr( opener: Union[str, ParserElement] = "(", closer: Union[str, ParserElement] = ")", content: typing.Optional[ParserElement] = None, - ignore_expr: ParserElement = quoted_string(), + ignore_expr: ParserElement = _NO_IGNORE_EXPR_GIVEN, *, - ignoreExpr: ParserElement = quoted_string(), + ignoreExpr: ParserElement = _NO_IGNORE_EXPR_GIVEN, ) -> ParserElement: """Helper method for defining nested lists enclosed in opening and closing delimiters (``"("`` and ``")"`` are the default). @@ -487,7 +497,10 @@ def nested_expr( dec_to_hex (int) args: [['char', 'hchar']] """ if ignoreExpr != ignore_expr: - ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr + ignoreExpr = ignore_expr if ignoreExpr is _NO_IGNORE_EXPR_GIVEN else ignoreExpr + if ignoreExpr is _NO_IGNORE_EXPR_GIVEN: + ignoreExpr = quoted_string() + if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: @@ -504,11 +517,11 @@ def nested_expr( exact=1, ) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: content = empty.copy() + CharsNotIn( opener + closer + ParserElement.DEFAULT_WHITE_CHARS - ).set_parse_action(lambda t: t[0].strip()) + ) else: if ignoreExpr is not None: content = Combine( @@ -518,7 +531,7 @@ def nested_expr( + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: content = Combine( OneOrMore( @@ -526,11 +539,16 @@ def nested_expr( + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1) ) - ).set_parse_action(lambda t: t[0].strip()) + ) else: raise ValueError( "opening and closing arguments must be strings if no content expression is given" ) + if ParserElement.DEFAULT_WHITE_CHARS: + content.set_parse_action( + lambda t: t[0].strip(ParserElement.DEFAULT_WHITE_CHARS) + ) + ret = Forward() if ignoreExpr is not None: ret <<= Group( @@ -691,7 +709,7 @@ def infix_notation( op_list: list[InfixNotationOperatorSpec], lpar: Union[str, ParserElement] = Suppress("("), rpar: Union[str, ParserElement] = Suppress(")"), -) -> ParserElement: +) -> Forward: """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be diff --git a/contrib/python/pyparsing/py3/pyparsing/results.py b/contrib/python/pyparsing/py3/pyparsing/results.py index be834b7e607..956230352c8 100644 --- a/contrib/python/pyparsing/py3/pyparsing/results.py +++ b/contrib/python/pyparsing/py3/pyparsing/results.py @@ -23,7 +23,7 @@ class _ParseResultsWithOffset: tup: tuple[ParseResults, int] __slots__ = ["tup"] - def __init__(self, p1: ParseResults, p2: int): + def __init__(self, p1: ParseResults, p2: int) -> None: self.tup: tuple[ParseResults, int] = (p1, p2) def __getitem__(self, i): diff --git a/contrib/python/pyparsing/py3/pyparsing/tools/__init__.py b/contrib/python/pyparsing/py3/pyparsing/tools/__init__.py new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/contrib/python/pyparsing/py3/pyparsing/tools/__init__.py diff --git a/contrib/python/pyparsing/py3/pyparsing/tools/cvt_pyparsing_pep8_names.py b/contrib/python/pyparsing/py3/pyparsing/tools/cvt_pyparsing_pep8_names.py new file mode 100644 index 00000000000..f4a8bd9f516 --- /dev/null +++ b/contrib/python/pyparsing/py3/pyparsing/tools/cvt_pyparsing_pep8_names.py @@ -0,0 +1,116 @@ +from functools import lru_cache +import pyparsing as pp + + +@lru_cache(maxsize=None) +def camel_to_snake(s: str) -> str: + """ + Convert CamelCase to snake_case. + """ + return "".join("_" + c.lower() if c.isupper() else c for c in s).lstrip("_") + + +pre_pep8_method_names = """ +addCondition addParseAction anyCloseTag anyOpenTag asDict asList cStyleComment canParseNext conditionAsParseAction +convertToDate convertToDatetime convertToFloat convertToInteger countedArray cppStyleComment dblQuotedString +dblSlashComment defaultName dictOf disableMemoization downcaseTokens enableLeftRecursion enablePackrat getName +htmlComment ignoreWhitespace indentedBlock infixNotation inlineLiteralsUsing javaStyleComment leaveWhitespace +lineEnd lineStart locatedExpr matchOnlyAtCol matchPreviousExpr matchPreviousLiteral nestedExpr nullDebugAction oneOf +originalTextFor parseFile parseString parseWithTabs pythonStyleComment quotedString removeQuotes replaceWith +resetCache restOfLine runTests scanString searchString setBreak setDebug setDebugActions setDefaultWhitespaceChars +setFailAction setName setParseAction setResultsName setWhitespaceChars sglQuotedString stringEnd stringStart tokenMap +traceParseAction transformString tryParse unicodeString upcaseTokens withAttribute withClass +""".split() + +special_changes = { + "opAssoc": "OpAssoc", + "delimitedList": "DelimitedList", + "delimited_list": "DelimitedList", + "replaceHTMLEntity": "replace_html_entity", + "makeHTMLTags": "make_html_tags", + "makeXMLTags": "make_xml_tags", + "commonHTMLEntity": "common_html_entity", + "stripHTMLTags": "strip_html_tags", +} + +pre_pep8_arg_names = """parseAll maxMatches listAllMatches callDuringTry includeSeparators fullDump printResults +failureTests postParse matchString identChars maxMismatches initChars bodyChars asKeyword excludeChars asGroupList +asMatch quoteChar escChar escQuote unquoteResults endQuoteChar convertWhitespaceEscapes notChars wordChars stopOn +failOn joinString markerString intExpr useRegex asString ignoreExpr""".split() + +pre_pep8_method_name = pp.one_of(pre_pep8_method_names, as_keyword=True) +pre_pep8_method_name.set_parse_action(lambda t: camel_to_snake(t[0])) +special_pre_pep8_name = pp.one_of(special_changes, as_keyword=True) +special_pre_pep8_name.set_parse_action(lambda t: special_changes[t[0]]) +# only replace arg names if part of an arg list +pre_pep8_arg_name = pp.Regex( + rf"{pp.util.make_compressed_re(pre_pep8_arg_names)}\s*=" +) +pre_pep8_arg_name.set_parse_action(lambda t: camel_to_snake(t[0])) + +pep8_converter = pre_pep8_method_name | special_pre_pep8_name | pre_pep8_arg_name + +if __name__ == "__main__": + import argparse + from pathlib import Path + import sys + + argparser = argparse.ArgumentParser( + description = ( + "Utility to convert Python pyparsing scripts using legacy" + " camelCase names to use PEP8 snake_case names." + "\nBy default, this script will only show whether this script would make any changes." + ) + ) + argparser.add_argument("--verbose", "-v", action="store_true", help="Show unified diff for each source file") + argparser.add_argument("-vv", action="store_true", dest="verbose2", help="Show unified diff for each source file, plus names of scanned files with no changes") + argparser.add_argument("--update", "-u", action="store_true", help="Update source files in-place") + argparser.add_argument("--encoding", type=str, default="utf-8", help="Encoding of source files (default: utf-8)") + argparser.add_argument("--exit-zero-even-if-changed", "-exit0", action="store_true", help="Exit with status code 0 even if changes were made") + argparser.add_argument("source_filename", nargs="+", help="Source filenames or filename patterns of Python files to be converted") + args = argparser.parse_args() + + + def show_diffs(original, modified): + import difflib + + diff = difflib.unified_diff( + original.splitlines(), modified.splitlines(), lineterm="" + ) + sys.stdout.writelines(f"{diff_line}\n" for diff_line in diff) + + exit_status = 0 + + for filename_pattern in args.source_filename: + + for filename in Path().glob(filename_pattern): + if not Path(filename).is_file(): + continue + + try: + original_contents = Path(filename).read_text(encoding=args.encoding) + modified_contents = pep8_converter.transform_string( + original_contents + ) + + if modified_contents != original_contents: + if args.update: + Path(filename).write_text(modified_contents, encoding=args.encoding) + print(f"Converted {filename}") + else: + print(f"Found required changes in {filename}") + + if args.verbose: + show_diffs(original_contents, modified_contents) + print() + + exit_status = 1 + + else: + if args.verbose2: + print(f"No required changes in {filename}") + + except Exception as e: + print(f"Failed to convert {filename}: {type(e).__name__}: {e}") + + sys.exit(exit_status if not args.exit_zero_even_if_changed else 0) diff --git a/contrib/python/pyparsing/py3/pyparsing/util.py b/contrib/python/pyparsing/py3/pyparsing/util.py index 03a60d4fddc..1cb16e2e620 100644 --- a/contrib/python/pyparsing/py3/pyparsing/util.py +++ b/contrib/python/pyparsing/py3/pyparsing/util.py @@ -1,5 +1,6 @@ # util.py import contextlib +import re from functools import lru_cache, wraps import inspect import itertools @@ -193,7 +194,7 @@ class _GroupConsecutive: (3, iter(['p', 'q', 'r', 's'])) """ - def __init__(self): + def __init__(self) -> None: self.prev = 0 self.counter = itertools.count() self.value = -1 @@ -303,7 +304,11 @@ def _flatten(ll: Iterable) -> list: def make_compressed_re( - word_list: Iterable[str], max_level: int = 2, _level: int = 1 + word_list: Iterable[str], + max_level: int = 2, + *, + non_capturing_groups: bool = True, + _level: int = 1, ) -> str: """ Create a regular expression string from a list of words, collapsing by common @@ -320,15 +325,38 @@ def make_compressed_re( else: yield namelist[0][0], [namelist[0][1:]] + if _level == 1: + if not word_list: + raise ValueError("no words given to make_compressed_re()") + + if "" in word_list: + raise ValueError("word list cannot contain empty string") + else: + # internal recursive call, just return empty string if no words + if not word_list: + return "" + + # dedupe the word list + word_list = list({}.fromkeys(word_list)) + if max_level == 0: - return "|".join(sorted(word_list, key=len, reverse=True)) + if any(len(wd) > 1 for wd in word_list): + return "|".join( + sorted([re.escape(wd) for wd in word_list], key=len, reverse=True) + ) + else: + return f"[{''.join(_escape_regex_range_chars(wd) for wd in word_list)}]" ret = [] sep = "" + ncgroup = "?:" if non_capturing_groups else "" + for initial, suffixes in get_suffixes_from_common_prefixes(sorted(word_list)): ret.append(sep) sep = "|" + initial = re.escape(initial) + trailing = "" if "" in suffixes: trailing = "?" @@ -336,21 +364,33 @@ def make_compressed_re( if len(suffixes) > 1: if all(len(s) == 1 for s in suffixes): - ret.append(f"{initial}[{''.join(suffixes)}]{trailing}") + ret.append( + f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}" + ) else: if _level < max_level: suffix_re = make_compressed_re( - sorted(suffixes), max_level, _level + 1 + sorted(suffixes), + max_level, + non_capturing_groups=non_capturing_groups, + _level=_level + 1, ) - ret.append(f"{initial}({suffix_re}){trailing}") + ret.append(f"{initial}({ncgroup}{suffix_re}){trailing}") else: - suffixes.sort(key=len, reverse=True) - ret.append(f"{initial}({'|'.join(suffixes)}){trailing}") + if all(len(s) == 1 for s in suffixes): + ret.append( + f"{initial}[{''.join(_escape_regex_range_chars(s) for s in suffixes)}]{trailing}" + ) + else: + suffixes.sort(key=len, reverse=True) + ret.append( + f"{initial}({ncgroup}{'|'.join(re.escape(s) for s in suffixes)}){trailing}" + ) else: if suffixes: - suffix = suffixes[0] + suffix = re.escape(suffixes[0]) if len(suffix) > 1 and trailing: - ret.append(f"{initial}({suffix}){trailing}") + ret.append(f"{initial}({ncgroup}{suffix}){trailing}") else: ret.append(f"{initial}{suffix}{trailing}") else: diff --git a/contrib/python/pyparsing/py3/ya.make b/contrib/python/pyparsing/py3/ya.make index e229986ca67..a53ebf37ecf 100644 --- a/contrib/python/pyparsing/py3/ya.make +++ b/contrib/python/pyparsing/py3/ya.make @@ -4,7 +4,7 @@ PY3_LIBRARY() PROVIDES(pyparsing) -VERSION(3.2.1) +VERSION(3.2.2) LICENSE(MIT) @@ -25,6 +25,8 @@ PY_SRCS( pyparsing/helpers.py pyparsing/results.py pyparsing/testing.py + pyparsing/tools/__init__.py + pyparsing/tools/cvt_pyparsing_pep8_names.py pyparsing/unicode.py pyparsing/util.py ) |