diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-29 09:54:58 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-29 10:05:47 +0300 |
commit | 951b23b132d746894dce79549b664284d84ca912 (patch) | |
tree | 49a51797f20100c557d3d80f8dbf1ad7e7a8c00a | |
parent | c138ba0fd95b01235efb454180c5edb1bbe11834 (diff) | |
download | ydb-951b23b132d746894dce79549b664284d84ca912.tar.gz |
Intermediate changes
commit_hash:5e0e414190745fa0359941d411052207c42514ab
24 files changed, 853 insertions, 537 deletions
diff --git a/contrib/python/anyio/.dist-info/METADATA b/contrib/python/anyio/.dist-info/METADATA index e28bbd52d0..10d7aafc77 100644 --- a/contrib/python/anyio/.dist-info/METADATA +++ b/contrib/python/anyio/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: anyio -Version: 4.6.2 +Version: 4.6.2.post1 Summary: High level compatibility layer for multiple asynchronous event loop implementations Author-email: Alex Grönholm <alex.gronholm@nextday.fi> License: MIT @@ -15,13 +15,12 @@ Classifier: Framework :: AnyIO Classifier: Typing :: Typed Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 -Requires-Python: >=3.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE Requires-Dist: idna >=2.8 diff --git a/contrib/python/anyio/anyio/_backends/_asyncio.py b/contrib/python/anyio/anyio/_backends/_asyncio.py index fa5349a8c2..0a69e7ac61 100644 --- a/contrib/python/anyio/anyio/_backends/_asyncio.py +++ b/contrib/python/anyio/anyio/_backends/_asyncio.py @@ -20,9 +20,18 @@ from asyncio import ( ) from asyncio.base_events import _run_until_complete_cb # type: ignore[attr-defined] from collections import OrderedDict, deque -from collections.abc import AsyncIterator, Iterable +from collections.abc import ( + AsyncGenerator, + AsyncIterator, + Awaitable, + Callable, + Collection, + Coroutine, + Iterable, + Sequence, +) from concurrent.futures import Future -from contextlib import suppress +from contextlib import AbstractContextManager, suppress from contextvars import Context, copy_context from dataclasses import dataclass from functools import partial, wraps @@ -42,15 +51,7 @@ from types import 
TracebackType from typing import ( IO, Any, - AsyncGenerator, - Awaitable, - Callable, - Collection, - ContextManager, - Coroutine, Optional, - Sequence, - Tuple, TypeVar, cast, ) @@ -358,6 +359,14 @@ def _task_started(task: asyncio.Task) -> bool: # +def is_anyio_cancellation(exc: CancelledError) -> bool: + return ( + bool(exc.args) + and isinstance(exc.args[0], str) + and exc.args[0].startswith("Cancelled by cancel scope ") + ) + + class CancelScope(BaseCancelScope): def __new__( cls, *, deadline: float = math.inf, shield: bool = False @@ -416,6 +425,8 @@ class CancelScope(BaseCancelScope): exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> bool | None: + del exc_tb + if not self._active: raise RuntimeError("This cancel scope is not active") if current_task() is not self._host_task: @@ -432,47 +443,93 @@ class CancelScope(BaseCancelScope): "current cancel scope" ) - self._active = False - if self._timeout_handle: - self._timeout_handle.cancel() - self._timeout_handle = None - - self._tasks.remove(self._host_task) - if self._parent_scope is not None: - self._parent_scope._child_scopes.remove(self) - self._parent_scope._tasks.add(self._host_task) + try: + self._active = False + if self._timeout_handle: + self._timeout_handle.cancel() + self._timeout_handle = None - host_task_state.cancel_scope = self._parent_scope + self._tasks.remove(self._host_task) + if self._parent_scope is not None: + self._parent_scope._child_scopes.remove(self) + self._parent_scope._tasks.add(self._host_task) - # Restart the cancellation effort in the closest directly cancelled parent - # scope if this one was shielded - self._restart_cancellation_in_parent() + host_task_state.cancel_scope = self._parent_scope - if self._cancel_called and exc_val is not None: - for exc in iterate_exceptions(exc_val): - if isinstance(exc, CancelledError): - self._cancelled_caught = self._uncancel(exc) - if self._cancelled_caught: + # Undo all cancellations done by this scope + if 
self._cancelling is not None: + while self._cancel_calls: + self._cancel_calls -= 1 + if self._host_task.uncancel() <= self._cancelling: break - return self._cancelled_caught + # We only swallow the exception iff it was an AnyIO CancelledError, either + # directly as exc_val or inside an exception group and there are no cancelled + # parent cancel scopes visible to us here + not_swallowed_exceptions = 0 + swallow_exception = False + if exc_val is not None: + for exc in iterate_exceptions(exc_val): + if self._cancel_called and isinstance(exc, CancelledError): + if not (swallow_exception := self._uncancel(exc)): + not_swallowed_exceptions += 1 + else: + not_swallowed_exceptions += 1 - return None + # Restart the cancellation effort in the closest visible, cancelled parent + # scope if necessary + self._restart_cancellation_in_parent() + return swallow_exception and not not_swallowed_exceptions + finally: + self._host_task = None + del exc_val + + @property + def _effectively_cancelled(self) -> bool: + cancel_scope: CancelScope | None = self + while cancel_scope is not None: + if cancel_scope._cancel_called: + return True + + if cancel_scope.shield: + return False + + cancel_scope = cancel_scope._parent_scope + + return False + + @property + def _parent_cancellation_is_visible_to_us(self) -> bool: + return ( + self._parent_scope is not None + and not self.shield + and self._parent_scope._effectively_cancelled + ) def _uncancel(self, cancelled_exc: CancelledError) -> bool: - if sys.version_info < (3, 9) or self._host_task is None: + if self._host_task is None: self._cancel_calls = 0 return True - # Undo all cancellations done by this scope - if self._cancelling is not None: - while self._cancel_calls: - self._cancel_calls -= 1 - if self._host_task.uncancel() <= self._cancelling: - return True + while True: + if is_anyio_cancellation(cancelled_exc): + # Only swallow the cancellation exception if it's an AnyIO cancel + # exception and there are no other cancel scopes 
down the line pending + # cancellation + self._cancelled_caught = ( + self._effectively_cancelled + and not self._parent_cancellation_is_visible_to_us + ) + return self._cancelled_caught - self._cancel_calls = 0 - return f"Cancelled by cancel scope {id(self):x}" in cancelled_exc.args + # Sometimes third party frameworks catch a CancelledError and raise a new + # one, so as a workaround we have to look at the previous ones in + # __context__ too for a matching cancel message + if isinstance(cancelled_exc.__context__, CancelledError): + cancelled_exc = cancelled_exc.__context__ + continue + + return False def _timeout(self) -> None: if self._deadline != math.inf: @@ -496,19 +553,17 @@ class CancelScope(BaseCancelScope): should_retry = False current = current_task() for task in self._tasks: + should_retry = True if task._must_cancel: # type: ignore[attr-defined] continue # The task is eligible for cancellation if it has started - should_retry = True if task is not current and (task is self._host_task or _task_started(task)): waiter = task._fut_waiter # type: ignore[attr-defined] if not isinstance(waiter, asyncio.Future) or not waiter.done(): - origin._cancel_calls += 1 - if sys.version_info >= (3, 9): - task.cancel(f"Cancelled by cancel scope {id(origin):x}") - else: - task.cancel() + task.cancel(f"Cancelled by cancel scope {id(origin):x}") + if task is origin._host_task: + origin._cancel_calls += 1 # Deliver cancellation to child scopes that aren't shielded or running their own # cancellation callbacks @@ -546,17 +601,6 @@ class CancelScope(BaseCancelScope): scope = scope._parent_scope - def _parent_cancelled(self) -> bool: - # Check whether any parent has been cancelled - cancel_scope = self._parent_scope - while cancel_scope is not None and not cancel_scope._shield: - if cancel_scope._cancel_called: - return True - else: - cancel_scope = cancel_scope._parent_scope - - return False - def cancel(self) -> None: if not self._cancel_called: if self._timeout_handle: @@ 
-645,6 +689,26 @@ class _AsyncioTaskStatus(abc.TaskStatus): _task_states[task].parent_id = self._parent_id +async def _wait(tasks: Iterable[asyncio.Task[object]]) -> None: + tasks = set(tasks) + waiter = get_running_loop().create_future() + + def on_completion(task: asyncio.Task[object]) -> None: + tasks.discard(task) + if not tasks and not waiter.done(): + waiter.set_result(None) + + for task in tasks: + task.add_done_callback(on_completion) + del task + + try: + await waiter + finally: + while tasks: + tasks.pop().remove_done_callback(on_completion) + + class TaskGroup(abc.TaskGroup): def __init__(self) -> None: self.cancel_scope: CancelScope = CancelScope() @@ -663,38 +727,53 @@ class TaskGroup(abc.TaskGroup): exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> bool | None: - ignore_exception = self.cancel_scope.__exit__(exc_type, exc_val, exc_tb) - if exc_val is not None: - self.cancel_scope.cancel() - if not isinstance(exc_val, CancelledError): - self._exceptions.append(exc_val) - - cancelled_exc_while_waiting_tasks: CancelledError | None = None - while self._tasks: - try: - await asyncio.wait(self._tasks) - except CancelledError as exc: - # This task was cancelled natively; reraise the CancelledError later - # unless this task was already interrupted by another exception + try: + if exc_val is not None: self.cancel_scope.cancel() - if cancelled_exc_while_waiting_tasks is None: - cancelled_exc_while_waiting_tasks = exc + if not isinstance(exc_val, CancelledError): + self._exceptions.append(exc_val) - self._active = False - if self._exceptions: - raise BaseExceptionGroup( - "unhandled errors in a TaskGroup", self._exceptions - ) + try: + if self._tasks: + with CancelScope() as wait_scope: + while self._tasks: + try: + await _wait(self._tasks) + except CancelledError as exc: + # Shield the scope against further cancellation attempts, + # as they're not productive (#695) + wait_scope.shield = True + self.cancel_scope.cancel() + + # Set exc_val from 
the cancellation exception if it was + # previously unset. However, we should not replace a native + # cancellation exception with one raise by a cancel scope. + if exc_val is None or ( + isinstance(exc_val, CancelledError) + and not is_anyio_cancellation(exc) + ): + exc_val = exc + else: + # If there are no child tasks to wait on, run at least one checkpoint + # anyway + await AsyncIOBackend.cancel_shielded_checkpoint() - # Raise the CancelledError received while waiting for child tasks to exit, - # unless the context manager itself was previously exited with another - # exception, or if any of the child tasks raised an exception other than - # CancelledError - if cancelled_exc_while_waiting_tasks: - if exc_val is None or ignore_exception: - raise cancelled_exc_while_waiting_tasks + self._active = False + if self._exceptions: + raise BaseExceptionGroup( + "unhandled errors in a TaskGroup", self._exceptions + ) + elif exc_val: + raise exc_val + except BaseException as exc: + if self.cancel_scope.__exit__(type(exc), exc, exc.__traceback__): + return True - return ignore_exception + raise + + return self.cancel_scope.__exit__(exc_type, exc_val, exc_tb) + finally: + del exc_val, exc_tb, self._exceptions def _spawn( self, @@ -730,7 +809,7 @@ class TaskGroup(abc.TaskGroup): if not isinstance(exc, CancelledError): self._exceptions.append(exc) - if not self.cancel_scope._parent_cancelled(): + if not self.cancel_scope._effectively_cancelled: self.cancel_scope.cancel() else: task_status_future.set_exception(exc) @@ -806,7 +885,7 @@ class TaskGroup(abc.TaskGroup): # Threads # -_Retval_Queue_Type = Tuple[Optional[T_Retval], Optional[BaseException]] +_Retval_Queue_Type = tuple[Optional[T_Retval], Optional[BaseException]] class WorkerThread(Thread): @@ -955,7 +1034,7 @@ class Process(abc.Process): _stderr: StreamReaderWrapper | None async def aclose(self) -> None: - with CancelScope(shield=True): + with CancelScope(shield=True) as scope: if self._stdin: await 
self._stdin.aclose() if self._stdout: @@ -963,14 +1042,14 @@ class Process(abc.Process): if self._stderr: await self._stderr.aclose() - try: - await self.wait() - except BaseException: - self.kill() - with CancelScope(shield=True): + scope.shield = False + try: await self.wait() - - raise + except BaseException: + scope.shield = True + self.kill() + await self.wait() + raise async def wait(self) -> int: return await self._process.wait() @@ -2022,9 +2101,7 @@ class AsyncIOTaskInfo(TaskInfo): if task_state := _task_states.get(task): if cancel_scope := task_state.cancel_scope: - return cancel_scope.cancel_called or ( - not cancel_scope.shield and cancel_scope._parent_cancelled() - ) + return cancel_scope._effectively_cancelled return False @@ -2118,7 +2195,7 @@ class TestRunner(abc.TestRunner): ) -> T_Retval: if not self._runner_task: self._send_stream, receive_stream = create_memory_object_stream[ - Tuple[Awaitable[Any], asyncio.Future] + tuple[Awaitable[Any], asyncio.Future] ](1) self._runner_task = self.get_loop().create_task( self._run_tests_and_fixtures(receive_stream) @@ -2480,7 +2557,7 @@ class AsyncIOBackend(AsyncBackend): cls, host: str, port: int, local_address: IPSockAddrType | None = None ) -> abc.SocketStream: transport, protocol = cast( - Tuple[asyncio.Transport, StreamProtocol], + tuple[asyncio.Transport, StreamProtocol], await get_running_loop().create_connection( StreamProtocol, host, port, local_addr=local_address ), @@ -2659,7 +2736,7 @@ class AsyncIOBackend(AsyncBackend): @classmethod def open_signal_receiver( cls, *signals: Signals - ) -> ContextManager[AsyncIterator[Signals]]: + ) -> AbstractContextManager[AsyncIterator[Signals]]: return _SignalReceiver(signals) @classmethod diff --git a/contrib/python/anyio/anyio/_backends/_trio.py b/contrib/python/anyio/anyio/_backends/_trio.py index aee974deb6..24dcd74446 100644 --- a/contrib/python/anyio/anyio/_backends/_trio.py +++ b/contrib/python/anyio/anyio/_backends/_trio.py @@ -7,8 +7,18 @@ import 
socket import sys import types import weakref -from collections.abc import AsyncIterator, Iterable +from collections.abc import ( + AsyncGenerator, + AsyncIterator, + Awaitable, + Callable, + Collection, + Coroutine, + Iterable, + Sequence, +) from concurrent.futures import Future +from contextlib import AbstractContextManager from dataclasses import dataclass from functools import partial from io import IOBase @@ -19,15 +29,8 @@ from types import TracebackType from typing import ( IO, Any, - AsyncGenerator, - Awaitable, - Callable, - Collection, - ContextManager, - Coroutine, Generic, NoReturn, - Sequence, TypeVar, cast, overload, @@ -183,13 +186,12 @@ class TaskGroup(abc.TaskGroup): try: return await self._nursery_manager.__aexit__(exc_type, exc_val, exc_tb) except BaseExceptionGroup as exc: - _, rest = exc.split(trio.Cancelled) - if not rest: - cancelled_exc = trio.Cancelled._create() - raise cancelled_exc from exc + if not exc.split(trio.Cancelled)[1]: + raise trio.Cancelled._create() from exc raise finally: + del exc_val, exc_tb self._active = False def start_soon( @@ -1289,7 +1291,7 @@ class TrioBackend(AsyncBackend): @classmethod def open_signal_receiver( cls, *signals: Signals - ) -> ContextManager[AsyncIterator[Signals]]: + ) -> AbstractContextManager[AsyncIterator[Signals]]: return _SignalReceiver(signals) @classmethod diff --git a/contrib/python/anyio/anyio/_core/_fileio.py b/contrib/python/anyio/anyio/_core/_fileio.py index 214a90bfd8..53d3288c29 100644 --- a/contrib/python/anyio/anyio/_core/_fileio.py +++ b/contrib/python/anyio/anyio/_core/_fileio.py @@ -3,7 +3,7 @@ from __future__ import annotations import os import pathlib import sys -from collections.abc import Callable, Iterable, Iterator, Sequence +from collections.abc import AsyncIterator, Callable, Iterable, Iterator, Sequence from dataclasses import dataclass from functools import partial from os import PathLike @@ -12,7 +12,6 @@ from typing import ( TYPE_CHECKING, Any, AnyStr, - AsyncIterator, 
Final, Generic, overload, diff --git a/contrib/python/anyio/anyio/_core/_signals.py b/contrib/python/anyio/anyio/_core/_signals.py index 115c749bd9..f3451d302f 100644 --- a/contrib/python/anyio/anyio/_core/_signals.py +++ b/contrib/python/anyio/anyio/_core/_signals.py @@ -1,13 +1,15 @@ from __future__ import annotations from collections.abc import AsyncIterator +from contextlib import AbstractContextManager from signal import Signals -from typing import ContextManager from ._eventloop import get_async_backend -def open_signal_receiver(*signals: Signals) -> ContextManager[AsyncIterator[Signals]]: +def open_signal_receiver( + *signals: Signals, +) -> AbstractContextManager[AsyncIterator[Signals]]: """ Start receiving operating system signals. diff --git a/contrib/python/anyio/anyio/_core/_streams.py b/contrib/python/anyio/anyio/_core/_streams.py index aa6b0c222a..6a9814e5a9 100644 --- a/contrib/python/anyio/anyio/_core/_streams.py +++ b/contrib/python/anyio/anyio/_core/_streams.py @@ -1,7 +1,7 @@ from __future__ import annotations import math -from typing import Tuple, TypeVar +from typing import TypeVar from warnings import warn from ..streams.memory import ( @@ -14,7 +14,7 @@ T_Item = TypeVar("T_Item") class create_memory_object_stream( - Tuple[MemoryObjectSendStream[T_Item], MemoryObjectReceiveStream[T_Item]], + tuple[MemoryObjectSendStream[T_Item], MemoryObjectReceiveStream[T_Item]], ): """ Create a memory object stream. diff --git a/contrib/python/anyio/anyio/_core/_subprocesses.py b/contrib/python/anyio/anyio/_core/_subprocesses.py index 1ac2d549df..7ba41a5b03 100644 --- a/contrib/python/anyio/anyio/_core/_subprocesses.py +++ b/contrib/python/anyio/anyio/_core/_subprocesses.py @@ -160,38 +160,25 @@ async def open_process( child process prior to the execution of the subprocess. (POSIX only) :param pass_fds: sequence of file descriptors to keep open between the parent and child processes. 
(POSIX only) - :param user: effective user to run the process as (Python >= 3.9; POSIX only) - :param group: effective group to run the process as (Python >= 3.9; POSIX only) - :param extra_groups: supplementary groups to set in the subprocess (Python >= 3.9; - POSIX only) + :param user: effective user to run the process as (POSIX only) + :param group: effective group to run the process as (POSIX only) + :param extra_groups: supplementary groups to set in the subprocess (POSIX only) :param umask: if not negative, this umask is applied in the child process before - running the given command (Python >= 3.9; POSIX only) + running the given command (POSIX only) :return: an asynchronous process object """ kwargs: dict[str, Any] = {} if user is not None: - if sys.version_info < (3, 9): - raise TypeError("the 'user' argument requires Python 3.9 or later") - kwargs["user"] = user if group is not None: - if sys.version_info < (3, 9): - raise TypeError("the 'group' argument requires Python 3.9 or later") - kwargs["group"] = group if extra_groups is not None: - if sys.version_info < (3, 9): - raise TypeError("the 'extra_groups' argument requires Python 3.9 or later") - kwargs["extra_groups"] = group if umask >= 0: - if sys.version_info < (3, 9): - raise TypeError("the 'umask' argument requires Python 3.9 or later") - kwargs["umask"] = umask return await get_async_backend().open_process( diff --git a/contrib/python/anyio/anyio/abc/_eventloop.py b/contrib/python/anyio/anyio/abc/_eventloop.py index 2c73bb9ffb..93d0e9d25b 100644 --- a/contrib/python/anyio/anyio/abc/_eventloop.py +++ b/contrib/python/anyio/anyio/abc/_eventloop.py @@ -3,7 +3,8 @@ from __future__ import annotations import math import sys from abc import ABCMeta, abstractmethod -from collections.abc import AsyncIterator, Awaitable +from collections.abc import AsyncIterator, Awaitable, Callable, Sequence +from contextlib import AbstractContextManager from os import PathLike from signal import Signals from socket 
import AddressFamily, SocketKind, socket @@ -11,9 +12,6 @@ from typing import ( IO, TYPE_CHECKING, Any, - Callable, - ContextManager, - Sequence, TypeVar, Union, overload, @@ -352,7 +350,7 @@ class AsyncBackend(metaclass=ABCMeta): @abstractmethod def open_signal_receiver( cls, *signals: Signals - ) -> ContextManager[AsyncIterator[Signals]]: + ) -> AbstractContextManager[AsyncIterator[Signals]]: pass @classmethod diff --git a/contrib/python/anyio/anyio/abc/_sockets.py b/contrib/python/anyio/anyio/abc/_sockets.py index b321225a7b..1c6a450cdc 100644 --- a/contrib/python/anyio/anyio/abc/_sockets.py +++ b/contrib/python/anyio/anyio/abc/_sockets.py @@ -8,7 +8,7 @@ from io import IOBase from ipaddress import IPv4Address, IPv6Address from socket import AddressFamily from types import TracebackType -from typing import Any, Tuple, TypeVar, Union +from typing import Any, TypeVar, Union from .._core._typedattr import ( TypedAttributeProvider, @@ -19,10 +19,10 @@ from ._streams import ByteStream, Listener, UnreliableObjectStream from ._tasks import TaskGroup IPAddressType = Union[str, IPv4Address, IPv6Address] -IPSockAddrType = Tuple[str, int] +IPSockAddrType = tuple[str, int] SockAddrType = Union[IPSockAddrType, str] -UDPPacketType = Tuple[bytes, IPSockAddrType] -UNIXDatagramPacketType = Tuple[bytes, str] +UDPPacketType = tuple[bytes, IPSockAddrType] +UNIXDatagramPacketType = tuple[bytes, str] T_Retval = TypeVar("T_Retval") diff --git a/contrib/python/anyio/anyio/from_thread.py b/contrib/python/anyio/anyio/from_thread.py index b8785845ba..93a4cfe8e4 100644 --- a/contrib/python/anyio/anyio/from_thread.py +++ b/contrib/python/anyio/anyio/from_thread.py @@ -3,15 +3,17 @@ from __future__ import annotations import sys from collections.abc import Awaitable, Callable, Generator from concurrent.futures import Future -from contextlib import AbstractContextManager, contextmanager +from contextlib import ( + AbstractAsyncContextManager, + AbstractContextManager, + contextmanager, +) from 
dataclasses import dataclass, field from inspect import isawaitable from threading import Lock, Thread, get_ident from types import TracebackType from typing import ( Any, - AsyncContextManager, - ContextManager, Generic, TypeVar, cast, @@ -87,7 +89,9 @@ class _BlockingAsyncContextManager(Generic[T_co], AbstractContextManager): type[BaseException] | None, BaseException | None, TracebackType | None ] = (None, None, None) - def __init__(self, async_cm: AsyncContextManager[T_co], portal: BlockingPortal): + def __init__( + self, async_cm: AbstractAsyncContextManager[T_co], portal: BlockingPortal + ): self._async_cm = async_cm self._portal = portal @@ -374,8 +378,8 @@ class BlockingPortal: return f, task_status_future.result() def wrap_async_context_manager( - self, cm: AsyncContextManager[T_co] - ) -> ContextManager[T_co]: + self, cm: AbstractAsyncContextManager[T_co] + ) -> AbstractContextManager[T_co]: """ Wrap an async context manager as a synchronous context manager via this portal. diff --git a/contrib/python/anyio/anyio/pytest_plugin.py b/contrib/python/anyio/anyio/pytest_plugin.py index b7d9305614..4a0d59dd06 100644 --- a/contrib/python/anyio/anyio/pytest_plugin.py +++ b/contrib/python/anyio/anyio/pytest_plugin.py @@ -4,7 +4,7 @@ import sys from collections.abc import Generator, Iterator from contextlib import ExitStack, contextmanager from inspect import isasyncgenfunction, iscoroutinefunction, ismethod -from typing import Any, Dict, Tuple, cast +from typing import Any, cast import pytest import sniffio @@ -28,7 +28,7 @@ def extract_backend_and_options(backend: object) -> tuple[str, dict[str, Any]]: return backend, {} elif isinstance(backend, tuple) and len(backend) == 2: if isinstance(backend[0], str) and isinstance(backend[1], dict): - return cast(Tuple[str, Dict[str, Any]], backend) + return cast(tuple[str, dict[str, Any]], backend) raise TypeError("anyio_backend must be either a string or tuple of (string, dict)") diff --git 
a/contrib/python/anyio/anyio/streams/tls.py b/contrib/python/anyio/anyio/streams/tls.py index d01c8e6f4c..b6961bee16 100644 --- a/contrib/python/anyio/anyio/streams/tls.py +++ b/contrib/python/anyio/anyio/streams/tls.py @@ -7,7 +7,7 @@ import sys from collections.abc import Callable, Mapping from dataclasses import dataclass from functools import wraps -from typing import Any, Tuple, TypeVar +from typing import Any, TypeVar from .. import ( BrokenResourceError, @@ -25,8 +25,8 @@ else: T_Retval = TypeVar("T_Retval") PosArgsT = TypeVarTuple("PosArgsT") -_PCTRTT = Tuple[Tuple[str, str], ...] -_PCTRTTT = Tuple[_PCTRTT, ...] +_PCTRTT = tuple[tuple[str, str], ...] +_PCTRTTT = tuple[_PCTRTT, ...] class TLSAttribute(TypedAttributeSet): diff --git a/contrib/python/anyio/ya.make b/contrib/python/anyio/ya.make index bb56a53ce5..aadbb5b297 100644 --- a/contrib/python/anyio/ya.make +++ b/contrib/python/anyio/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(4.6.2) +VERSION(4.6.2.post1) LICENSE(MIT) diff --git a/contrib/python/pyparsing/py3/.dist-info/METADATA b/contrib/python/pyparsing/py3/.dist-info/METADATA index 1aa7a1fc04..ff6f9b6227 100644 --- a/contrib/python/pyparsing/py3/.dist-info/METADATA +++ b/contrib/python/pyparsing/py3/.dist-info/METADATA @@ -1,9 +1,9 @@ Metadata-Version: 2.1 Name: pyparsing -Version: 3.1.4 +Version: 3.2.0 Summary: pyparsing module - Classes and methods to define and execute parsing grammars Author-email: Paul McGuire <ptmcg.gm+pyparsing@gmail.com> -Requires-Python: >=3.6.8 +Requires-Python: >=3.9 Description-Content-Type: text/x-rst Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers @@ -12,9 +12,6 @@ Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: 
Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 diff --git a/contrib/python/pyparsing/py3/pyparsing/__init__.py b/contrib/python/pyparsing/py3/pyparsing/__init__.py index a440cfbefa..543ceb62bd 100644 --- a/contrib/python/pyparsing/py3/pyparsing/__init__.py +++ b/contrib/python/pyparsing/py3/pyparsing/__init__.py @@ -120,8 +120,8 @@ class version_info(NamedTuple): return f"{__name__}.{type(self).__name__}({', '.join('{}={!r}'.format(*nv) for nv in zip(self._fields, self))})" -__version_info__ = version_info(3, 1, 4, "final", 1) -__version_time__ = "25 Aug 2024 14:40 UTC" +__version_info__ = version_info(3, 2, 0, "final", 1) +__version_time__ = "13 Oct 2024 09:46 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" @@ -131,9 +131,9 @@ from .exceptions import * from .actions import * from .core import __diag__, __compat__ from .results import * -from .core import * # type: ignore[misc, assignment] +from .core import * from .core import _builtin_exprs as core_builtin_exprs -from .helpers import * # type: ignore[misc, assignment] +from .helpers import * from .helpers import _builtin_exprs as helper_builtin_exprs from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode @@ -147,9 +147,9 @@ from .common import ( if "pyparsing_unicode" not in globals(): pyparsing_unicode = unicode # type: ignore[misc] if "pyparsing_common" not in globals(): - pyparsing_common = common # type: ignore[misc] + pyparsing_common = common if "pyparsing_test" not in globals(): - pyparsing_test = testing # type: ignore[misc] + pyparsing_test = testing core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs @@ -208,6 +208,7 @@ __all__ = [ "StringEnd", "StringStart", "Suppress", + "Tag", "Token", "TokenConverter", "White", diff --git 
a/contrib/python/pyparsing/py3/pyparsing/core.py b/contrib/python/pyparsing/py3/pyparsing/core.py index cbe73c987a..4f43c3bf99 100644 --- a/contrib/python/pyparsing/py3/pyparsing/core.py +++ b/contrib/python/pyparsing/py3/pyparsing/core.py @@ -1,7 +1,9 @@ # # core.py # +from __future__ import annotations +import collections.abc from collections import deque import os import typing @@ -9,12 +11,9 @@ from typing import ( Any, Callable, Generator, - List, NamedTuple, Sequence, - Set, TextIO, - Tuple, Union, cast, ) @@ -51,12 +50,7 @@ from .results import ParseResults, _ParseResultsWithOffset from .unicode import pyparsing_unicode _MAX_INT = sys.maxsize -str_type: Tuple[type, ...] = (str, bytes) - -if sys.version_info >= (3, 7): - _RePattern = re.Pattern -else: - _RePattern = typing.Pattern +str_type: tuple[type, ...] = (str, bytes) # # Copyright (c) 2003-2022 Paul T. McGuire @@ -81,18 +75,7 @@ else: # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # - -if sys.version_info >= (3, 8): - from functools import cached_property -else: - - class cached_property: - def __init__(self, func): - self._func = func - - def __get__(self, instance, owner=None): - ret = instance.__dict__[self._func.__name__] = self._func(instance) - return ret +from functools import cached_property class __compat__(__config_flags): @@ -230,7 +213,7 @@ _single_arg_builtins = { # fmt: on _generatorType = types.GeneratorType -ParseImplReturnType = Tuple[int, Any] +ParseImplReturnType = tuple[int, Any] PostParseReturnType = Union[ParseResults, Sequence[ParseResults]] ParseAction = Union[ Callable[[], Any], @@ -260,12 +243,26 @@ hexnums: str = nums + "ABCDEFabcdef" alphanums: str = alphas + nums printables: str = "".join([c for c in string.printable if c not in string.whitespace]) + +class _ParseActionIndexError(Exception): + """ + Internal wrapper around IndexError so that IndexErrors raised inside + parse actions aren't misinterpreted as IndexErrors raised inside + ParserElement parseImpl 
methods. + """ + + def __init__(self, msg: str, exc: BaseException): + self.msg: str = msg + self.exc: BaseException = exc + + _trim_arity_call_line: traceback.StackSummary = None # type: ignore[assignment] +pa_call_line_synth = () def _trim_arity(func, max_limit=3): """decorator to trim function calls to match the arity of the target""" - global _trim_arity_call_line + global _trim_arity_call_line, pa_call_line_synth if func in _single_arg_builtins: return lambda s, l, t: func(t) @@ -280,8 +277,8 @@ def _trim_arity(func, max_limit=3): LINE_DIFF = 9 # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! - _trim_arity_call_line = (_trim_arity_call_line or traceback.extract_stack(limit=2)[-1]) - pa_call_line_synth = (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF) + _trim_arity_call_line = _trim_arity_call_line or traceback.extract_stack(limit=2)[-1] + pa_call_line_synth = pa_call_line_synth or (_trim_arity_call_line[0], _trim_arity_call_line[1] + LINE_DIFF) def wrapper(*args): nonlocal found_arity, limit @@ -311,6 +308,11 @@ def _trim_arity(func, max_limit=3): continue raise + except IndexError as ie: + # wrap IndexErrors inside a _ParseActionIndexError + raise _ParseActionIndexError( + "IndexError raised in parse action", ie + ).with_traceback(None) # fmt: on # copy func name to wrapper for sensible debug output @@ -351,7 +353,7 @@ def condition_as_parse_action( def _default_start_debug_action( - instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False + instring: str, loc: int, expr: ParserElement, cache_hit: bool = False ): cache_hit_str = "*" if cache_hit else "" print( @@ -367,7 +369,7 @@ def _default_success_debug_action( instring: str, startloc: int, endloc: int, - expr: "ParserElement", + expr: ParserElement, toks: ParseResults, cache_hit: bool = False, ): @@ -378,7 +380,7 @@ def _default_success_debug_action( def 
_default_exception_debug_action( instring: str, loc: int, - expr: "ParserElement", + expr: ParserElement, exc: Exception, cache_hit: bool = False, ): @@ -443,7 +445,7 @@ class ParserElement(ABC): @classmethod def using_each(cls, seq, **class_kwargs): """ - Yields a sequence of class(obj, **class_kwargs) for obj in seq. + Yields a sequence of ``class(obj, **class_kwargs)`` for obj in seq. Example:: @@ -458,7 +460,7 @@ class ParserElement(ABC): debug_fail: typing.Optional[DebugExceptionAction] def __init__(self, savelist: bool = False): - self.parseAction: List[ParseAction] = list() + self.parseAction: list[ParseAction] = list() self.failAction: typing.Optional[ParseFailAction] = None self.customName: str = None # type: ignore[assignment] self._defaultName: typing.Optional[str] = None @@ -470,7 +472,7 @@ class ParserElement(ABC): # used when checking for left-recursion self.mayReturnEmpty = False self.keepTabs = False - self.ignoreExprs: List["ParserElement"] = list() + self.ignoreExprs: list[ParserElement] = list() self.debug = False self.streamlined = False # optimize exception handling for subclasses that don't advance parse index @@ -483,9 +485,9 @@ class ParserElement(ABC): # avoid redundant calls to preParse self.callPreparse = True self.callDuringTry = False - self.suppress_warnings_: List[Diagnostics] = [] + self.suppress_warnings_: list[Diagnostics] = [] - def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement": + def suppress_warning(self, warning_type: Diagnostics) -> ParserElement: """ Suppress warnings emitted for a particular diagnostic on this expression. @@ -518,7 +520,7 @@ class ParserElement(ABC): to_visit.extend(cur.recurse()) yield cur - def copy(self) -> "ParserElement": + def copy(self) -> ParserElement: """ Make a copy of this :class:`ParserElement`. 
Useful for defining different parse actions for the same parsing pattern, using copies of @@ -549,7 +551,7 @@ class ParserElement(ABC): def set_results_name( self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False - ) -> "ParserElement": + ) -> ParserElement: """ Define name for referencing matching tokens as a nested attribute of the returned parse results. @@ -581,7 +583,7 @@ class ParserElement(ABC): listAllMatches = listAllMatches or list_all_matches return self._setResultsName(name, listAllMatches) - def _setResultsName(self, name, list_all_matches=False) -> "ParserElement": + def _setResultsName(self, name, list_all_matches=False) -> ParserElement: if name is None: return self newself = self.copy() @@ -592,7 +594,7 @@ class ParserElement(ABC): newself.modalResults = not list_all_matches return newself - def set_break(self, break_flag: bool = True) -> "ParserElement": + def set_break(self, break_flag: bool = True) -> ParserElement: """ Method to invoke the Python pdb debugger when this element is about to be parsed. 
Set ``break_flag`` to ``True`` to enable, ``False`` to @@ -602,19 +604,17 @@ class ParserElement(ABC): _parseMethod = self._parse def breaker(instring, loc, do_actions=True, callPreParse=True): - import pdb - - # this call to pdb.set_trace() is intentional, not a checkin error - pdb.set_trace() + # this call to breakpoint() is intentional, not a checkin error + breakpoint() return _parseMethod(instring, loc, do_actions, callPreParse) breaker._originalParseMethod = _parseMethod # type: ignore [attr-defined] - self._parse = breaker # type: ignore [assignment] + self._parse = breaker # type: ignore [method-assign] elif hasattr(self._parse, "_originalParseMethod"): - self._parse = self._parse._originalParseMethod # type: ignore [attr-defined, assignment] + self._parse = self._parse._originalParseMethod # type: ignore [method-assign] return self - def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> "ParserElement": + def set_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement: """ Define one or more actions to perform when successfully matching parse element definition. @@ -702,7 +702,7 @@ class ParserElement(ABC): return self - def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> "ParserElement": + def add_parse_action(self, *fns: ParseAction, **kwargs: Any) -> ParserElement: """ Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. @@ -714,7 +714,7 @@ class ParserElement(ABC): ) return self - def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> "ParserElement": + def add_condition(self, *fns: ParseCondition, **kwargs: Any) -> ParserElement: """Add a boolean predicate function to expression's list of parse actions. See :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, functions passed to ``add_condition`` need to return boolean success/fail of the condition. 
@@ -751,7 +751,7 @@ class ParserElement(ABC): ) return self - def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": + def set_fail_action(self, fn: ParseFailAction) -> ParserElement: """ Define action to perform if parsing fails at this expression. Fail acton fn is a callable function that takes the arguments @@ -809,8 +809,7 @@ class ParserElement(ABC): # @profile def _parseNoCache( self, instring, loc, do_actions=True, callPreParse=True - ) -> Tuple[int, ParseResults]: - TRY, MATCH, FAIL = 0, 1, 2 + ) -> tuple[int, ParseResults]: debugging = self.debug # and do_actions) len_instring = len(instring) @@ -934,25 +933,42 @@ class ParserElement(ABC): # cache for left-recursion in Forward references recursion_lock = RLock() - recursion_memos: typing.Dict[ - Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] + recursion_memos: collections.abc.MutableMapping[ + tuple[int, Forward, bool], tuple[int, Union[ParseResults, Exception]] ] = {} - class _CacheType(dict): + class _CacheType(typing.Protocol): """ - class to help type checking + Class to be used for packrat and left-recursion cacheing of results + and exceptions. """ not_in_cache: bool - def get(self, *args): ... + def get(self, *args) -> typing.Any: ... + + def set(self, *args) -> None: ... + + def clear(self) -> None: ... + + class NullCache(dict): + """ + A null cache type for initialization of the packrat_cache class variable. + If/when enable_packrat() is called, this null cache will be replaced by a + proper _CacheType class instance. + """ + + not_in_cache: bool = True + + def get(self, *args) -> typing.Any: ... - def set(self, *args): ... + def set(self, *args) -> None: ... - # argument cache for optimizing repeated calls when backtracking through recursive expressions - packrat_cache = ( - _CacheType() - ) # set later by enable_packrat(); this is here so that reset_cache() doesn't fail + def clear(self) -> None: ... 
+ + # class-level argument cache for optimizing repeated calls when backtracking + # through recursive expressions + packrat_cache: _CacheType = NullCache() packrat_cache_lock = RLock() packrat_cache_stats = [0, 0] @@ -960,9 +976,8 @@ class ParserElement(ABC): # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression def _parseCache( self, instring, loc, do_actions=True, callPreParse=True - ) -> Tuple[int, ParseResults]: + ) -> tuple[int, ParseResults]: HIT, MISS = 0, 1 - TRY, MATCH, FAIL = 0, 1, 2 lookup = (self, instring, loc, callPreParse, do_actions) with ParserElement.packrat_cache_lock: cache = ParserElement.packrat_cache @@ -995,7 +1010,7 @@ class ParserElement(ABC): pass raise value - value = cast(Tuple[int, ParseResults, int], value) + value = cast(tuple[int, ParseResults, int], value) loc_, result, endloc = value[0], value[1].copy(), value[2] if self.debug and self.debugActions.debug_match: try: @@ -1075,7 +1090,7 @@ class ParserElement(ABC): elif ParserElement._packratEnabled: raise RuntimeError("Packrat and Bounded Recursion are not compatible") if cache_size_limit is None: - ParserElement.recursion_memos = _UnboundedMemo() # type: ignore[assignment] + ParserElement.recursion_memos = _UnboundedMemo() elif cache_size_limit > 0: ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) # type: ignore[assignment] else: @@ -1128,7 +1143,7 @@ class ParserElement(ABC): if cache_size_limit is None: ParserElement.packrat_cache = _UnboundedCache() else: - ParserElement.packrat_cache = _FifoCache(cache_size_limit) # type: ignore[assignment] + ParserElement.packrat_cache = _FifoCache(cache_size_limit) ParserElement._parse = ParserElement._parseCache def parse_string( @@ -1191,12 +1206,14 @@ class ParserElement(ABC): loc = self.preParse(instring, loc) se = Empty() + StringEnd().set_debug(False) se._parse(instring, loc) + except _ParseActionIndexError as pa_exc: + raise pa_exc.exc except ParseBaseException as exc: 
if ParserElement.verbose_stacktrace: raise - else: - # catch and re-raise exception from here, clearing out pyparsing internal stack trace - raise exc.with_traceback(None) + + # catch and re-raise exception from here, clearing out pyparsing internal stack trace + raise exc.with_traceback(None) else: return tokens @@ -1205,10 +1222,11 @@ class ParserElement(ABC): instring: str, max_matches: int = _MAX_INT, overlap: bool = False, + always_skip_whitespace=True, *, debug: bool = False, maxMatches: int = _MAX_INT, - ) -> Generator[Tuple[ParseResults, int, int], None, None]: + ) -> Generator[tuple[ParseResults, int, int], None, None]: """ Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location. May be called with optional @@ -1249,7 +1267,13 @@ class ParserElement(ABC): instring = str(instring).expandtabs() instrlen = len(instring) loc = 0 - preparseFn = self.preParse + if always_skip_whitespace: + preparser = Empty() + preparser.ignoreExprs = self.ignoreExprs + preparser.whiteChars = self.whiteChars + preparseFn = preparser.preParse + else: + preparseFn = self.preParse parseFn = self._parse ParserElement.resetCache() matches = 0 @@ -1311,14 +1335,15 @@ class ParserElement(ABC): Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. 
""" - out: List[str] = [] + out: list[str] = [] lastE = 0 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to # keep string locs straight between transform_string and scan_string self.keepTabs = True try: for t, s, e in self.scan_string(instring, debug=debug): - out.append(instring[lastE:s]) + if s > lastE: + out.append(instring[lastE:s]) lastE = e if not t: @@ -1372,7 +1397,12 @@ class ParserElement(ABC): maxMatches = min(maxMatches, max_matches) try: return ParseResults( - [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)] + [ + t + for t, s, e in self.scan_string( + instring, maxMatches, always_skip_whitespace=False, debug=debug + ) + ] ) except ParseBaseException as exc: if ParserElement.verbose_stacktrace: @@ -1413,7 +1443,7 @@ class ParserElement(ABC): last = e yield instring[last:] - def __add__(self, other) -> "ParserElement": + def __add__(self, other) -> ParserElement: """ Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` converts them to :class:`Literal`\\ s by default. 
@@ -1449,7 +1479,7 @@ class ParserElement(ABC): return NotImplemented return And([self, other]) - def __radd__(self, other) -> "ParserElement": + def __radd__(self, other) -> ParserElement: """ Implementation of ``+`` operator when left operand is not a :class:`ParserElement` """ @@ -1462,7 +1492,7 @@ class ParserElement(ABC): return NotImplemented return other + self - def __sub__(self, other) -> "ParserElement": + def __sub__(self, other) -> ParserElement: """ Implementation of ``-`` operator, returns :class:`And` with error stop """ @@ -1472,7 +1502,7 @@ class ParserElement(ABC): return NotImplemented return self + And._ErrorStop() + other - def __rsub__(self, other) -> "ParserElement": + def __rsub__(self, other) -> ParserElement: """ Implementation of ``-`` operator when left operand is not a :class:`ParserElement` """ @@ -1482,7 +1512,7 @@ class ParserElement(ABC): return NotImplemented return other - self - def __mul__(self, other) -> "ParserElement": + def __mul__(self, other) -> ParserElement: """ Implementation of ``*`` operator, allows use of ``expr * 3`` in place of ``expr + expr + expr``. 
Expressions may also be multiplied by a 2-integer @@ -1562,10 +1592,10 @@ class ParserElement(ABC): ret = And([self] * minElements) return ret - def __rmul__(self, other) -> "ParserElement": + def __rmul__(self, other) -> ParserElement: return self.__mul__(other) - def __or__(self, other) -> "ParserElement": + def __or__(self, other) -> ParserElement: """ Implementation of ``|`` operator - returns :class:`MatchFirst` """ @@ -1581,7 +1611,7 @@ class ParserElement(ABC): return NotImplemented return MatchFirst([self, other]) - def __ror__(self, other) -> "ParserElement": + def __ror__(self, other) -> ParserElement: """ Implementation of ``|`` operator when left operand is not a :class:`ParserElement` """ @@ -1591,7 +1621,7 @@ class ParserElement(ABC): return NotImplemented return other | self - def __xor__(self, other) -> "ParserElement": + def __xor__(self, other) -> ParserElement: """ Implementation of ``^`` operator - returns :class:`Or` """ @@ -1601,7 +1631,7 @@ class ParserElement(ABC): return NotImplemented return Or([self, other]) - def __rxor__(self, other) -> "ParserElement": + def __rxor__(self, other) -> ParserElement: """ Implementation of ``^`` operator when left operand is not a :class:`ParserElement` """ @@ -1611,7 +1641,7 @@ class ParserElement(ABC): return NotImplemented return other ^ self - def __and__(self, other) -> "ParserElement": + def __and__(self, other) -> ParserElement: """ Implementation of ``&`` operator - returns :class:`Each` """ @@ -1621,7 +1651,7 @@ class ParserElement(ABC): return NotImplemented return Each([self, other]) - def __rand__(self, other) -> "ParserElement": + def __rand__(self, other) -> ParserElement: """ Implementation of ``&`` operator when left operand is not a :class:`ParserElement` """ @@ -1631,7 +1661,7 @@ class ParserElement(ABC): return NotImplemented return other & self - def __invert__(self) -> "ParserElement": + def __invert__(self) -> ParserElement: """ Implementation of ``~`` operator - returns 
:class:`NotAny` """ @@ -1701,7 +1731,7 @@ class ParserElement(ABC): return ret - def __call__(self, name: typing.Optional[str] = None) -> "ParserElement": + def __call__(self, name: typing.Optional[str] = None) -> ParserElement: """ Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. @@ -1721,14 +1751,14 @@ class ParserElement(ABC): return self.copy() - def suppress(self) -> "ParserElement": + def suppress(self) -> ParserElement: """ Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from cluttering up returned output. """ return Suppress(self) - def ignore_whitespace(self, recursive: bool = True) -> "ParserElement": + def ignore_whitespace(self, recursive: bool = True) -> ParserElement: """ Enables the skipping of whitespace before matching the characters in the :class:`ParserElement`'s defined pattern. @@ -1738,7 +1768,7 @@ class ParserElement(ABC): self.skipWhitespace = True return self - def leave_whitespace(self, recursive: bool = True) -> "ParserElement": + def leave_whitespace(self, recursive: bool = True) -> ParserElement: """ Disables the skipping of whitespace before matching the characters in the :class:`ParserElement`'s defined pattern. This is normally only used internally by @@ -1750,8 +1780,8 @@ class ParserElement(ABC): return self def set_whitespace_chars( - self, chars: Union[Set[str], str], copy_defaults: bool = False - ) -> "ParserElement": + self, chars: Union[set[str], str], copy_defaults: bool = False + ) -> ParserElement: """ Overrides the default whitespace chars """ @@ -1760,7 +1790,7 @@ class ParserElement(ABC): self.copyDefaultWhiteChars = copy_defaults return self - def parse_with_tabs(self) -> "ParserElement": + def parse_with_tabs(self) -> ParserElement: """ Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string. 
Must be called before ``parse_string`` when the input grammar contains elements that @@ -1769,7 +1799,7 @@ class ParserElement(ABC): self.keepTabs = True return self - def ignore(self, other: "ParserElement") -> "ParserElement": + def ignore(self, other: ParserElement) -> ParserElement: """ Define expression to be ignored (e.g., comments) while doing pattern matching; may be called repeatedly, to define multiple comment or other @@ -1800,7 +1830,7 @@ class ParserElement(ABC): start_action: DebugStartAction, success_action: DebugSuccessAction, exception_action: DebugExceptionAction, - ) -> "ParserElement": + ) -> ParserElement: """ Customize display of debugging messages while doing pattern matching: @@ -1821,7 +1851,7 @@ class ParserElement(ABC): self.debug = True return self - def set_debug(self, flag: bool = True, recurse: bool = False) -> "ParserElement": + def set_debug(self, flag: bool = True, recurse: bool = False) -> ParserElement: """ Enable display of debugging messages while doing pattern matching. Set ``flag`` to ``True`` to enable, ``False`` to disable. @@ -1886,7 +1916,7 @@ class ParserElement(ABC): Child classes must define this method, which defines how the ``default_name`` is set. """ - def set_name(self, name: typing.Optional[str]) -> "ParserElement": + def set_name(self, name: typing.Optional[str]) -> ParserElement: """ Define name for this expression, makes debugging and exception messages clearer. 
If `__diag__.enable_debug_on_named_expressions` is set to True, setting a name will also @@ -1903,7 +1933,7 @@ class ParserElement(ABC): integer.set_name("integer") integer.parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) """ - self.customName = name + self.customName = name # type: ignore[assignment] self.errmsg = f"Expected {str(self)}" if __diag__.enable_debug_on_named_expressions: @@ -1926,12 +1956,12 @@ class ParserElement(ABC): def __repr__(self) -> str: return str(self) - def streamline(self) -> "ParserElement": + def streamline(self) -> ParserElement: self.streamlined = True self._defaultName = None return self - def recurse(self) -> List["ParserElement"]: + def recurse(self) -> list[ParserElement]: return [] def _checkRecursion(self, parseElementList): @@ -2018,9 +2048,9 @@ class ParserElement(ABC): def run_tests( self, - tests: Union[str, List[str]], + tests: Union[str, list[str]], parse_all: bool = True, - comment: typing.Optional[Union["ParserElement", str]] = "#", + comment: typing.Optional[Union[ParserElement, str]] = "#", full_dump: bool = True, print_results: bool = True, failure_tests: bool = False, @@ -2037,7 +2067,7 @@ class ParserElement(ABC): postParse: typing.Optional[ Callable[[str, ParseResults], typing.Optional[str]] ] = None, - ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]: + ) -> tuple[bool, list[tuple[str, Union[ParseResults, Exception]]]]: """ Execute the parse expression on a series of test strings, showing each test, the parsed results or where the parse failed. 
Quick and easy way to @@ -2155,8 +2185,8 @@ class ParserElement(ABC): print_ = file.write result: Union[ParseResults, Exception] - allResults: List[Tuple[str, Union[ParseResults, Exception]]] = [] - comments: List[str] = [] + allResults: list[tuple[str, Union[ParseResults, Exception]]] = [] + comments: list[str] = [] success = True NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) BOM = "\ufeff" @@ -2187,7 +2217,18 @@ class ParserElement(ABC): success = success and failureTests result = pe except Exception as exc: - out.append(f"FAIL-EXCEPTION: {type(exc).__name__}: {exc}") + tag = "FAIL-EXCEPTION" + + # see if this exception was raised in a parse action + tb = exc.__traceback__ + it = iter(traceback.walk_tb(tb)) + for f, line in it: + if (f.f_code.co_filename, line) == pa_call_line_synth: + next_f = next(it)[0] + tag += f" (raised in parse action {next_f.f_code.co_name!r})" + break + + out.append(f"{tag}: {type(exc).__name__}: {exc}") if ParserElement.verbose_stacktrace: out.extend(traceback.format_tb(exc.__traceback__)) success = success and failureTests @@ -2323,7 +2364,7 @@ class _PendingSkip(ParserElement): def _generateDefaultName(self) -> str: return str(self.anchor + Empty()).replace("Empty", "...") - def __add__(self, other) -> "ParserElement": + def __add__(self, other) -> ParserElement: skipper = SkipTo(other).set_name("...")("_skipped*") if self.must_skip: @@ -2505,9 +2546,8 @@ class Keyword(Token): match_string = matchString or match_string self.match = match_string self.matchLen = len(match_string) - try: - self.firstMatchChar = match_string[0] - except IndexError: + self.firstMatchChar = match_string[:1] + if not self.firstMatchChar: raise ValueError("null string passed to Keyword; use Empty() instead") self.errmsg = f"Expected {type(self).__name__} {self.name}" self.mayReturnEmpty = False @@ -2522,7 +2562,7 @@ class Keyword(Token): return repr(self.match) def parseImpl(self, instring, loc, do_actions=True) -> 
ParseImplReturnType: - errmsg = self.errmsg + errmsg = self.errmsg or "" errloc = loc if self.caseless: if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: @@ -2900,7 +2940,7 @@ class Word(Token): self.re = None # type: ignore[assignment] else: self.re_match = self.re.match - self.parseImpl = self.parseImpl_regex # type: ignore[assignment] + self.parseImpl = self.parseImpl_regex # type: ignore[method-assign] def _generateDefaultName(self) -> str: def charsAsStr(s): @@ -3040,48 +3080,65 @@ class Regex(Token): self._re = None self.reString = self.pattern = pattern - self.flags = flags elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): self._re = pattern self.pattern = self.reString = pattern.pattern - self.flags = flags + + elif callable(pattern): + # defer creating this pattern until we really need it + self.pattern = pattern + self._re = None else: raise TypeError( - "Regex may only be constructed with a string or a compiled RE object" + "Regex may only be constructed with a string or a compiled RE object," + " or a callable that takes no arguments and returns a string or a" + " compiled RE object" ) + self.flags = flags self.errmsg = f"Expected {self.name}" self.mayIndexError = False self.asGroupList = asGroupList self.asMatch = asMatch if self.asGroupList: - self.parseImpl = self.parseImplAsGroupList # type: ignore [assignment] + self.parseImpl = self.parseImplAsGroupList # type: ignore [method-assign] if self.asMatch: - self.parseImpl = self.parseImplAsMatch # type: ignore [assignment] + self.parseImpl = self.parseImplAsMatch # type: ignore [method-assign] @cached_property - def re(self) -> _RePattern: + def re(self) -> re.Pattern: if self._re: return self._re + if callable(self.pattern): + # replace self.pattern with the string returned by calling self.pattern() + self.pattern = cast(Callable[[], str], self.pattern)() + + # see if we got a compiled RE back instead of a str - if so, we're done + if hasattr(self.pattern, "pattern") 
and hasattr(self.pattern, "match"): + self._re = cast(re.Pattern[str], self.pattern) + self.pattern = self.reString = self._re.pattern + return self._re + try: - return re.compile(self.pattern, self.flags) + self._re = re.compile(self.pattern, self.flags) + return self._re except re.error: raise ValueError(f"invalid pattern ({self.pattern!r}) passed to Regex") @cached_property - def re_match(self) -> Callable[[str], Any]: + def re_match(self) -> Callable[[str, int], Any]: return self.re.match @cached_property - def mayReturnEmpty(self) -> bool: - return self.re_match("") is not None + def mayReturnEmpty(self) -> bool: # type: ignore[override] + return self.re_match("", 0) is not None def _generateDefaultName(self) -> str: - unescaped = self.pattern.replace("\\\\", "\\") - return f"Re:({unescaped!r})" + unescaped = repr(self.pattern).replace("\\\\", "\\") + return f"Re:({unescaped})" def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: result = self.re_match(instring, loc) @@ -3243,7 +3300,7 @@ class QuotedString(Token): # fmt: off # build up re pattern for the content between the quote delimiters - inner_pattern: List[str] = [] + inner_pattern: list[str] = [] if esc_quote: inner_pattern.append(rf"(?:{re.escape(esc_quote)})") @@ -3287,6 +3344,7 @@ class QuotedString(Token): if self.convert_whitespace_escapes: self.unquote_scan_re = re.compile( rf"({'|'.join(re.escape(k) for k in self.ws_map)})" + rf"|(\\[0-7]{3}|\\0|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4})" rf"|({re.escape(self.esc_char)}.)" rf"|(\n|.)", flags=self.re_flags, @@ -3333,6 +3391,16 @@ class QuotedString(Token): loc = result.end() ret = result.group() + def convert_escaped_numerics(s: str) -> str: + if s == "0": + return "\0" + if s.isdigit() and len(s) == 3: + return chr(int(s, base=8)) + elif s.startswith(("u", "x")): + return chr(int(s[1:], base=16)) + else: + return s + if self.unquote_results: # strip off quotes ret = ret[self.quote_char_len : -self.end_quote_char_len] @@ -3346,10 
+3414,13 @@ class QuotedString(Token): ret = "".join( # match group 1 matches \t, \n, etc. self.ws_map[match.group(1)] if match.group(1) - # match group 2 matches escaped characters - else match.group(2)[-1] if match.group(2) - # match group 3 matches any character - else match.group(3) + # match group 2 matches escaped octal, null, hex, and Unicode + # sequences + else convert_escaped_numerics(match.group(2)[1:]) if match.group(2) + # match group 3 matches escaped characters + else match.group(3)[-1] if match.group(3) + # match group 4 matches any character + else match.group(4) for match in self.unquote_scan_re.finditer(ret) ) else: @@ -3754,6 +3825,7 @@ class Tag(Token): ['Hello,', 'World', '!'] - enthusiastic: True """ + def __init__(self, tag_name: str, value: Any = True): super().__init__() self.mayReturnEmpty = True @@ -3777,7 +3849,7 @@ class ParseExpression(ParserElement): def __init__(self, exprs: typing.Iterable[ParserElement], savelist: bool = False): super().__init__(savelist) - self.exprs: List[ParserElement] + self.exprs: list[ParserElement] if isinstance(exprs, _generatorType): exprs = list(exprs) @@ -3801,7 +3873,7 @@ class ParseExpression(ParserElement): self.exprs = [exprs] self.callPreparse = False - def recurse(self) -> List[ParserElement]: + def recurse(self) -> list[ParserElement]: return self.exprs[:] def append(self, other) -> ParserElement: @@ -3943,7 +4015,7 @@ class ParseExpression(ParserElement): class And(ParseExpression): """ - Requires all given :class:`ParseExpression` s to be found in the given order. + Requires all given :class:`ParserElement` s to be found in the given order. Expressions may be separated by whitespace. May be constructed using the ``'+'`` operator. 
May also be constructed using the ``'-'`` operator, which will @@ -3970,9 +4042,9 @@ class And(ParseExpression): def __init__( self, exprs_arg: typing.Iterable[ParserElement], savelist: bool = True ): - exprs: List[ParserElement] = list(exprs_arg) + exprs: list[ParserElement] = list(exprs_arg) if exprs and Ellipsis in exprs: - tmp: List[ParserElement] = [] + tmp: list[ParserElement] = [] for i, expr in enumerate(exprs): if expr is not Ellipsis: tmp.append(expr) @@ -4104,7 +4176,7 @@ class And(ParseExpression): class Or(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If + """Requires that at least one :class:`ParserElement` is found. If two expressions match, the expression that matches the longest string will be used. May be constructed using the ``'^'`` operator. @@ -4144,8 +4216,8 @@ class Or(ParseExpression): def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: maxExcLoc = -1 maxException = None - matches: List[Tuple[int, ParserElement]] = [] - fatals: List[ParseFatalException] = [] + matches: list[tuple[int, ParserElement]] = [] + fatals: list[ParseFatalException] = [] if all(e.callPreparse for e in self.exprs): loc = self.preParse(instring, loc) for e in self.exprs: @@ -4184,7 +4256,7 @@ class Or(ParseExpression): best_expr = matches[0][1] return best_expr._parse(instring, loc, do_actions) - longest = -1, None + longest: tuple[int, typing.Optional[ParseResults]] = -1, None for loc1, expr1 in matches: if loc1 <= longest[0]: # already have a longer match than this one will deliver, we are done @@ -4219,7 +4291,7 @@ class Or(ParseExpression): # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any single error message if maxExcLoc == loc: - maxException.msg = self.errmsg + maxException.msg = self.errmsg or "" raise maxException raise ParseException(instring, loc, "no defined alternatives to match", self) @@ -4259,7 +4331,7 @@ 
class Or(ParseExpression): class MatchFirst(ParseExpression): - """Requires that at least one :class:`ParseExpression` is found. If + """Requires that at least one :class:`ParserElement` is found. If more than one expression matches, the first one listed is the one that will match. May be constructed using the ``'|'`` operator. @@ -4326,7 +4398,7 @@ class MatchFirst(ParseExpression): # infer from this check that all alternatives failed at the current position # so emit this collective error message instead of any individual error message if maxExcLoc == loc: - maxException.msg = self.errmsg + maxException.msg = self.errmsg or "" raise maxException raise ParseException(instring, loc, "no defined alternatives to match", self) @@ -4366,7 +4438,7 @@ class MatchFirst(ParseExpression): class Each(ParseExpression): - """Requires all given :class:`ParseExpression` s to be found, but in + """Requires all given :class:`ParserElement` s to be found, but in any order. Expressions may be separated by whitespace. May be constructed using the ``'&'`` operator. 
@@ -4480,11 +4552,11 @@ class Each(ParseExpression): tmpReqd = self.required[:] tmpOpt = self.optionals[:] multis = self.multioptionals[:] - matchOrder: List[ParserElement] = [] + matchOrder: list[ParserElement] = [] keepMatching = True - failed: List[ParserElement] = [] - fatals: List[ParseFatalException] = [] + failed: list[ParserElement] = [] + fatals: list[ParseFatalException] = [] while keepMatching: tmpExprs = tmpReqd + tmpOpt + multis failed.clear() @@ -4567,7 +4639,7 @@ class ParseElementEnhance(ParserElement): self.callPreparse = expr.callPreparse self.ignoreExprs.extend(expr.ignoreExprs) - def recurse(self) -> List[ParserElement]: + def recurse(self) -> list[ParserElement]: return [self.expr] if self.expr is not None else [] def parseImpl(self, instring, loc, do_actions=True): @@ -4579,7 +4651,10 @@ class ParseElementEnhance(ParserElement): except ParseSyntaxException: raise except ParseBaseException as pbe: - if not isinstance(self, Forward) or self.customName is not None: + pbe.pstr = pbe.pstr or instring + pbe.loc = pbe.loc or loc + pbe.parser_element = pbe.parser_element or self + if not isinstance(self, Forward) and self.customName is not None: if self.errmsg: pbe.msg = self.errmsg raise @@ -4704,7 +4779,7 @@ class IndentedBlock(ParseElementEnhance): if self._grouped: wrapper = Group else: - wrapper = lambda expr: expr + wrapper = lambda expr: expr # type: ignore[misc, assignment] return (wrapper(block) + Optional(trailing_undent)).parseImpl( instring, anchor_loc, do_actions ) @@ -4830,9 +4905,7 @@ class PrecededBy(ParseElementEnhance): """ - def __init__( - self, expr: Union[ParserElement, str], retreat: typing.Optional[int] = None - ): + def __init__(self, expr: Union[ParserElement, str], retreat: int = 0): super().__init__(expr) self.expr = self.expr().leave_whitespace() self.mayReturnEmpty = True @@ -4859,7 +4932,7 @@ class PrecededBy(ParseElementEnhance): def parseImpl(self, instring, loc=0, do_actions=True) -> ParseImplReturnType: if 
self.exact: if loc < self.retreat: - raise ParseException(instring, loc, self.errmsg) + raise ParseException(instring, loc, self.errmsg, self) start = loc - self.retreat _, ret = self.expr._parse(instring, start) return loc, ret @@ -4867,7 +4940,7 @@ class PrecededBy(ParseElementEnhance): # retreat specified a maximum lookbehind window, iterate test_expr = self.expr + StringEnd() instring_slice = instring[max(0, loc - self.retreat) : loc] - last_expr = ParseException(instring, loc, self.errmsg) + last_expr: ParseBaseException = ParseException(instring, loc, self.errmsg, self) for offset in range(1, min(loc, self.retreat + 1) + 1): try: @@ -5236,7 +5309,9 @@ class Opt(ParseElementEnhance): def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType: self_expr = self.expr try: - loc, tokens = self_expr._parse(instring, loc, do_actions, callPreParse=False) + loc, tokens = self_expr._parse( + instring, loc, do_actions, callPreParse=False + ) except (ParseException, IndexError): default_value = self.defaultValue if default_value is not self.__optionalNotMatched: @@ -5244,9 +5319,9 @@ class Opt(ParseElementEnhance): tokens = ParseResults([default_value]) tokens[self_expr.resultsName] = default_value else: - tokens = [default_value] + tokens = [default_value] # type: ignore[assignment] else: - tokens = [] + tokens = [] # type: ignore[assignment] return loc, tokens def _generateDefaultName(self) -> str: @@ -5446,7 +5521,7 @@ class Forward(ParseElementEnhance): super().__init__(other, savelist=False) # type: ignore[arg-type] self.lshift_line = None - def __lshift__(self, other) -> "Forward": + def __lshift__(self, other) -> Forward: if hasattr(self, "caller_frame"): del self.caller_frame if isinstance(other, str_type): @@ -5468,13 +5543,13 @@ class Forward(ParseElementEnhance): self.lshift_line = traceback.extract_stack(limit=2)[-2] # type: ignore[assignment] return self - def __ilshift__(self, other) -> "Forward": + def __ilshift__(self, other) -> Forward: 
if not isinstance(other, ParserElement): return NotImplemented return self << other - def __or__(self, other) -> "ParserElement": + def __or__(self, other) -> ParserElement: caller_line = traceback.extract_stack(limit=2)[-2] if ( __diag__.warn_on_match_first_with_lshift_operator @@ -5585,9 +5660,9 @@ class Forward(ParseElementEnhance): # in case the action did backtrack prev_loc, prev_result = memo[peek_key] = memo[act_key] del memo[peek_key], memo[act_key] - return prev_loc, prev_result.copy() + return prev_loc, copy.copy(prev_result) del memo[peek_key] - return prev_loc, prev_peek.copy() + return prev_loc, copy.copy(prev_peek) # the match did get better: see if we can improve further if do_actions: try: @@ -5675,7 +5750,7 @@ class Forward(ParseElementEnhance): class TokenConverter(ParseElementEnhance): """ - Abstract subclass of :class:`ParseExpression`, for converting parsed results. + Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results. """ def __init__(self, expr: Union[ParserElement, str], savelist=False): @@ -5900,13 +5975,13 @@ class Suppress(TokenConverter): expr = _PendingSkip(NoMatch()) super().__init__(expr) - def __add__(self, other) -> "ParserElement": + def __add__(self, other) -> ParserElement: if isinstance(self.expr, _PendingSkip): return Suppress(SkipTo(other)) + other return super().__add__(other) - def __sub__(self, other) -> "ParserElement": + def __sub__(self, other) -> ParserElement: if isinstance(self.expr, _PendingSkip): return Suppress(SkipTo(other)) - other @@ -6131,7 +6206,7 @@ punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") # build list of built-in expressions, for future reference if a global default value # gets updated -_builtin_exprs: List[ParserElement] = [ +_builtin_exprs: list[ParserElement] = [ v for v in vars().values() if isinstance(v, ParserElement) ] diff --git a/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py b/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py index 
3275adafb6..7926f2c355 100644 --- a/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py +++ b/contrib/python/pyparsing/py3/pyparsing/diagram/__init__.py @@ -1,20 +1,20 @@ # mypy: ignore-errors +from __future__ import annotations + import railroad import pyparsing +import dataclasses import typing from typing import ( - List, - NamedTuple, Generic, TypeVar, - Dict, Callable, - Set, Iterable, ) from jinja2 import Template from io import StringIO import inspect +import re jinja2_template_source = """\ @@ -55,14 +55,23 @@ jinja2_template_source = """\ template = Template(jinja2_template_source) -# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet -NamedDiagram = NamedTuple( - "NamedDiagram", - [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)], -) -""" -A simple structure for associating a name with a railroad diagram -""" + +def _collapse_verbose_regex(regex_str: str) -> str: + collapsed = pyparsing.Regex(r"#.*").suppress().transform_string(regex_str) + collapsed = re.sub(r"\s*\n\s*", "", collapsed) + return collapsed + + +@dataclasses.dataclass +class NamedDiagram: + """ + A simple structure for associating a name with a railroad diagram + """ + + name: str + index: int + diagram: railroad.DiagramItem = None + T = TypeVar("T") @@ -108,7 +117,7 @@ class EditablePartial(Generic[T]): self.kwargs = kwargs @classmethod - def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]": + def from_call(cls, func: Callable[..., T], *args, **kwargs) -> EditablePartial[T]: """ If you call this function in the same way that you would call the constructor, it will store the arguments as you expect. 
For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3) @@ -135,7 +144,7 @@ class EditablePartial(Generic[T]): return self.func(*args, **kwargs) -def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str: +def railroad_to_html(diagrams: list[NamedDiagram], embed=False, **kwargs) -> str: """ Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams :params kwargs: kwargs to be passed in to the template @@ -158,7 +167,7 @@ def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str return template.render(diagrams=data, embed=embed, **kwargs) -def resolve_partial(partial: "EditablePartial[T]") -> T: +def resolve_partial(partial: EditablePartial[T]) -> T: """ Recursively resolves a collection of Partials into whatever type they are """ @@ -180,7 +189,7 @@ def to_railroad( vertical: int = 3, show_results_names: bool = False, show_groups: bool = False, -) -> List[NamedDiagram]: +) -> list[NamedDiagram]: """ Convert a pyparsing element tree into a list of diagrams. 
This is the recommended entrypoint to diagram creation if you want to access the Railroad tree before it is converted to HTML @@ -244,40 +253,31 @@ def _should_vertical( return len(_visible_exprs(exprs)) >= specification +@dataclasses.dataclass class ElementState: """ State recorded for an individual pyparsing Element """ - # Note: this should be a dataclass, but we have to support Python 3.5 - def __init__( - self, - element: pyparsing.ParserElement, - converted: EditablePartial, - parent: EditablePartial, - number: int, - name: str = None, - parent_index: typing.Optional[int] = None, - ): - #: The pyparsing element that this represents - self.element: pyparsing.ParserElement = element - #: The name of the element - self.name: typing.Optional[str] = name - #: The output Railroad element in an unconverted state - self.converted: EditablePartial = converted - #: The parent Railroad element, which we store so that we can extract this if it's duplicated - self.parent: EditablePartial = parent - #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram - self.number: int = number - #: The index of this inside its parent - self.parent_index: typing.Optional[int] = parent_index - #: If true, we should extract this out into a subdiagram - self.extract: bool = False - #: If true, all of this element's children have been filled out - self.complete: bool = False + #: The pyparsing element that this represents + element: pyparsing.ParserElement + #: The output Railroad element in an unconverted state + converted: EditablePartial + #: The parent Railroad element, which we store so that we can extract this if it's duplicated + parent: EditablePartial + #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram + number: int + #: The name of the element + name: str = None + #: The index of this inside its parent + parent_index: typing.Optional[int] = None + #: If true, we should 
extract this out into a subdiagram + extract: bool = False + #: If true, all of this element's children have been filled out + complete: bool = False def mark_for_extraction( - self, el_id: int, state: "ConverterState", name: str = None, force: bool = False + self, el_id: int, state: ConverterState, name: str = None, force: bool = False ): """ Called when this instance has been seen twice, and thus should eventually be extracted into a sub-diagram @@ -313,16 +313,16 @@ class ConverterState: def __init__(self, diagram_kwargs: typing.Optional[dict] = None): #: A dictionary mapping ParserElements to state relating to them - self._element_diagram_states: Dict[int, ElementState] = {} + self._element_diagram_states: dict[int, ElementState] = {} #: A dictionary mapping ParserElement IDs to subdiagrams generated from them - self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {} + self.diagrams: dict[int, EditablePartial[NamedDiagram]] = {} #: The index of the next unnamed element self.unnamed_index: int = 1 #: The index of the next element. This is used for sorting self.index: int = 0 #: Shared kwargs that are used to customize the construction of diagrams self.diagram_kwargs: dict = diagram_kwargs or {} - self.extracted_diagram_names: Set[str] = set() + self.extracted_diagram_names: set[str] = set() def __setitem__(self, key: int, value: ElementState): self._element_diagram_states[key] = value @@ -513,7 +513,7 @@ def _to_diagram_element( # If the element isn't worth extracting, we always treat it as the first time we say it if _worth_extracting(element): - if el_id in lookup: + if el_id in lookup and lookup[el_id].name is not None: # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate, # so we have to extract it into a new diagram. 
looked_up = lookup[el_id] @@ -618,6 +618,11 @@ def _to_diagram_element( ret = EditablePartial.from_call(railroad.Sequence, items=[]) elif len(exprs) > 0 and not element_results_name: ret = EditablePartial.from_call(railroad.Group, item="", label=name) + elif isinstance(element, pyparsing.Regex): + patt = _collapse_verbose_regex(element.pattern) + element.pattern = patt + element._defaultName = None + ret = EditablePartial.from_call(railroad.Terminal, element.defaultName) elif len(exprs) > 0: ret = EditablePartial.from_call(railroad.Sequence, items=[]) else: diff --git a/contrib/python/pyparsing/py3/pyparsing/exceptions.py b/contrib/python/pyparsing/py3/pyparsing/exceptions.py index 8db34f195a..57a1579d12 100644 --- a/contrib/python/pyparsing/py3/pyparsing/exceptions.py +++ b/contrib/python/pyparsing/py3/pyparsing/exceptions.py @@ -1,17 +1,20 @@ # exceptions.py +from __future__ import annotations +import copy import re import sys import typing +from functools import cached_property +from .unicode import pyparsing_unicode as ppu from .util import ( + _collapse_string_to_ranges, col, line, lineno, - _collapse_string_to_ranges, replaced_by_pep8, ) -from .unicode import pyparsing_unicode as ppu class _ExceptionWordUnicodeSet( @@ -31,7 +34,7 @@ class ParseBaseException(Exception): msg: str pstr: str parser_element: typing.Any # "ParserElement" - args: typing.Tuple[str, int, typing.Optional[str]] + args: tuple[str, int, typing.Optional[str]] __slots__ = ( "loc", @@ -50,18 +53,17 @@ class ParseBaseException(Exception): msg: typing.Optional[str] = None, elem=None, ): - self.loc = loc if msg is None: - self.msg = pstr - self.pstr = "" - else: - self.msg = msg - self.pstr = pstr + msg, pstr = pstr, "" + + self.loc = loc + self.msg = msg + self.pstr = pstr self.parser_element = elem self.args = (pstr, loc, msg) @staticmethod - def explain_exception(exc, depth=16): + def explain_exception(exc: Exception, depth: int = 16) -> str: """ Method to take an exception and translate the 
Python internal traceback into a list of the pyparsing expressions that caused the exception to be raised. @@ -82,17 +84,17 @@ class ParseBaseException(Exception): if depth is None: depth = sys.getrecursionlimit() - ret = [] + ret: list[str] = [] if isinstance(exc, ParseBaseException): ret.append(exc.line) ret.append(f"{' ' * (exc.column - 1)}^") ret.append(f"{type(exc).__name__}: {exc}") - if depth <= 0: + if depth <= 0 or exc.__traceback__ is None: return "\n".join(ret) callers = inspect.getinnerframes(exc.__traceback__, context=depth) - seen = set() + seen: set[int] = set() for ff in callers[-depth:]: frm = ff[0] @@ -125,41 +127,58 @@ class ParseBaseException(Exception): return "\n".join(ret) @classmethod - def _from_exception(cls, pe): + def _from_exception(cls, pe) -> ParseBaseException: """ internal factory method to simplify creating one type of ParseException from another - avoids having __init__ signature conflicts among subclasses """ return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element) - @property + @cached_property def line(self) -> str: """ Return the line of text where the exception occurred. """ return line(self.loc, self.pstr) - @property + @cached_property def lineno(self) -> int: """ Return the 1-based line number of text where the exception occurred. """ return lineno(self.loc, self.pstr) - @property + @cached_property def col(self) -> int: """ Return the 1-based column on the line of text where the exception occurred. """ return col(self.loc, self.pstr) - @property + @cached_property def column(self) -> int: """ Return the 1-based column on the line of text where the exception occurred. 
""" return col(self.loc, self.pstr) + @cached_property + def found(self) -> str: + if not self.pstr: + return "" + + if self.loc >= len(self.pstr): + return "end of text" + + # pull out next word at error location + found_match = _exception_word_extractor.match(self.pstr, self.loc) + if found_match is not None: + found_text = found_match.group(0) + else: + found_text = self.pstr[self.loc : self.loc + 1] + + return repr(found_text).replace(r"\\", "\\") + # pre-PEP8 compatibility @property def parserElement(self): @@ -169,21 +188,15 @@ class ParseBaseException(Exception): def parserElement(self, elem): self.parser_element = elem + def copy(self): + return copy.copy(self) + + def formatted_message(self) -> str: + found_phrase = f", found {self.found}" if self.found else "" + return f"{self.msg}{found_phrase} (at char {self.loc}), (line:{self.lineno}, col:{self.column})" + def __str__(self) -> str: - if self.pstr: - if self.loc >= len(self.pstr): - foundstr = ", found end of text" - else: - # pull out next word at error location - found_match = _exception_word_extractor.match(self.pstr, self.loc) - if found_match is not None: - found = found_match.group(0) - else: - found = self.pstr[self.loc : self.loc + 1] - foundstr = (", found %r" % found).replace(r"\\", "\\") - else: - foundstr = "" - return f"{self.msg}{foundstr} (at char {self.loc}), (line:{self.lineno}, col:{self.column})" + return self.formatted_message() def __repr__(self): return str(self) @@ -199,12 +212,10 @@ class ParseBaseException(Exception): line_str = self.line line_column = self.column - 1 if markerString: - line_str = "".join( - (line_str[:line_column], markerString, line_str[line_column:]) - ) + line_str = f"{line_str[:line_column]}{markerString}{line_str[line_column:]}" return line_str.strip() - def explain(self, depth=16) -> str: + def explain(self, depth: int = 16) -> str: """ Method to translate the Python internal traceback into a list of the pyparsing expressions that caused the exception to 
be raised. @@ -292,6 +303,8 @@ class RecursiveGrammarException(Exception): Exception thrown by :class:`ParserElement.validate` if the grammar could be left-recursive; parser may need to enable left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>` + + Deprecated: only used by deprecated method ParserElement.validate. """ def __init__(self, parseElementList): diff --git a/contrib/python/pyparsing/py3/pyparsing/helpers.py b/contrib/python/pyparsing/py3/pyparsing/helpers.py index d5d14a08d6..d2bd05f3d3 100644 --- a/contrib/python/pyparsing/py3/pyparsing/helpers.py +++ b/contrib/python/pyparsing/py3/pyparsing/helpers.py @@ -1,5 +1,6 @@ # helpers.py import html.entities +import operator import re import sys import typing @@ -10,6 +11,7 @@ from .util import ( _bslash, _flatten, _escape_regex_range_chars, + make_compressed_re, replaced_by_pep8, ) @@ -203,15 +205,15 @@ def one_of( ) if caseless: - isequal = lambda a, b: a.upper() == b.upper() + is_equal = lambda a, b: a.upper() == b.upper() masks = lambda a, b: b.upper().startswith(a.upper()) - parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral + parse_element_class = CaselessKeyword if asKeyword else CaselessLiteral else: - isequal = lambda a, b: a == b + is_equal = operator.eq masks = lambda a, b: b.startswith(a) - parseElementClass = Keyword if asKeyword else Literal + parse_element_class = Keyword if asKeyword else Literal - symbols: List[str] = [] + symbols: list[str] if isinstance(strs, str_type): strs = typing.cast(str, strs) symbols = strs.split() @@ -224,20 +226,19 @@ def one_of( # reorder given symbols to take care to avoid masking longer choices with shorter ones # (but only if the given symbols are not just single characters) - if any(len(sym) > 1 for sym in symbols): - i = 0 - while i < len(symbols) - 1: - cur = symbols[i] - for j, other in enumerate(symbols[i + 1 :]): - if isequal(other, cur): - del symbols[i + j + 1] - break - if masks(cur, 
other): - del symbols[i + j + 1] - symbols.insert(i, other) - break - else: - i += 1 + i = 0 + while i < len(symbols) - 1: + cur = symbols[i] + for j, other in enumerate(symbols[i + 1 :]): + if is_equal(other, cur): + del symbols[i + j + 1] + break + if len(other) > len(cur) and masks(cur, other): + del symbols[i + j + 1] + symbols.insert(i, other) + break + else: + i += 1 if useRegex: re_flags: int = re.IGNORECASE if caseless else 0 @@ -269,7 +270,7 @@ def one_of( ) # last resort, just use MatchFirst - return MatchFirst(parseElementClass(sym) for sym in symbols).set_name( + return MatchFirst(parse_element_class(sym) for sym in symbols).set_name( " | ".join(symbols) ) @@ -602,7 +603,7 @@ def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")) def make_html_tags( tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: +) -> tuple[ParserElement, ParserElement]: """Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values. @@ -629,7 +630,7 @@ def make_html_tags( def make_xml_tags( tag_str: Union[str, ParserElement] -) -> Tuple[ParserElement, ParserElement]: +) -> tuple[ParserElement, ParserElement]: """Helper to construct opening and closing tag expressions for XML, given a tag name. Matches tags only in the given upper/lower case. 
@@ -645,9 +646,12 @@ any_open_tag, any_close_tag = make_html_tags( ) _htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()} -common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name( - "common HTML entity" +_most_common_entities = "nbsp lt gt amp quot apos cent pound euro copy".replace( + " ", "|" ) +common_html_entity = Regex( + lambda: f"&(?P<entity>{_most_common_entities}|{make_compressed_re(_htmlEntityMap)});" +).set_name("common HTML entity") def replace_html_entity(s, l, t): @@ -664,16 +668,16 @@ class OpAssoc(Enum): InfixNotationOperatorArgType = Union[ - ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]] + ParserElement, str, tuple[Union[ParserElement, str], Union[ParserElement, str]] ] InfixNotationOperatorSpec = Union[ - Tuple[ + tuple[ InfixNotationOperatorArgType, int, OpAssoc, typing.Optional[ParseAction], ], - Tuple[ + tuple[ InfixNotationOperatorArgType, int, OpAssoc, @@ -683,7 +687,7 @@ InfixNotationOperatorSpec = Union[ def infix_notation( base_expr: ParserElement, - op_list: List[InfixNotationOperatorSpec], + op_list: list[InfixNotationOperatorSpec], lpar: Union[str, ParserElement] = Suppress("("), rpar: Union[str, ParserElement] = Suppress(")"), ) -> ParserElement: @@ -1032,7 +1036,7 @@ python_style_comment = Regex(r"#.*").set_name("Python style comment") # build list of built-in expressions, for future reference if a global default value # gets updated -_builtin_exprs: List[ParserElement] = [ +_builtin_exprs: list[ParserElement] = [ v for v in vars().values() if isinstance(v, ParserElement) ] diff --git a/contrib/python/pyparsing/py3/pyparsing/results.py b/contrib/python/pyparsing/py3/pyparsing/results.py index 3bb7c948e0..245847832a 100644 --- a/contrib/python/pyparsing/py3/pyparsing/results.py +++ b/contrib/python/pyparsing/py3/pyparsing/results.py @@ -1,4 +1,7 @@ # results.py +from __future__ import annotations + +import collections from collections.abc import ( 
MutableMapping, Mapping, @@ -7,21 +10,21 @@ from collections.abc import ( Iterable, ) import pprint -from typing import Tuple, Any, Dict, Set, List +from typing import Any from .util import replaced_by_pep8 -str_type: Tuple[type, ...] = (str, bytes) +str_type: tuple[type, ...] = (str, bytes) _generator_type = type((_ for _ in ())) class _ParseResultsWithOffset: - tup: Tuple["ParseResults", int] + tup: tuple[ParseResults, int] __slots__ = ["tup"] - def __init__(self, p1: "ParseResults", p2: int): - self.tup: Tuple[ParseResults, int] = (p1, p2) + def __init__(self, p1: ParseResults, p2: int): + self.tup: tuple[ParseResults, int] = (p1, p2) def __getitem__(self, i): return self.tup[i] @@ -79,14 +82,14 @@ class ParseResults: - year: '1999' """ - _null_values: Tuple[Any, ...] = (None, [], ()) + _null_values: tuple[Any, ...] = (None, [], ()) _name: str - _parent: "ParseResults" - _all_names: Set[str] + _parent: ParseResults + _all_names: set[str] _modal: bool - _toklist: List[Any] - _tokdict: Dict[str, Any] + _toklist: list[Any] + _tokdict: dict[str, Any] __slots__ = ( "_name", @@ -172,8 +175,8 @@ class ParseResults: # constructor as small and fast as possible def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance - ): - self._tokdict: Dict[str, _ParseResultsWithOffset] + ) -> None: + self._tokdict: dict[str, _ParseResultsWithOffset] self._modal = modal if name is None or name == "": @@ -226,7 +229,7 @@ class ParseResults: self._toklist[k] = v sub = v else: - self._tokdict[k] = self._tokdict.get(k, list()) + [ + self._tokdict[k] = self._tokdict.get(k, []) + [ _ParseResultsWithOffset(v, 0) ] sub = v @@ -443,12 +446,12 @@ class ParseResults: raise AttributeError(name) return "" - def __add__(self, other: "ParseResults") -> "ParseResults": + def __add__(self, other: ParseResults) -> ParseResults: ret = self.copy() ret += other return ret - def __iadd__(self, other: "ParseResults") -> "ParseResults": + def __iadd__(self, other: 
ParseResults) -> ParseResults: if not other: return self @@ -470,7 +473,7 @@ class ParseResults: self._all_names |= other._all_names return self - def __radd__(self, other) -> "ParseResults": + def __radd__(self, other) -> ParseResults: if isinstance(other, int) and other == 0: # useful for merging many ParseResults using sum() builtin return self.copy() @@ -504,9 +507,10 @@ class ParseResults: out.append(str(item)) return out - def as_list(self) -> list: + def as_list(self, *, flatten: bool = False) -> list: """ Returns the parse results as a nested list of matching tokens, all converted to strings. + If flatten is True, all the nesting levels in the returned list are collapsed. Example:: @@ -519,10 +523,22 @@ class ParseResults: result_list = result.as_list() print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] """ - return [ - res.as_list() if isinstance(res, ParseResults) else res - for res in self._toklist - ] + def flattened(pr): + to_visit = collections.deque([*self]) + while to_visit: + to_do = to_visit.popleft() + if isinstance(to_do, ParseResults): + to_visit.extendleft(to_do[::-1]) + else: + yield to_do + + if flatten: + return [*flattened(self)] + else: + return [ + res.as_list() if isinstance(res, ParseResults) else res + for res in self._toklist + ] def as_dict(self) -> dict: """ @@ -553,7 +569,7 @@ class ParseResults: return dict((k, to_item(v)) for k, v in self.items()) - def copy(self) -> "ParseResults": + def copy(self) -> ParseResults: """ Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults` items contained within the source are shared with the copy. Use @@ -567,7 +583,7 @@ class ParseResults: ret._name = self._name return ret - def deepcopy(self) -> "ParseResults": + def deepcopy(self) -> ParseResults: """ Returns a new deep copy of a :class:`ParseResults` object. 
""" @@ -584,11 +600,11 @@ class ParseResults: dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v elif isinstance(obj, Iterable): ret._toklist[i] = type(obj)( - v.deepcopy() if isinstance(v, ParseResults) else v for v in obj + v.deepcopy() if isinstance(v, ParseResults) else v for v in obj # type: ignore[call-arg] ) return ret - def get_name(self) -> str: + def get_name(self) -> str | None: r""" Returns the results name for this token expression. Useful when several different expressions might match at a particular location. @@ -616,7 +632,7 @@ class ParseResults: if self._name: return self._name elif self._parent: - par: "ParseResults" = self._parent + par: ParseResults = self._parent parent_tokdict_items = par._tokdict.items() return next( ( @@ -761,7 +777,7 @@ class ParseResults: return dir(type(self)) + list(self.keys()) @classmethod - def from_dict(cls, other, name=None) -> "ParseResults": + def from_dict(cls, other, name=None) -> ParseResults: """ Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the name-value relations as results names. If an optional ``name`` argument is diff --git a/contrib/python/pyparsing/py3/pyparsing/testing.py b/contrib/python/pyparsing/py3/pyparsing/testing.py index 5654d47d62..836b2f86fb 100644 --- a/contrib/python/pyparsing/py3/pyparsing/testing.py +++ b/contrib/python/pyparsing/py3/pyparsing/testing.py @@ -257,10 +257,14 @@ class pyparsing_test: eol_mark: str = "|", mark_spaces: typing.Optional[str] = None, mark_control: typing.Optional[str] = None, + *, + indent: typing.Union[str, int] = "", + base_1: bool = True, ) -> str: """ Helpful method for debugging a parser - prints a string with line and column numbers. - (Line and column numbers are 1-based.) + (Line and column numbers are 1-based by default - if debugging a parse action, + pass base_1=False, to correspond to the loc value passed to the parse action.) 
:param s: tuple(bool, str - string to be printed with line and column numbers :param start_line: int - (optional) starting line number in s to print (default=1) @@ -273,11 +277,18 @@ class pyparsing_test: - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊" - any single character string - replace control characters with given string - None (default) - string is displayed as-is + :param indent: str | int - (optional) string to indent with line and column numbers; if an int + is passed, converted to " " * indent + :param base_1: bool - (optional) whether to label string using base 1; if False, string will be + labeled based at 0 (default=True) :return: str - input string with leading line numbers and column number headers """ if expand_tabs: s = s.expandtabs() + if isinstance(indent, int): + indent = " " * indent + indent = indent.expandtabs() if mark_control is not None: mark_control = typing.cast(str, mark_control) if mark_control == "unicode": @@ -300,46 +311,52 @@ class pyparsing_test: else: s = s.replace(" ", mark_spaces) if start_line is None: - start_line = 1 + start_line = 0 if end_line is None: end_line = len(s) end_line = min(end_line, len(s)) - start_line = min(max(1, start_line), end_line) + start_line = min(max(0, start_line), end_line) if mark_control != "unicode": - s_lines = s.splitlines()[start_line - 1 : end_line] + s_lines = s.splitlines()[start_line - base_1 : end_line] else: - s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]] + s_lines = [ + line + "␊" for line in s.split("␊")[start_line - base_1 : end_line] + ] if not s_lines: return "" lineno_width = len(str(end_line)) max_line_len = max(len(line) for line in s_lines) - lead = " " * (lineno_width + 1) + lead = indent + " " * (lineno_width + 1) if max_line_len >= 99: header0 = ( lead + + ("" if base_1 else " ") + "".join( f"{' ' * 99}{(i + 1) % 100}" - for i in range(max(max_line_len // 100, 1)) + for i in range(1 if base_1 else 0, 
max(max_line_len // 100, 1)) ) + "\n" ) else: header0 = "" header1 = ( - header0 + ("" if base_1 else " ") + lead + "".join(f" {(i + 1) % 10}" for i in range(-(-max_line_len // 10))) + "\n" ) - header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n" + digits = "1234567890" + header2 = ( + lead + ("" if base_1 else "0") + digits * (-(-max_line_len // 10)) + "\n" + ) return ( header1 + header2 + "\n".join( - f"{i:{lineno_width}d}:{line}{eol_mark}" - for i, line in enumerate(s_lines, start=start_line) + f"{indent}{i:{lineno_width}d}:{line}{eol_mark}" + for i, line in enumerate(s_lines, start=start_line + base_1) ) + "\n" ) diff --git a/contrib/python/pyparsing/py3/pyparsing/unicode.py b/contrib/python/pyparsing/py3/pyparsing/unicode.py index 0e3e06572b..066486c28e 100644 --- a/contrib/python/pyparsing/py3/pyparsing/unicode.py +++ b/contrib/python/pyparsing/py3/pyparsing/unicode.py @@ -2,7 +2,7 @@ import sys from itertools import filterfalse -from typing import List, Tuple, Union +from typing import Union class _lazyclassproperty: @@ -25,7 +25,7 @@ class _lazyclassproperty: return cls._intern[attrname] -UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] +UnicodeRangeList = list[Union[tuple[int, int], tuple[int]]] class unicode_set: @@ -53,9 +53,9 @@ class unicode_set: _ranges: UnicodeRangeList = [] @_lazyclassproperty - def _chars_for_ranges(cls) -> List[str]: - ret: List[int] = [] - for cc in cls.__mro__: + def _chars_for_ranges(cls) -> list[str]: + ret: list[int] = [] + for cc in cls.__mro__: # type: ignore[attr-defined] if cc is unicode_set: break for rr in getattr(cc, "_ranges", ()): diff --git a/contrib/python/pyparsing/py3/pyparsing/util.py b/contrib/python/pyparsing/py3/pyparsing/util.py index 94837fea0f..1487019c27 100644 --- a/contrib/python/pyparsing/py3/pyparsing/util.py +++ b/contrib/python/pyparsing/py3/pyparsing/util.py @@ -1,11 +1,11 @@ # util.py +import contextlib +from functools import lru_cache, wraps import inspect -import warnings 
-import types -import collections import itertools -from functools import lru_cache, wraps -from typing import Callable, List, Union, Iterable, TypeVar, cast +import types +from typing import Callable, Union, Iterable, TypeVar, cast +import warnings _bslash = chr(92) C = TypeVar("C", bound=Callable) @@ -14,8 +14,8 @@ C = TypeVar("C", bound=Callable) class __config_flags: """Internal class for defining compatibility and debugging flags""" - _all_names: List[str] = [] - _fixed_names: List[str] = [] + _all_names: list[str] = [] + _fixed_names: list[str] = [] _type_desc = "configuration" @classmethod @@ -100,27 +100,24 @@ class _UnboundedCache: class _FifoCache: def __init__(self, size): - self.not_in_cache = not_in_cache = object() cache = {} - keyring = [object()] * size + self.size = size + self.not_in_cache = not_in_cache = object() cache_get = cache.get cache_pop = cache.pop - keyiter = itertools.cycle(range(size)) def get(_, key): return cache_get(key, not_in_cache) def set_(_, key, value): cache[key] = value - i = next(keyiter) - cache_pop(keyring[i], None) - keyring[i] = key + while len(cache) > size: + # pop oldest element in cache by getting the first key + cache_pop(next(iter(cache))) def clear(_): cache.clear() - keyring[:] = [object()] * size - self.size = size self.get = types.MethodType(get, self) self.set = types.MethodType(set_, self) self.clear = types.MethodType(clear, self) @@ -137,13 +134,13 @@ class LRUMemo: def __init__(self, capacity): self._capacity = capacity self._active = {} - self._memory = collections.OrderedDict() + self._memory = {} def __getitem__(self, key): try: return self._active[key] except KeyError: - self._memory.move_to_end(key) + self._memory[key] = self._memory.pop(key) return self._memory[key] def __setitem__(self, key, value): @@ -156,8 +153,9 @@ class LRUMemo: except KeyError: pass else: - while len(self._memory) >= self._capacity: - self._memory.popitem(last=False) + oldest_keys = list(self._memory)[: -(self._capacity + 
class _GroupConsecutive:
    """
    Used as a callable `key` for itertools.groupby to group
    characters that are consecutive:

        itertools.groupby("abcdejkmpqrs", key=_GroupConsecutive())

    yields:

        (0, iter(['a', 'b', 'c', 'd', 'e']))
        (1, iter(['j', 'k']))
        (2, iter(['m']))
        (3, iter(['p', 'q', 'r', 's']))

    The instance is stateful (it remembers the previous character), so
    create a fresh instance for every groupby() call, and feed it characters
    in ascending order.
    """

    def __init__(self) -> None:
        # code point of the previously seen character
        self.prev = 0
        # source of successive group ids
        self.counter = itertools.count()
        # id of the group currently being built
        self.value = -1

    def __call__(self, char: str) -> int:
        c_int = ord(char)
        self.prev, prev = c_int, self.prev
        if c_int - prev > 1:
            # gap of more than one code point -> start a new group
            self.value = next(self.counter)
        return self.value


def _collapse_string_to_ranges(
    s: Union[str, Iterable[str]], re_escape: bool = True
) -> str:
    r"""
    Take a string or list of single-character strings, and return
    a string of the consecutive characters in that string collapsed
    into groups, as might be used in a regular expression '[a-z]'
    character set:

        'a' -> 'a' -> '[a]'
        'bc' -> 'bc' -> '[bc]'
        'defgh' -> 'd-h' -> '[d-h]'
        'fdgeh' -> 'd-h' -> '[d-h]'
        'jklnpqrtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Duplicates get collapsed out:

        'aaa' -> 'a' -> '[a]'
        'bcbccb' -> 'bc' -> '[bc]'
        'defghhgf' -> 'd-h' -> '[d-h]'
        'jklnpqrjjjtu' -> 'j-lnp-rtu' -> '[j-lnp-rtu]'

    Spaces are preserved:

        'ab c' -> ' a-c' -> '[ a-c]'

    Characters that are significant when defining regex ranges
    get escaped (unless re_escape is False):

        'acde[]-' -> r'\-\[\]ac-e' -> r'[\-\[\]ac-e]'
    """

    # Developer notes:
    # - Do not optimize this code assuming that the given input string
    #   or internal lists will be short (such as in loading generators into
    #   lists to make it easier to find the last element); this method is also
    #   used to generate regex ranges for character sets in the pyparsing.unicode
    #   classes, and these can be _very_ long lists of strings

    def escape_re_range_char(c: str) -> str:
        return "\\" + c if c in r"\^-][" else c

    def no_escape_re_range_char(c: str) -> str:
        return c

    escape = escape_re_range_char if re_escape else no_escape_re_range_char

    # reduce input string to remove duplicates, and put in sorted order
    s_chars: list[str] = sorted(set(s))

    ret = []
    # find groups of characters that are consecutive (can be collapsed
    # down to "<first>-<last>"); the group key itself is unimportant
    for _, chars in itertools.groupby(s_chars, key=_GroupConsecutive()):
        first = last = next(chars)
        # walk to the final character of the group without materializing
        # the group (groups can be very long - see developer notes)
        for last in chars:
            pass

        if first == last:
            # there was only a single char in this group
            ret.append(escape(first))
        elif last == chr(ord(first) + 1):
            # there were only 2 characters in this group
            # 'a','b' -> 'ab'
            ret.append(f"{escape(first)}{escape(last)}")
        else:
            # there were > 2 characters in this group, make into a range
            # 'c','d','e' -> 'c-e'
            ret.append(f"{escape(first)}-{escape(last)}")

    return "".join(ret)


def _flatten(ll: Iterable) -> list:
    """
    Return a flat list of the leaf items found in the arbitrarily nested
    iterable `ll`, in depth-first, left-to-right order.

    Any Iterable other than a str is descended into; strings are
    treated as leaf values.
    """
    ret = []
    # stack of partially-consumed iterators; avoids the O(n) cost of
    # popping from / inserting at the front of a list for every item
    pending = [iter(ll)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, Iterable) and not isinstance(item, str):
                # descend into the nested iterable, resume this level later
                pending.append(iter(item))
                break
            ret.append(item)
        else:
            # innermost iterator is exhausted
            pending.pop()
    return ret


def make_compressed_re(
    word_list: Iterable[str], max_level: int = 2, _level: int = 1
) -> str:
    """
    Create a regular expression string from a list of words, collapsing by common
    prefixes and optional suffixes.

    Calls itself recursively to build nested sublists for each group of suffixes
    that have a shared prefix.

    :param word_list: iterable of non-empty words to match; duplicates are ignored
    :param max_level: maximum nesting depth of generated groups; 0 disables
        prefix collapsing and returns a plain "|"-separated alternation
    :param _level: internal recursion depth, not intended to be passed by callers
    :return: str - regular expression matching exactly the given words, with
        any regex metacharacters in the words escaped
    :raises ValueError: if no words are given, or if any word is an empty string
    """
    # local import: this module does not import ``re`` at top level
    import re

    def escape_range_char(c: str) -> str:
        # escape characters that are significant inside a [...] set
        return "\\" + c if c in r"\^-][" else c

    # materialize (word_list may be a one-shot iterator) and drop duplicates,
    # which would otherwise be mistaken for an "optional suffix"
    words = list(dict.fromkeys(word_list))

    if not words:
        if _level == 1:
            raise ValueError("no words given to make_compressed_re()")
        return ""
    if "" in words:
        raise ValueError("word list cannot contain empty string")

    if max_level == 0:
        # longest first, so that longer alternatives are tried before
        # shorter ones that are prefixes of them
        return "|".join(
            re.escape(wd) for wd in sorted(words, key=len, reverse=True)
        )

    alternatives = []
    for initial, group in itertools.groupby(sorted(words), key=lambda s: s[:1]):
        suffixes = sorted((s[1:] for s in group), key=len, reverse=True)

        # an empty suffix means the prefix alone is a word, so whatever
        # follows the prefix is optional
        optional = ""
        if "" in suffixes:
            optional = "?"
            suffixes.remove("")

        head = re.escape(initial)

        if not suffixes:
            alternatives.append(head)
        elif len(suffixes) == 1:
            suffix = suffixes[0]
            tail = re.escape(suffix)
            if len(suffix) > 1 and optional:
                # "?" must apply to the whole suffix, not its last char
                tail = f"({tail})"
            alternatives.append(f"{head}{tail}{optional}")
        elif all(len(s) == 1 for s in suffixes):
            char_set = "".join(escape_range_char(s) for s in suffixes)
            alternatives.append(f"{head}[{char_set}]{optional}")
        elif _level < max_level:
            nested = make_compressed_re(suffixes, max_level, _level + 1)
            alternatives.append(f"{head}({nested}){optional}")
        else:
            # nesting limit reached: list remaining suffixes, longest first
            flat = "|".join(re.escape(s) for s in suffixes)
            alternatives.append(f"{head}({flat}){optional}")

    return "|".join(alternatives)