diff options
| author | YDBot <[email protected]> | 2025-10-03 00:52:15 +0000 |
|---|---|---|
| committer | YDBot <[email protected]> | 2025-10-03 00:52:15 +0000 |
| commit | 073e0c88e4f1db5e8042dc7c6d4e71779eddb520 (patch) | |
| tree | b6ab74ecbe3c102ad9ce876f669b8404739ee0b4 /contrib/python | |
| parent | 713668928078465faf4a428572e1c66767117076 (diff) | |
| parent | 3845cf703c43a63764f37d7d6bd281193373147a (diff) | |
Sync branches 251003-0050
Diffstat (limited to 'contrib/python')
56 files changed, 5715 insertions, 3122 deletions
diff --git a/contrib/python/fonttools/.dist-info/METADATA b/contrib/python/fonttools/.dist-info/METADATA index 04d2cc58876..3e389d79601 100644 --- a/contrib/python/fonttools/.dist-info/METADATA +++ b/contrib/python/fonttools/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: fonttools -Version: 4.59.2 +Version: 4.60.0 Summary: Tools to manipulate font files Home-page: http://github.com/fonttools/fonttools Author: Just van Rossum @@ -211,7 +211,11 @@ are required to unlock the extra features named "ufo", etc. for Python, which internally uses `NumPy <https://pypi.python.org/pypi/numpy>`__ arrays and hence is very fast; * `munkres <https://pypi.python.org/pypi/munkres>`__: a pure-Python - module that implements the Hungarian or Kuhn-Munkres algorithm. + module that implements the Hungarian or Kuhn-Munkres algorithm. Slower than + SciPy, but useful for minimalistic systems where adding SciPy is undesirable. + + This ensures both performance (via SciPy) and minimal footprint (via Munkres) + are possible. To plot the results to a PDF or HTML format, you also need to install: @@ -388,6 +392,29 @@ Have fun! Changelog ~~~~~~~~~ +4.60.0 (released 2025-09-17) +---------------------------- + +- [pointPen] Allow ``reverseFlipped`` parameter of ``DecomposingPointPen`` to take a ``ReverseFlipped`` + enum value to control whether/how to reverse contour direction of flipped components, in addition to + the existing True/False. This allows to set ``ReverseFlipped.ON_CURVE_FIRST`` to ensure that + the decomposed outline starts with an on-curve point before being reversed, for better consistency + with other segment-oriented contour transformations. The change is backward compatible, and the + default behavior hasn't changed (#3934). +- [filterPen] Added ``ContourFilterPointPen``, base pen for buffered contour operations, and + ``OnCurveStartPointPen`` filter to ensure contours start with an on-curve point (#3934). 
+- [cu2qu] Fixed difference in cython vs pure-python complex division by real number (#3930). +- [varLib.avar] Refactored and added some new sub-modules and scripts (#3926). + * ``varLib.avar.build`` module to build avar (and a missing fvar) binaries into a possibly empty TTFont, + * ``varLib.avar.unbuild`` module to print a .designspace snippet that would generate the same avar binary, + * ``varLib.avar.map`` module to take TTFont and do the mapping, in user/normalized space, + * ``varLib.avar.plan`` module moved from ``varLib.avarPlanner``. + The bare ``fonttools varLib.avar`` script is deprecated, in favour of ``fonttools varLib.avar.build`` (or ``unbuild``). +- [interpolatable] Clarify ``linear_sum_assignment`` backend options and minimal dependency + usage (#3927). +- [post] Speed up ``build_psNameMapping`` (#3923). +- [ufoLib] Added typing annotations to fontTools.ufoLib (#3875). + 4.59.2 (released 2025-08-27) ---------------------------- diff --git a/contrib/python/fonttools/README.rst b/contrib/python/fonttools/README.rst index 6d638f89c0e..8a4dfc4b232 100644 --- a/contrib/python/fonttools/README.rst +++ b/contrib/python/fonttools/README.rst @@ -124,7 +124,11 @@ are required to unlock the extra features named "ufo", etc. for Python, which internally uses `NumPy <https://pypi.python.org/pypi/numpy>`__ arrays and hence is very fast; * `munkres <https://pypi.python.org/pypi/munkres>`__: a pure-Python - module that implements the Hungarian or Kuhn-Munkres algorithm. + module that implements the Hungarian or Kuhn-Munkres algorithm. Slower than + SciPy, but useful for minimalistic systems where adding SciPy is undesirable. + + This ensures both performance (via SciPy) and minimal footprint (via Munkres) + are possible. 
To plot the results to a PDF or HTML format, you also need to install: diff --git a/contrib/python/fonttools/fontTools/__init__.py b/contrib/python/fonttools/fontTools/__init__.py index 739f0c002c6..8e7f496ee21 100644 --- a/contrib/python/fonttools/fontTools/__init__.py +++ b/contrib/python/fonttools/fontTools/__init__.py @@ -3,6 +3,6 @@ from fontTools.misc.loggingTools import configLogger log = logging.getLogger(__name__) -version = __version__ = "4.59.2" +version = __version__ = "4.60.0" __all__ = ["version", "log", "configLogger"] diff --git a/contrib/python/fonttools/fontTools/annotations.py b/contrib/python/fonttools/fontTools/annotations.py new file mode 100644 index 00000000000..5ff5972e36c --- /dev/null +++ b/contrib/python/fonttools/fontTools/annotations.py @@ -0,0 +1,30 @@ +from __future__ import annotations +from typing import TYPE_CHECKING, Iterable, Optional, TypeVar, Union +from collections.abc import Callable, Sequence +from fontTools.misc.filesystem._base import FS +from os import PathLike +from xml.etree.ElementTree import Element as ElementTreeElement + +if TYPE_CHECKING: + from fontTools.ufoLib import UFOFormatVersion + from fontTools.ufoLib.glifLib import GLIFFormatVersion + from lxml.etree import _Element as LxmlElement + + +T = TypeVar("T") # Generic type +K = TypeVar("K") # Generic dict key type +V = TypeVar("V") # Generic dict value type + +GlyphNameToFileNameFunc = Optional[Callable[[str, set[str]], str]] +ElementType = Union[ElementTreeElement, "LxmlElement"] +FormatVersion = Union[int, tuple[int, int]] +FormatVersions = Optional[Iterable[FormatVersion]] +GLIFFormatVersionInput = Optional[Union[int, tuple[int, int], "GLIFFormatVersion"]] +UFOFormatVersionInput = Optional[Union[int, tuple[int, int], "UFOFormatVersion"]] +IntFloat = Union[int, float] +KerningPair = tuple[str, str] +KerningDict = dict[KerningPair, IntFloat] +KerningGroups = dict[str, Sequence[str]] +KerningNested = dict[str, dict[str, IntFloat]] +PathStr = Union[str, 
PathLike[str]] +PathOrFS = Union[PathStr, FS] diff --git a/contrib/python/fonttools/fontTools/cu2qu/cu2qu.py b/contrib/python/fonttools/fontTools/cu2qu/cu2qu.py index e2782a78666..150c03fb4a0 100644 --- a/contrib/python/fonttools/fontTools/cu2qu/cu2qu.py +++ b/contrib/python/fonttools/fontTools/cu2qu/cu2qu.py @@ -61,6 +61,23 @@ def dot(v1, v2): @cython.cfunc [email protected](z=cython.complex, den=cython.double) [email protected](zr=cython.double, zi=cython.double) +def _complex_div_by_real(z, den): + """Divide complex by real using Python's method (two separate divisions). + + This ensures bit-exact compatibility with Python's complex division, + avoiding C's multiply-by-reciprocal optimization that can cause 1 ULP differences + on some platforms/compilers (e.g. clang on macOS arm64). + + https://github.com/fonttools/fonttools/issues/3928 + """ + zr = z.real + zi = z.imag + return complex(zr / den, zi / den) + + @cython.inline @cython.locals(a=cython.complex, b=cython.complex, c=cython.complex, d=cython.complex) @cython.locals( @@ -68,8 +85,8 @@ def dot(v1, v2): ) def calc_cubic_points(a, b, c, d): _1 = d - _2 = (c / 3.0) + d - _3 = (b + c) / 3.0 + _2 + _2 = _complex_div_by_real(c, 3.0) + d + _3 = _complex_div_by_real(b + c, 3.0) + _2 _4 = a + d + c + b return _1, _2, _3, _4 diff --git a/contrib/python/fonttools/fontTools/misc/enumTools.py b/contrib/python/fonttools/fontTools/misc/enumTools.py new file mode 100644 index 00000000000..e947342f200 --- /dev/null +++ b/contrib/python/fonttools/fontTools/misc/enumTools.py @@ -0,0 +1,23 @@ +"""Enum-related utilities, including backports for older Python versions.""" + +from __future__ import annotations + +from enum import Enum + + +__all__ = ["StrEnum"] + +# StrEnum is only available in Python 3.11+ +try: + from enum import StrEnum +except ImportError: + + class StrEnum(str, Enum): + """ + Minimal backport of Python 3.11's StrEnum for older versions. + + An Enum where all members are also strings. 
+ """ + + def __str__(self) -> str: + return self.value diff --git a/contrib/python/fonttools/fontTools/pens/filterPen.py b/contrib/python/fonttools/fontTools/pens/filterPen.py index f104e67dd35..73be8fa98e5 100644 --- a/contrib/python/fonttools/fontTools/pens/filterPen.py +++ b/contrib/python/fonttools/fontTools/pens/filterPen.py @@ -1,7 +1,11 @@ from __future__ import annotations from fontTools.pens.basePen import AbstractPen, DecomposingPen -from fontTools.pens.pointPen import AbstractPointPen, DecomposingPointPen +from fontTools.pens.pointPen import ( + AbstractPointPen, + DecomposingPointPen, + ReverseFlipped, +) from fontTools.pens.recordingPen import RecordingPen @@ -155,26 +159,61 @@ class FilterPointPen(_PassThruComponentsMixin, AbstractPointPen): def __init__(self, outPen): self._outPen = outPen - def beginPath(self, **kwargs): + def beginPath(self, identifier=None, **kwargs): + kwargs = dict(kwargs) + if identifier is not None: + kwargs["identifier"] = identifier self._outPen.beginPath(**kwargs) def endPath(self): self._outPen.endPath() - def addPoint(self, pt, segmentType=None, smooth=False, name=None, **kwargs): + def addPoint( + self, + pt, + segmentType=None, + smooth=False, + name=None, + identifier=None, + **kwargs, + ): + kwargs = dict(kwargs) + if identifier is not None: + kwargs["identifier"] = identifier self._outPen.addPoint(pt, segmentType, smooth, name, **kwargs) -class _DecomposingFilterPenMixin: - """Mixin class that decomposes components as regular contours. 
+class _DecomposingFilterMixinBase: + """Base mixin class with common `addComponent` logic for decomposing filter pens.""" + + def addComponent(self, baseGlyphName, transformation, **kwargs): + # only decompose the component if it's included in the set + if self.include is None or baseGlyphName in self.include: + # if we're decomposing nested components, temporarily set include to None + include_bak = self.include + if self.decomposeNested and self.include: + self.include = None + try: + super().addComponent(baseGlyphName, transformation, **kwargs) + finally: + if self.include != include_bak: + self.include = include_bak + else: + _PassThruComponentsMixin.addComponent( + self, baseGlyphName, transformation, **kwargs + ) - Shared by both DecomposingFilterPen and DecomposingFilterPointPen. - Takes two required parameters, another (segment or point) pen 'outPen' to draw +class _DecomposingFilterPenMixin(_DecomposingFilterMixinBase): + """Mixin class that decomposes components as regular contours for segment pens. + + Used by DecomposingFilterPen. + + Takes two required parameters, another segment pen 'outPen' to draw with, and a 'glyphSet' dict of drawable glyph objects to draw components from. The 'skipMissingComponents' and 'reverseFlipped' optional arguments work the - same as in the DecomposingPen/DecomposingPointPen. Both are False by default. + same as in the DecomposingPen. reverseFlipped is bool only (True/False). 
In addition, the decomposing filter pens also take the following two options: @@ -196,35 +235,69 @@ class _DecomposingFilterPenMixin: outPen, glyphSet, skipMissingComponents=None, - reverseFlipped=False, + reverseFlipped: bool = False, include: set[str] | None = None, decomposeNested: bool = True, + **kwargs, ): + assert isinstance( + reverseFlipped, bool + ), f"Expected bool, got {type(reverseFlipped).__name__}" super().__init__( outPen=outPen, glyphSet=glyphSet, skipMissingComponents=skipMissingComponents, reverseFlipped=reverseFlipped, + **kwargs, ) self.include = include self.decomposeNested = decomposeNested - def addComponent(self, baseGlyphName, transformation, **kwargs): - # only decompose the component if it's included in the set - if self.include is None or baseGlyphName in self.include: - # if we're decomposing nested components, temporarily set include to None - include_bak = self.include - if self.decomposeNested and self.include: - self.include = None - try: - super().addComponent(baseGlyphName, transformation, **kwargs) - finally: - if self.include != include_bak: - self.include = include_bak - else: - _PassThruComponentsMixin.addComponent( - self, baseGlyphName, transformation, **kwargs - ) + +class _DecomposingFilterPointPenMixin(_DecomposingFilterMixinBase): + """Mixin class that decomposes components as regular contours for point pens. + + Takes two required parameters, another point pen 'outPen' to draw + with, and a 'glyphSet' dict of drawable glyph objects to draw components from. + + The 'skipMissingComponents' and 'reverseFlipped' optional arguments work the + same as in the DecomposingPointPen. reverseFlipped accepts bool | ReverseFlipped + (see DecomposingPointPen). + + In addition, the decomposing filter pens also take the following two options: + + 'include' is an optional set of component base glyph names to consider for + decomposition; the default include=None means decompose all components no matter + the base glyph name). 
+ + 'decomposeNested' (bool) controls whether to recurse decomposition into nested + components of components (this only matters when 'include' was also provided); + if False, only decompose top-level components included in the set, but not + also their children. + """ + + # raises MissingComponentError if base glyph is not found in glyphSet + skipMissingComponents = False + + def __init__( + self, + outPen, + glyphSet, + skipMissingComponents=None, + reverseFlipped: bool | ReverseFlipped = False, + include: set[str] | None = None, + decomposeNested: bool = True, + **kwargs, + ): + super().__init__( + outPen=outPen, + glyphSet=glyphSet, + skipMissingComponents=skipMissingComponents, + reverseFlipped=reverseFlipped, + **kwargs, + ) + self.include = include + self.decomposeNested = decomposeNested class DecomposingFilterPen(_DecomposingFilterPenMixin, DecomposingPen, FilterPen): @@ -234,8 +307,127 @@ class DecomposingFilterPen(_DecomposingFilterPenMixin, DecomposingPen, FilterPen class DecomposingFilterPointPen( - _DecomposingFilterPenMixin, DecomposingPointPen, FilterPointPen + _DecomposingFilterPointPenMixin, DecomposingPointPen, FilterPointPen ): """Filter point pen that draws components as regular contours.""" pass + + +class ContourFilterPointPen(_PassThruComponentsMixin, AbstractPointPen): + """A "buffered" filter point pen that accumulates contour data, passes + it through a ``filterContour`` method when the contour is closed or ended, + and finally draws the result with the output point pen. + + Components are passed through unchanged. + + The ``filterContour`` method can modify the contour in-place (return None) + or return a new contour to replace it. 
+ """ + + def __init__(self, outPen): + self._outPen = outPen + self.currentContour = None + self.currentContourKwargs = None + + def beginPath(self, identifier=None, **kwargs): + if self.currentContour is not None: + raise ValueError("Path already begun") + kwargs = dict(kwargs) + if identifier is not None: + kwargs["identifier"] = identifier + self.currentContour = [] + self.currentContourKwargs = kwargs + + def endPath(self): + if self.currentContour is None: + raise ValueError("Path not begun") + self._flushContour() + self.currentContour = None + self.currentContourKwargs = None + + def _flushContour(self): + """Flush the current contour to the output pen.""" + result = self.filterContour(self.currentContour) + if result is not None: + self.currentContour = result + + # Draw the filtered contour + self._outPen.beginPath(**self.currentContourKwargs) + for pt, segmentType, smooth, name, kwargs in self.currentContour: + self._outPen.addPoint(pt, segmentType, smooth, name, **kwargs) + self._outPen.endPath() + + def filterContour(self, contour): + """Subclasses must override this to perform the filtering. + + The contour is a list of (pt, segmentType, smooth, name, kwargs) tuples. + If the method doesn't return a value (i.e. returns None), it's + assumed that the contour was modified in-place. + Otherwise, the return value replaces the original contour. + """ + return # or return contour + + def addPoint( + self, + pt, + segmentType=None, + smooth=False, + name=None, + identifier=None, + **kwargs, + ): + if self.currentContour is None: + raise ValueError("Path not begun") + kwargs = dict(kwargs) + if identifier is not None: + kwargs["identifier"] = identifier + self.currentContour.append((pt, segmentType, smooth, name, kwargs)) + + +class OnCurveFirstPointPen(ContourFilterPointPen): + """Filter point pen that ensures closed contours start with an on-curve point. 
+ + If a closed contour starts with an off-curve point (segmentType=None), it rotates + the points list so that the first on-curve point (segmentType != None) becomes + the start point. Open contours and contours already starting with on-curve points + are passed through unchanged. + + >>> from fontTools.pens.recordingPen import RecordingPointPen + >>> rec = RecordingPointPen() + >>> pen = OnCurveFirstPointPen(rec) + >>> # Closed contour starting with off-curve - will be rotated + >>> pen.beginPath() + >>> pen.addPoint((0, 0), None) # off-curve + >>> pen.addPoint((100, 100), "line") # on-curve - will become start + >>> pen.addPoint((200, 0), None) # off-curve + >>> pen.addPoint((300, 100), "curve") # on-curve + >>> pen.endPath() + >>> # The contour should now start with (100, 100) "line" + >>> rec.value[0] + ('beginPath', (), {}) + >>> rec.value[1] + ('addPoint', ((100, 100), 'line', False, None), {}) + >>> rec.value[2] + ('addPoint', ((200, 0), None, False, None), {}) + >>> rec.value[3] + ('addPoint', ((300, 100), 'curve', False, None), {}) + >>> rec.value[4] + ('addPoint', ((0, 0), None, False, None), {}) + """ + + def filterContour(self, contour): + """Rotate closed contour to start with first on-curve point if needed.""" + if not contour: + return + + # Check if it's a closed contour (no "move" segmentType) + is_closed = contour[0][1] != "move" + + if is_closed and contour[0][1] is None: + # Closed contour starting with off-curve - need to rotate + # Find the first on-curve point + for i, (pt, segmentType, smooth, name, kwargs) in enumerate(contour): + if segmentType is not None: + # Rotate the points list so it starts with the first on-curve point + return contour[i:] + contour[:i] diff --git a/contrib/python/fonttools/fontTools/pens/pointPen.py b/contrib/python/fonttools/fontTools/pens/pointPen.py index 843d7a28d31..3091b86921b 100644 --- a/contrib/python/fonttools/fontTools/pens/pointPen.py +++ b/contrib/python/fonttools/fontTools/pens/pointPen.py @@ -17,6 
+17,7 @@ from __future__ import annotations import math from typing import Any, Dict, List, Optional, Tuple +from fontTools.misc.enumTools import StrEnum from fontTools.misc.loggingTools import LogMixin from fontTools.misc.transform import DecomposedTransform, Identity from fontTools.pens.basePen import AbstractPen, MissingComponentError, PenError @@ -28,6 +29,7 @@ __all__ = [ "SegmentToPointPen", "GuessSmoothPointPen", "ReverseContourPointPen", + "ReverseFlipped", ] # Some type aliases to make it easier below @@ -39,6 +41,19 @@ SegmentType = Optional[str] SegmentList = List[Tuple[SegmentType, SegmentPointList]] +class ReverseFlipped(StrEnum): + """How to handle flipped components during decomposition. + + NO: Don't reverse flipped components + KEEP_START: Reverse flipped components, keeping original starting point + ON_CURVE_FIRST: Reverse flipped components, ensuring first point is on-curve + """ + + NO = "no" + KEEP_START = "keep_start" + ON_CURVE_FIRST = "on_curve_first" + + class AbstractPointPen: """Baseclass for all PointPens.""" @@ -559,15 +574,20 @@ class DecomposingPointPen(LogMixin, AbstractPointPen): glyphSet, *args, skipMissingComponents=None, - reverseFlipped=False, + reverseFlipped: bool | ReverseFlipped = False, **kwargs, ): """Takes a 'glyphSet' argument (dict), in which the glyphs that are referenced as components are looked up by their name. - If the optional 'reverseFlipped' argument is True, components whose transformation - matrix has a negative determinant will be decomposed with a reversed path direction - to compensate for the flip. + If the optional 'reverseFlipped' argument is True or a ReverseFlipped enum value, + components whose transformation matrix has a negative determinant will be decomposed + with a reversed path direction to compensate for the flip. 
+ + The reverseFlipped parameter can be: + - False or ReverseFlipped.NO: Don't reverse flipped components + - True or ReverseFlipped.KEEP_START: Reverse, keeping original starting point + - ReverseFlipped.ON_CURVE_FIRST: Reverse, ensuring first point is on-curve The optional 'skipMissingComponents' argument can be set to True/False to override the homonymous class attribute for a given pen instance. @@ -579,7 +599,13 @@ class DecomposingPointPen(LogMixin, AbstractPointPen): if skipMissingComponents is None else skipMissingComponents ) - self.reverseFlipped = reverseFlipped + # Handle backward compatibility and validate string inputs + if reverseFlipped is False: + self.reverseFlipped = ReverseFlipped.NO + elif reverseFlipped is True: + self.reverseFlipped = ReverseFlipped.KEEP_START + else: + self.reverseFlipped = ReverseFlipped(reverseFlipped) def addComponent(self, baseGlyphName, transformation, identifier=None, **kwargs): """Transform the points of the base glyph and draw it onto self. @@ -600,10 +626,18 @@ class DecomposingPointPen(LogMixin, AbstractPointPen): pen = self if transformation != Identity: pen = TransformPointPen(pen, transformation) - if self.reverseFlipped: + if self.reverseFlipped != ReverseFlipped.NO: # if the transformation has a negative determinant, it will # reverse the contour direction of the component a, b, c, d = transformation[:4] if a * d - b * c < 0: pen = ReverseContourPointPen(pen) + + if self.reverseFlipped == ReverseFlipped.ON_CURVE_FIRST: + from fontTools.pens.filterPen import OnCurveFirstPointPen + + # Ensure the starting point is an on-curve. 
+ # Wrap last so this filter runs first during drawPoints + pen = OnCurveFirstPointPen(pen) + glyph.drawPoints(pen) diff --git a/contrib/python/fonttools/fontTools/ttLib/tables/_p_o_s_t.py b/contrib/python/fonttools/fontTools/ttLib/tables/_p_o_s_t.py index 8aa37b6a034..1564c89347b 100644 --- a/contrib/python/fonttools/fontTools/ttLib/tables/_p_o_s_t.py +++ b/contrib/python/fonttools/fontTools/ttLib/tables/_p_o_s_t.py @@ -118,6 +118,7 @@ class table__p_o_s_t(DefaultTable.DefaultTable): def build_psNameMapping(self, ttFont): mapping = {} allNames = {} + glyphOrderNames = set(self.glyphOrder) for i in range(ttFont["maxp"].numGlyphs): glyphName = psName = self.glyphOrder[i] if glyphName == "": @@ -126,16 +127,15 @@ class table__p_o_s_t(DefaultTable.DefaultTable): if glyphName in allNames: # make up a new glyphName that's unique n = allNames[glyphName] - # check if the exists in any of the seen names or later ones - names = set(allNames.keys()) | set(self.glyphOrder) - while (glyphName + "." + str(n)) in names: + # check if the glyph name exists in the glyph order + while f"{glyphName}.{n}" in glyphOrderNames: n += 1 allNames[glyphName] = n + 1 - glyphName = glyphName + "." 
+ str(n) + glyphName = f"{glyphName}.{n}" - self.glyphOrder[i] = glyphName allNames[glyphName] = 1 if glyphName != psName: + self.glyphOrder[i] = glyphName mapping[glyphName] = psName self.mapping = mapping diff --git a/contrib/python/fonttools/fontTools/ufoLib/__init__.py b/contrib/python/fonttools/fontTools/ufoLib/__init__.py index a6ce1434d9d..bfaff5849e4 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/__init__.py +++ b/contrib/python/fonttools/fontTools/ufoLib/__init__.py @@ -32,6 +32,8 @@ Value conversion functions are available for converting - :func:`.convertFontInfoValueForAttributeFromVersion3ToVersion2` """ +from __future__ import annotations + import enum import logging import os @@ -39,19 +41,47 @@ import zipfile from collections import OrderedDict from copy import deepcopy from os import fsdecode +from typing import IO, TYPE_CHECKING, Any, Optional, Union, cast from fontTools.misc import filesystem as fs from fontTools.misc import plistlib from fontTools.ufoLib.converters import convertUFO1OrUFO2KerningToUFO3Kerning from fontTools.ufoLib.errors import UFOLibError from fontTools.ufoLib.filenames import userNameToFileName -from fontTools.ufoLib.utils import _VersionTupleEnumMixin, numberTypes +from fontTools.ufoLib.utils import ( + BaseFormatVersion, + normalizeFormatVersion, + numberTypes, +) from fontTools.ufoLib.validators import * +if TYPE_CHECKING: + from logging import Logger + + from fontTools.annotations import ( + GlyphNameToFileNameFunc, + K, + KerningDict, + KerningGroups, + KerningNested, + PathOrFS, + PathStr, + UFOFormatVersionInput, + V, + ) + from fontTools.misc.filesystem._base import FS + from fontTools.ufoLib.glifLib import GlyphSet + +KerningGroupRenameMaps = dict[str, dict[str, str]] +LibDict = dict[str, Any] +LayerOrderList = Optional[list[Optional[str]]] +AttributeDataDict = dict[str, Any] +FontInfoAttributes = dict[str, AttributeDataDict] + # client code can check this to see if the upstream `fs` package is being used 
haveFS = fs._haveFS -__all__ = [ +__all__: list[str] = [ "haveFS", "makeUFOPath", "UFOLibError", @@ -69,43 +99,37 @@ __all__ = [ "convertFontInfoValueForAttributeFromVersion2ToVersion1", ] -__version__ = "3.0.0" +__version__: str = "3.0.0" -logger = logging.getLogger(__name__) +logger: Logger = logging.getLogger(__name__) # --------- # Constants # --------- -DEFAULT_GLYPHS_DIRNAME = "glyphs" -DATA_DIRNAME = "data" -IMAGES_DIRNAME = "images" -METAINFO_FILENAME = "metainfo.plist" -FONTINFO_FILENAME = "fontinfo.plist" -LIB_FILENAME = "lib.plist" -GROUPS_FILENAME = "groups.plist" -KERNING_FILENAME = "kerning.plist" -FEATURES_FILENAME = "features.fea" -LAYERCONTENTS_FILENAME = "layercontents.plist" -LAYERINFO_FILENAME = "layerinfo.plist" +DEFAULT_GLYPHS_DIRNAME: str = "glyphs" +DATA_DIRNAME: str = "data" +IMAGES_DIRNAME: str = "images" +METAINFO_FILENAME: str = "metainfo.plist" +FONTINFO_FILENAME: str = "fontinfo.plist" +LIB_FILENAME: str = "lib.plist" +GROUPS_FILENAME: str = "groups.plist" +KERNING_FILENAME: str = "kerning.plist" +FEATURES_FILENAME: str = "features.fea" +LAYERCONTENTS_FILENAME: str = "layercontents.plist" +LAYERINFO_FILENAME: str = "layerinfo.plist" -DEFAULT_LAYER_NAME = "public.default" +DEFAULT_LAYER_NAME: str = "public.default" -class UFOFormatVersion(tuple, _VersionTupleEnumMixin, enum.Enum): +class UFOFormatVersion(BaseFormatVersion): FORMAT_1_0 = (1, 0) FORMAT_2_0 = (2, 0) FORMAT_3_0 = (3, 0) -# python 3.11 doesn't like when a mixin overrides a dunder method like __str__ -# for some reasons it keep using Enum.__str__, see -# https://github.com/fonttools/fonttools/pull/2655 -UFOFormatVersion.__str__ = _VersionTupleEnumMixin.__str__ - - class UFOFileStructure(enum.Enum): ZIP = "zip" PACKAGE = "package" @@ -117,7 +141,11 @@ class UFOFileStructure(enum.Enum): class _UFOBaseIO: - def getFileModificationTime(self, path): + if TYPE_CHECKING: + fs: FS + _havePreviousFile: bool + + def getFileModificationTime(self, path: PathStr) -> Optional[float]: """ 
Returns the modification time for the file at the given path, as a floating point number giving the number of seconds since the epoch. @@ -129,9 +157,11 @@ class _UFOBaseIO: except (fs.errors.MissingInfoNamespace, fs.errors.ResourceNotFound): return None else: - return dt.timestamp() + if dt is not None: + return dt.timestamp() + return None - def _getPlist(self, fileName, default=None): + def _getPlist(self, fileName: str, default: Optional[Any] = None) -> Any: """ Read a property list relative to the UFO filesystem's root. Raises UFOLibError if the file is missing and default is None, @@ -155,7 +185,7 @@ class _UFOBaseIO: # TODO(anthrotype): try to narrow this down a little raise UFOLibError(f"'{fileName}' could not be read on {self.fs}: {e}") - def _writePlist(self, fileName, obj): + def _writePlist(self, fileName: str, obj: Any) -> None: """ Write a property list to a file relative to the UFO filesystem's root. @@ -209,15 +239,17 @@ class UFOReader(_UFOBaseIO): ``False`` to not validate the data. 
""" - def __init__(self, path, validate=True): - if hasattr(path, "__fspath__"): # support os.PathLike objects + def __init__(self, path: PathOrFS, validate: bool = True) -> None: + # Only call __fspath__ if path is not already a str or FS object + if not isinstance(path, (str, fs.base.FS)) and hasattr(path, "__fspath__"): path = path.__fspath__() if isinstance(path, str): structure = _sniffFileStructure(path) + parentFS: FS try: if structure is UFOFileStructure.ZIP: - parentFS = fs.zipfs.ZipFS(path, write=False, encoding="utf-8") + parentFS = fs.zipfs.ZipFS(path, write=False, encoding="utf-8") # type: ignore[abstract] else: parentFS = fs.osfs.OSFS(path) except fs.errors.CreateFailed as e: @@ -235,7 +267,7 @@ class UFOReader(_UFOBaseIO): if len(rootDirs) == 1: # 'ClosingSubFS' ensures that the parent zip file is closed when # its root subdirectory is closed - self.fs = parentFS.opendir( + self.fs: FS = parentFS.opendir( rootDirs[0], factory=fs.subfs.ClosingSubFS ) else: @@ -247,10 +279,10 @@ class UFOReader(_UFOBaseIO): self.fs = parentFS # when passed a path string, we make sure we close the newly opened fs # upon calling UFOReader.close method or context manager's __exit__ - self._shouldClose = True + self._shouldClose: bool = True self._fileStructure = structure elif isinstance(path, fs.base.FS): - filesystem = path + filesystem: FS = path try: filesystem.check() except fs.errors.FilesystemClosed: @@ -272,9 +304,9 @@ class UFOReader(_UFOBaseIO): "Expected a path string or fs.base.FS object, found '%s'" % type(path).__name__ ) - self._path = fsdecode(path) - self._validate = validate - self._upConvertedKerningData = None + self._path: str = fsdecode(path) + self._validate: bool = validate + self._upConvertedKerningData: Optional[dict[str, Any]] = None try: self.readMetaInfo(validate=validate) @@ -284,7 +316,7 @@ class UFOReader(_UFOBaseIO): # properties - def _get_path(self): + def _get_path(self) -> str: import warnings warnings.warn( @@ -294,9 +326,9 @@ class 
UFOReader(_UFOBaseIO): ) return self._path - path = property(_get_path, doc="The path of the UFO (DEPRECATED).") + path: property = property(_get_path, doc="The path of the UFO (DEPRECATED).") - def _get_formatVersion(self): + def _get_formatVersion(self) -> int: import warnings warnings.warn( @@ -312,16 +344,16 @@ class UFOReader(_UFOBaseIO): ) @property - def formatVersionTuple(self): + def formatVersionTuple(self) -> tuple[int, int]: """The (major, minor) format version of the UFO. This is determined by reading metainfo.plist during __init__. """ return self._formatVersion - def _get_fileStructure(self): + def _get_fileStructure(self) -> Any: return self._fileStructure - fileStructure = property( + fileStructure: property = property( _get_fileStructure, doc=( "The file structure of the UFO: " @@ -331,7 +363,7 @@ class UFOReader(_UFOBaseIO): # up conversion - def _upConvertKerning(self, validate): + def _upConvertKerning(self, validate: bool) -> None: """ Up convert kerning and groups in UFO 1 and 2. The data will be held internally until each bit of data @@ -385,7 +417,7 @@ class UFOReader(_UFOBaseIO): # support methods - def readBytesFromPath(self, path): + def readBytesFromPath(self, path: PathStr) -> Optional[bytes]: """ Returns the bytes in the file at the given path. The path must be relative to the UFO's filesystem root. @@ -396,7 +428,9 @@ class UFOReader(_UFOBaseIO): except fs.errors.ResourceNotFound: return None - def getReadFileForPath(self, path, encoding=None): + def getReadFileForPath( + self, path: PathStr, encoding: Optional[str] = None + ) -> Optional[Union[IO[bytes], IO[str]]]: """ Returns a file (or file-like) object for the file at the given path. The path must be relative to the UFO path. @@ -417,7 +451,7 @@ class UFOReader(_UFOBaseIO): # metainfo.plist - def _readMetaInfo(self, validate=None): + def _readMetaInfo(self, validate: Optional[bool] = None) -> dict[str, Any]: """ Read metainfo.plist and return raw data. 
Only used for internal operations. @@ -459,7 +493,7 @@ class UFOReader(_UFOBaseIO): data["formatVersionTuple"] = formatVersion return data - def readMetaInfo(self, validate=None): + def readMetaInfo(self, validate: Optional[bool] = None) -> None: """ Read metainfo.plist and set formatVersion. Only used for internal operations. @@ -471,7 +505,7 @@ class UFOReader(_UFOBaseIO): # groups.plist - def _readGroups(self): + def _readGroups(self) -> dict[str, list[str]]: groups = self._getPlist(GROUPS_FILENAME, {}) # remove any duplicate glyphs in a kerning group for groupName, glyphList in groups.items(): @@ -479,7 +513,7 @@ class UFOReader(_UFOBaseIO): groups[groupName] = list(OrderedDict.fromkeys(glyphList)) return groups - def readGroups(self, validate=None): + def readGroups(self, validate: Optional[bool] = None) -> dict[str, list[str]]: """ Read groups.plist. Returns a dict. ``validate`` will validate the read data, by default it is set to the @@ -490,7 +524,7 @@ class UFOReader(_UFOBaseIO): # handle up conversion if self._formatVersion < UFOFormatVersion.FORMAT_3_0: self._upConvertKerning(validate) - groups = self._upConvertedKerningData["groups"] + groups = cast(dict, self._upConvertedKerningData)["groups"] # normal else: groups = self._readGroups() @@ -500,7 +534,9 @@ class UFOReader(_UFOBaseIO): raise UFOLibError(message) return groups - def getKerningGroupConversionRenameMaps(self, validate=None): + def getKerningGroupRenameMaps( + self, validate: Optional[bool] = None + ) -> KerningGroupRenameMaps: """ Get maps defining the renaming that was done during any needed kerning group conversion. This method returns a @@ -524,17 +560,17 @@ class UFOReader(_UFOBaseIO): # use the public group reader to force the load and # conversion of the data if it hasn't happened yet. 
self.readGroups(validate=validate) - return self._upConvertedKerningData["groupRenameMaps"] + return cast(dict, self._upConvertedKerningData)["groupRenameMaps"] # fontinfo.plist - def _readInfo(self, validate): + def _readInfo(self, validate: bool) -> dict[str, Any]: data = self._getPlist(FONTINFO_FILENAME, {}) if validate and not isinstance(data, dict): raise UFOLibError("fontinfo.plist is not properly formatted.") return data - def readInfo(self, info, validate=None): + def readInfo(self, info: Any, validate: Optional[bool] = None) -> None: """ Read fontinfo.plist. It requires an object that allows setting attributes with names that follow the fontinfo.plist @@ -593,11 +629,11 @@ class UFOReader(_UFOBaseIO): # kerning.plist - def _readKerning(self): + def _readKerning(self) -> KerningNested: data = self._getPlist(KERNING_FILENAME, {}) return data - def readKerning(self, validate=None): + def readKerning(self, validate: Optional[bool] = None) -> KerningDict: """ Read kerning.plist. Returns a dict. @@ -609,7 +645,7 @@ class UFOReader(_UFOBaseIO): # handle up conversion if self._formatVersion < UFOFormatVersion.FORMAT_3_0: self._upConvertKerning(validate) - kerningNested = self._upConvertedKerningData["kerning"] + kerningNested = cast(dict, self._upConvertedKerningData)["kerning"] # normal else: kerningNested = self._readKerning() @@ -627,7 +663,7 @@ class UFOReader(_UFOBaseIO): # lib.plist - def readLib(self, validate=None): + def readLib(self, validate: Optional[bool] = None) -> dict[str, Any]: """ Read lib.plist. Returns a dict. @@ -645,7 +681,7 @@ class UFOReader(_UFOBaseIO): # features.fea - def readFeatures(self): + def readFeatures(self) -> str: """ Read features.fea. Return a string. The returned string is empty if the file is missing. 
@@ -658,7 +694,7 @@ class UFOReader(_UFOBaseIO): # glyph sets & layers - def _readLayerContents(self, validate): + def _readLayerContents(self, validate: bool) -> list[tuple[str, str]]: """ Rebuild the layer contents list by checking what glyphsets are available on disk. @@ -674,7 +710,7 @@ class UFOReader(_UFOBaseIO): raise UFOLibError(error) return contents - def getLayerNames(self, validate=None): + def getLayerNames(self, validate: Optional[bool] = None) -> list[str]: """ Get the ordered layer names from layercontents.plist. @@ -687,7 +723,7 @@ class UFOReader(_UFOBaseIO): layerNames = [layerName for layerName, directoryName in layerContents] return layerNames - def getDefaultLayerName(self, validate=None): + def getDefaultLayerName(self, validate: Optional[bool] = None) -> str: """ Get the default layer name from layercontents.plist. @@ -703,7 +739,12 @@ class UFOReader(_UFOBaseIO): # this will already have been raised during __init__ raise UFOLibError("The default layer is not defined in layercontents.plist.") - def getGlyphSet(self, layerName=None, validateRead=None, validateWrite=None): + def getGlyphSet( + self, + layerName: Optional[str] = None, + validateRead: Optional[bool] = None, + validateWrite: Optional[bool] = None, + ) -> GlyphSet: """ Return the GlyphSet associated with the glyphs directory mapped to layerName @@ -744,7 +785,9 @@ class UFOReader(_UFOBaseIO): expectContentsFile=True, ) - def getCharacterMapping(self, layerName=None, validate=None): + def getCharacterMapping( + self, layerName: Optional[str] = None, validate: Optional[bool] = None + ) -> dict[int, list[str]]: """ Return a dictionary that maps unicode values (ints) to lists of glyph names. 
@@ -755,7 +798,7 @@ class UFOReader(_UFOBaseIO): layerName, validateRead=validate, validateWrite=True ) allUnicodes = glyphSet.getUnicodes() - cmap = {} + cmap: dict[int, list[str]] = {} for glyphName, unicodes in allUnicodes.items(): for code in unicodes: if code in cmap: @@ -766,7 +809,7 @@ class UFOReader(_UFOBaseIO): # /data - def getDataDirectoryListing(self): + def getDataDirectoryListing(self) -> list[str]: """ Returns a list of all files in the data directory. The returned paths will be relative to the UFO. @@ -787,7 +830,7 @@ class UFOReader(_UFOBaseIO): except fs.errors.ResourceError: return [] - def getImageDirectoryListing(self, validate=None): + def getImageDirectoryListing(self, validate: Optional[bool] = None) -> list[str]: """ Returns a list of all image file names in the images directory. Each of the images will @@ -823,7 +866,7 @@ class UFOReader(_UFOBaseIO): result.append(path.name) return result - def readData(self, fileName): + def readData(self, fileName: PathStr) -> bytes: """ Return bytes for the file named 'fileName' inside the 'data/' directory. """ @@ -839,7 +882,7 @@ class UFOReader(_UFOBaseIO): raise UFOLibError(f"No data file named '{fileName}' on {self.fs}") return data - def readImage(self, fileName, validate=None): + def readImage(self, fileName: PathStr, validate: Optional[bool] = None) -> bytes: """ Return image data for the file named fileName. 
@@ -868,14 +911,14 @@ class UFOReader(_UFOBaseIO): raise UFOLibError(error) return data - def close(self): + def close(self) -> None: if self._shouldClose: self.fs.close() - def __enter__(self): + def __enter__(self) -> UFOReader: return self - def __exit__(self, exc_type, exc_value, exc_tb): + def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None: self.close() @@ -910,14 +953,14 @@ class UFOWriter(UFOReader): def __init__( self, - path, - formatVersion=None, - fileCreator="com.github.fonttools.ufoLib", - structure=None, - validate=True, - ): + path: PathOrFS, + formatVersion: UFOFormatVersionInput = None, + fileCreator: str = "com.github.fonttools.ufoLib", + structure: Optional[UFOFileStructure] = None, + validate: bool = True, + ) -> None: try: - formatVersion = UFOFormatVersion(formatVersion) + formatVersion = normalizeFormatVersion(formatVersion, UFOFormatVersion) except ValueError as e: from fontTools.ufoLib.errors import UnsupportedUFOFormat @@ -963,8 +1006,8 @@ class UFOWriter(UFOReader): # we can't write a zip in-place, so we have to copy its # contents to a temporary location and work from there, then # upon closing UFOWriter we create the final zip file - parentFS = fs.tempfs.TempFS() - with fs.zipfs.ZipFS(path, encoding="utf-8") as origFS: + parentFS: FS = fs.tempfs.TempFS() + with fs.zipfs.ZipFS(path, encoding="utf-8") as origFS: # type: ignore[abstract] fs.copy.copy_fs(origFS, parentFS) # if output path is an existing zip, we require that it contains # one, and only one, root directory (with arbitrary name), in turn @@ -986,7 +1029,7 @@ class UFOWriter(UFOReader): # if the output zip file didn't exist, we create the root folder; # we name it the same as input 'path', but with '.ufo' extension rootDir = os.path.splitext(os.path.basename(path))[0] + ".ufo" - parentFS = fs.zipfs.ZipFS(path, write=True, encoding="utf-8") + parentFS = fs.zipfs.ZipFS(path, write=True, encoding="utf-8") # type: ignore[abstract] parentFS.makedir(rootDir) # 
'ClosingSubFS' ensures that the parent filesystem is closed # when its root subdirectory is closed @@ -997,7 +1040,7 @@ class UFOWriter(UFOReader): self._havePreviousFile = havePreviousFile self._shouldClose = True elif isinstance(path, fs.base.FS): - filesystem = path + filesystem: FS = path try: filesystem.check() except fs.errors.FilesystemClosed: @@ -1032,7 +1075,7 @@ class UFOWriter(UFOReader): self._path = fsdecode(path) self._formatVersion = formatVersion self._fileCreator = fileCreator - self._downConversionKerningData = None + self._downConversionKerningData: Optional[KerningGroupRenameMaps] = None self._validate = validate # if the file already exists, get the format version. # this will be needed for up and down conversion. @@ -1050,7 +1093,7 @@ class UFOWriter(UFOReader): "that is trying to be written. This is not supported." ) # handle the layer contents - self.layerContents = {} + self.layerContents: Union[dict[str, str], OrderedDict[str, str]] = {} if previousFormatVersion is not None and previousFormatVersion.major >= 3: # already exists self.layerContents = OrderedDict(self._readLayerContents(validate)) @@ -1064,17 +1107,19 @@ class UFOWriter(UFOReader): # properties - def _get_fileCreator(self): + def _get_fileCreator(self) -> str: return self._fileCreator - fileCreator = property( + fileCreator: property = property( _get_fileCreator, doc="The file creator of the UFO. This is set into metainfo.plist during __init__.", ) # support methods for file system interaction - def copyFromReader(self, reader, sourcePath, destPath): + def copyFromReader( + self, reader: UFOReader, sourcePath: PathStr, destPath: PathStr + ) -> None: """ Copy the sourcePath in the provided UFOReader to destPath in this writer. The paths must be relative. 
This works with @@ -1097,7 +1142,7 @@ class UFOWriter(UFOReader): else: fs.copy.copy_file(reader.fs, sourcePath, self.fs, destPath) - def writeBytesToPath(self, path, data): + def writeBytesToPath(self, path: PathStr, data: bytes) -> None: """ Write bytes to a path relative to the UFO filesystem's root. If writing to an existing UFO, check to see if data matches the data @@ -1117,7 +1162,12 @@ class UFOWriter(UFOReader): self.fs.makedirs(fs.path.dirname(path), recreate=True) self.fs.writebytes(path, data) - def getFileObjectForPath(self, path, mode="w", encoding=None): + def getFileObjectForPath( + self, + path: PathStr, + mode: str = "w", + encoding: Optional[str] = None, + ) -> Optional[IO[Any]]: """ Returns a file (or file-like) object for the file at the given path. The path must be relative @@ -1140,9 +1190,12 @@ class UFOWriter(UFOReader): self.fs.makedirs(fs.path.dirname(path), recreate=True) return self.fs.open(path, mode=mode, encoding=encoding) except fs.errors.ResourceError as e: - return UFOLibError(f"unable to open '{path}' on {self.fs}: {e}") + raise UFOLibError(f"unable to open '{path}' on {self.fs}: {e}") + return None - def removePath(self, path, force=False, removeEmptyParents=True): + def removePath( + self, path: PathStr, force: bool = False, removeEmptyParents: bool = True + ) -> None: """ Remove the file (or directory) at path. The path must be relative to the UFO. @@ -1169,7 +1222,7 @@ class UFOWriter(UFOReader): # UFO mod time - def setModificationTime(self): + def setModificationTime(self) -> None: """ Set the UFO modification time to the current time. This is never called automatically. 
It is up to the @@ -1185,7 +1238,7 @@ class UFOWriter(UFOReader): # metainfo.plist - def _writeMetaInfo(self): + def _writeMetaInfo(self) -> None: metaInfo = dict( creator=self._fileCreator, formatVersion=self._formatVersion.major, @@ -1196,7 +1249,7 @@ class UFOWriter(UFOReader): # groups.plist - def setKerningGroupConversionRenameMaps(self, maps): + def setKerningGroupConversionRenameMaps(self, maps: KerningGroupRenameMaps) -> None: """ Set maps defining the renaming that should be done when writing groups and kerning in UFO 1 and UFO 2. @@ -1210,7 +1263,7 @@ class UFOWriter(UFOReader): } This is the same form returned by UFOReader's - getKerningGroupConversionRenameMaps method. + getKerningGroupRenameMaps method. """ if self._formatVersion >= UFOFormatVersion.FORMAT_3_0: return # XXX raise an error here @@ -1221,7 +1274,9 @@ class UFOWriter(UFOReader): remap[dataName] = writeName self._downConversionKerningData = dict(groupRenameMap=remap) - def writeGroups(self, groups, validate=None): + def writeGroups( + self, groups: KerningGroups, validate: Optional[bool] = None + ) -> None: """ Write groups.plist. This method requires a dict of glyph groups as an argument. @@ -1276,7 +1331,7 @@ class UFOWriter(UFOReader): # fontinfo.plist - def writeInfo(self, info, validate=None): + def writeInfo(self, info: Any, validate: Optional[bool] = None) -> None: """ Write info.plist. This method requires an object that supports getting attributes that follow the @@ -1322,7 +1377,9 @@ class UFOWriter(UFOReader): # kerning.plist - def writeKerning(self, kerning, validate=None): + def writeKerning( + self, kerning: KerningDict, validate: Optional[bool] = None + ) -> None: """ Write kerning.plist. This method requires a dict of kerning pairs as an argument. 
@@ -1366,7 +1423,7 @@ class UFOWriter(UFOReader): remappedKerning[side1, side2] = value kerning = remappedKerning # pack and write - kerningDict = {} + kerningDict: KerningNested = {} for left, right in kerning.keys(): value = kerning[left, right] if left not in kerningDict: @@ -1379,7 +1436,7 @@ class UFOWriter(UFOReader): # lib.plist - def writeLib(self, libDict, validate=None): + def writeLib(self, libDict: LibDict, validate: Optional[bool] = None) -> None: """ Write lib.plist. This method requires a lib dict as an argument. @@ -1400,7 +1457,7 @@ class UFOWriter(UFOReader): # features.fea - def writeFeatures(self, features, validate=None): + def writeFeatures(self, features: str, validate: Optional[bool] = None) -> None: """ Write features.fea. This method requires a features string as an argument. @@ -1419,7 +1476,9 @@ class UFOWriter(UFOReader): # glyph sets & layers - def writeLayerContents(self, layerOrder=None, validate=None): + def writeLayerContents( + self, layerOrder: LayerOrderList = None, validate: Optional[bool] = None + ) -> None: """ Write the layercontents.plist file. This method *must* be called after all glyph sets have been written. @@ -1429,7 +1488,7 @@ class UFOWriter(UFOReader): if self._formatVersion < UFOFormatVersion.FORMAT_3_0: return if layerOrder is not None: - newOrder = [] + newOrder: list[Optional[str]] = [] for layerName in layerOrder: if layerName is None: layerName = DEFAULT_LAYER_NAME @@ -1442,11 +1501,13 @@ class UFOWriter(UFOReader): "The layer order content does not match the glyph sets that have been created." 
) layerContents = [ - (layerName, self.layerContents[layerName]) for layerName in layerOrder + (layerName, self.layerContents[layerName]) + for layerName in layerOrder + if layerName is not None ] self._writePlist(LAYERCONTENTS_FILENAME, layerContents) - def _findDirectoryForLayerName(self, layerName): + def _findDirectoryForLayerName(self, layerName: Optional[str]) -> str: foundDirectory = None for existingLayerName, directoryName in list(self.layerContents.items()): if layerName is None and directoryName == DEFAULT_GLYPHS_DIRNAME: @@ -1462,15 +1523,15 @@ class UFOWriter(UFOReader): ) return foundDirectory - def getGlyphSet( + def getGlyphSet( # type: ignore[override] self, - layerName=None, - defaultLayer=True, - glyphNameToFileNameFunc=None, - validateRead=None, - validateWrite=None, - expectContentsFile=False, - ): + layerName: Optional[str] = None, + defaultLayer: bool = True, + glyphNameToFileNameFunc: GlyphNameToFileNameFunc = None, + validateRead: Optional[bool] = None, + validateWrite: Optional[bool] = None, + expectContentsFile: bool = False, + ) -> GlyphSet: """ Return the GlyphSet object associated with the appropriate glyph directory in the .ufo. 
@@ -1530,11 +1591,11 @@ class UFOWriter(UFOReader): def _getDefaultGlyphSet( self, - validateRead, - validateWrite, - glyphNameToFileNameFunc=None, - expectContentsFile=False, - ): + validateRead: bool, + validateWrite: bool, + glyphNameToFileNameFunc: GlyphNameToFileNameFunc = None, + expectContentsFile: bool = False, + ) -> GlyphSet: from fontTools.ufoLib.glifLib import GlyphSet glyphSubFS = self.fs.makedir(DEFAULT_GLYPHS_DIRNAME, recreate=True) @@ -1549,13 +1610,13 @@ class UFOWriter(UFOReader): def _getGlyphSetFormatVersion3( self, - validateRead, - validateWrite, - layerName=None, - defaultLayer=True, - glyphNameToFileNameFunc=None, - expectContentsFile=False, - ): + validateRead: bool, + validateWrite: bool, + layerName: Optional[str] = None, + defaultLayer: bool = True, + glyphNameToFileNameFunc: GlyphNameToFileNameFunc = None, + expectContentsFile: bool = False, + ) -> GlyphSet: from fontTools.ufoLib.glifLib import GlyphSet # if the default flag is on, make sure that the default in the file @@ -1573,6 +1634,11 @@ class UFOWriter(UFOReader): raise UFOLibError( "The layer name is already mapped to a non-default layer." ) + + # handle layerName is None to avoid MyPy errors + if layerName is None: + raise TypeError("'leyerName' cannot be None.") + # get an existing directory name if layerName in self.layerContents: directory = self.layerContents[layerName] @@ -1601,7 +1667,12 @@ class UFOWriter(UFOReader): expectContentsFile=expectContentsFile, ) - def renameGlyphSet(self, layerName, newLayerName, defaultLayer=False): + def renameGlyphSet( + self, + layerName: Optional[str], + newLayerName: Optional[str], + defaultLayer: bool = False, + ) -> None: """ Rename a glyph set. 
@@ -1615,7 +1686,7 @@ class UFOWriter(UFOReader): return # the new and old names can be the same # as long as the default is being switched - if layerName == newLayerName: + if layerName is not None and layerName == newLayerName: # if the default is off and the layer is already not the default, skip if ( self.layerContents[layerName] != DEFAULT_GLYPHS_DIRNAME @@ -1644,12 +1715,13 @@ class UFOWriter(UFOReader): newLayerName, existing=existing, prefix="glyphs." ) # update the internal mapping - del self.layerContents[layerName] + if layerName is not None: + del self.layerContents[layerName] self.layerContents[newLayerName] = newDirectory # do the file system copy self.fs.movedir(oldDirectory, newDirectory, create=True) - def deleteGlyphSet(self, layerName): + def deleteGlyphSet(self, layerName: Optional[str]) -> None: """ Remove the glyph set matching layerName. """ @@ -1659,16 +1731,17 @@ class UFOWriter(UFOReader): return foundDirectory = self._findDirectoryForLayerName(layerName) self.removePath(foundDirectory, removeEmptyParents=False) - del self.layerContents[layerName] + if layerName is not None: + del self.layerContents[layerName] - def writeData(self, fileName, data): + def writeData(self, fileName: PathStr, data: bytes) -> None: """ Write data to fileName in the 'data' directory. The data must be a bytes string. """ self.writeBytesToPath(f"{DATA_DIRNAME}/{fsdecode(fileName)}", data) - def removeData(self, fileName): + def removeData(self, fileName: PathStr) -> None: """ Remove the file named fileName from the data directory. """ @@ -1676,7 +1749,12 @@ class UFOWriter(UFOReader): # /images - def writeImage(self, fileName, data, validate=None): + def writeImage( + self, + fileName: PathStr, + data: bytes, + validate: Optional[bool] = None, + ) -> None: """ Write data to fileName in the images directory. The data must be a valid PNG. 
@@ -1694,7 +1772,11 @@ class UFOWriter(UFOReader): raise UFOLibError(error) self.writeBytesToPath(f"{IMAGES_DIRNAME}/{fileName}", data) - def removeImage(self, fileName, validate=None): # XXX remove unused 'validate'? + def removeImage( + self, + fileName: PathStr, + validate: Optional[bool] = None, + ) -> None: # XXX remove unused 'validate'? """ Remove the file named fileName from the images directory. @@ -1705,7 +1787,13 @@ class UFOWriter(UFOReader): ) self.removePath(f"{IMAGES_DIRNAME}/{fsdecode(fileName)}") - def copyImageFromReader(self, reader, sourceFileName, destFileName, validate=None): + def copyImageFromReader( + self, + reader: UFOReader, + sourceFileName: PathStr, + destFileName: PathStr, + validate: Optional[bool] = None, + ) -> None: """ Copy the sourceFileName in the provided UFOReader to destFileName in this writer. This uses the most memory efficient method possible @@ -1721,12 +1809,12 @@ class UFOWriter(UFOReader): destPath = f"{IMAGES_DIRNAME}/{fsdecode(destFileName)}" self.copyFromReader(reader, sourcePath, destPath) - def close(self): + def close(self) -> None: if self._havePreviousFile and self._fileStructure is UFOFileStructure.ZIP: # if we are updating an existing zip file, we can now compress the # contents of the temporary filesystem in the destination path rootDir = os.path.splitext(os.path.basename(self._path))[0] + ".ufo" - with fs.zipfs.ZipFS(self._path, write=True, encoding="utf-8") as destFS: + with fs.zipfs.ZipFS(self._path, write=True, encoding="utf-8") as destFS: # type: ignore[abstract] fs.copy.copy_fs(self.fs, destFS.makedir(rootDir)) super().close() @@ -1740,7 +1828,7 @@ UFOReaderWriter = UFOWriter # ---------------- -def _sniffFileStructure(ufo_path): +def _sniffFileStructure(ufo_path: PathStr) -> UFOFileStructure: """Return UFOFileStructure.ZIP if the UFO at path 'ufo_path' (str) is a zip file, else return UFOFileStructure.PACKAGE if 'ufo_path' is a directory. 
@@ -1759,7 +1847,7 @@ def _sniffFileStructure(ufo_path): raise UFOLibError("No such file or directory: '%s'" % ufo_path) -def makeUFOPath(path): +def makeUFOPath(path: PathStr) -> str: """ Return a .ufo pathname. @@ -1786,7 +1874,7 @@ def makeUFOPath(path): # cases of invalid values. -def validateFontInfoVersion2ValueForAttribute(attr, value): +def validateFontInfoVersion2ValueForAttribute(attr: str, value: Any) -> bool: """ This performs very basic validation of the value for attribute following the UFO 2 fontinfo.plist specification. The results @@ -1798,7 +1886,7 @@ def validateFontInfoVersion2ValueForAttribute(attr, value): """ dataValidationDict = fontInfoAttributesVersion2ValueData[attr] valueType = dataValidationDict.get("type") - validator = dataValidationDict.get("valueValidator") + validator = dataValidationDict.get("valueValidator", genericTypeValidator) valueOptions = dataValidationDict.get("valueOptions") # have specific options for the validator if valueOptions is not None: @@ -1812,7 +1900,7 @@ def validateFontInfoVersion2ValueForAttribute(attr, value): return isValidValue -def validateInfoVersion2Data(infoData): +def validateInfoVersion2Data(infoData: dict[str, Any]) -> dict[str, Any]: """ This performs very basic validation of the value for infoData following the UFO 2 fontinfo.plist specification. The results @@ -1832,7 +1920,7 @@ def validateInfoVersion2Data(infoData): return validInfoData -def validateFontInfoVersion3ValueForAttribute(attr, value): +def validateFontInfoVersion3ValueForAttribute(attr: str, value: Any) -> bool: """ This performs very basic validation of the value for attribute following the UFO 3 fontinfo.plist specification. 
The results @@ -1844,7 +1932,7 @@ def validateFontInfoVersion3ValueForAttribute(attr, value): """ dataValidationDict = fontInfoAttributesVersion3ValueData[attr] valueType = dataValidationDict.get("type") - validator = dataValidationDict.get("valueValidator") + validator = dataValidationDict.get("valueValidator", genericTypeValidator) valueOptions = dataValidationDict.get("valueOptions") # have specific options for the validator if valueOptions is not None: @@ -1858,7 +1946,7 @@ def validateFontInfoVersion3ValueForAttribute(attr, value): return isValidValue -def validateInfoVersion3Data(infoData): +def validateInfoVersion3Data(infoData: dict[str, Any]) -> dict[str, Any]: """ This performs very basic validation of the value for infoData following the UFO 3 fontinfo.plist specification. The results @@ -1880,18 +1968,18 @@ def validateInfoVersion3Data(infoData): # Value Options -fontInfoOpenTypeHeadFlagsOptions = list(range(0, 15)) -fontInfoOpenTypeOS2SelectionOptions = [1, 2, 3, 4, 7, 8, 9] -fontInfoOpenTypeOS2UnicodeRangesOptions = list(range(0, 128)) -fontInfoOpenTypeOS2CodePageRangesOptions = list(range(0, 64)) -fontInfoOpenTypeOS2TypeOptions = [0, 1, 2, 3, 8, 9] +fontInfoOpenTypeHeadFlagsOptions: list[int] = list(range(0, 15)) +fontInfoOpenTypeOS2SelectionOptions: list[int] = [1, 2, 3, 4, 7, 8, 9] +fontInfoOpenTypeOS2UnicodeRangesOptions: list[int] = list(range(0, 128)) +fontInfoOpenTypeOS2CodePageRangesOptions: list[int] = list(range(0, 64)) +fontInfoOpenTypeOS2TypeOptions: list[int] = [0, 1, 2, 3, 8, 9] # Version Attribute Definitions # This defines the attributes, types and, in some # cases the possible values, that can exist is # fontinfo.plist. 
-fontInfoAttributesVersion1 = { +fontInfoAttributesVersion1: set[str] = { "familyName", "styleName", "fullName", @@ -1934,7 +2022,7 @@ fontInfoAttributesVersion1 = { "ttVersion", } -fontInfoAttributesVersion2ValueData = { +fontInfoAttributesVersion2ValueData: FontInfoAttributes = { "familyName": dict(type=str), "styleName": dict(type=str), "styleMapFamilyName": dict(type=str), @@ -2076,9 +2164,11 @@ fontInfoAttributesVersion2ValueData = { "macintoshFONDFamilyID": dict(type=int), "macintoshFONDName": dict(type=str), } -fontInfoAttributesVersion2 = set(fontInfoAttributesVersion2ValueData.keys()) +fontInfoAttributesVersion2: set[str] = set(fontInfoAttributesVersion2ValueData.keys()) -fontInfoAttributesVersion3ValueData = deepcopy(fontInfoAttributesVersion2ValueData) +fontInfoAttributesVersion3ValueData: FontInfoAttributes = deepcopy( + fontInfoAttributesVersion2ValueData +) fontInfoAttributesVersion3ValueData.update( { "versionMinor": dict(type=int, valueValidator=genericNonNegativeIntValidator), @@ -2161,7 +2251,7 @@ fontInfoAttributesVersion3ValueData.update( "guidelines": dict(type=list, valueValidator=guidelinesValidator), } ) -fontInfoAttributesVersion3 = set(fontInfoAttributesVersion3ValueData.keys()) +fontInfoAttributesVersion3: set[str] = set(fontInfoAttributesVersion3ValueData.keys()) # insert the type validator for all attrs that # have no defined validator. @@ -2178,14 +2268,14 @@ for attr, dataDict in list(fontInfoAttributesVersion3ValueData.items()): # to version 2 or vice-versa. 
-def _flipDict(d): +def _flipDict(d: dict[K, V]) -> dict[V, K]: flipped = {} for key, value in list(d.items()): flipped[value] = key return flipped -fontInfoAttributesVersion1To2 = { +fontInfoAttributesVersion1To2: dict[str, str] = { "menuName": "styleMapFamilyName", "designer": "openTypeNameDesigner", "designerURL": "openTypeNameDesignerURL", @@ -2217,12 +2307,17 @@ fontInfoAttributesVersion1To2 = { fontInfoAttributesVersion2To1 = _flipDict(fontInfoAttributesVersion1To2) deprecatedFontInfoAttributesVersion2 = set(fontInfoAttributesVersion1To2.keys()) -_fontStyle1To2 = {64: "regular", 1: "italic", 32: "bold", 33: "bold italic"} -_fontStyle2To1 = _flipDict(_fontStyle1To2) +_fontStyle1To2: dict[int, str] = { + 64: "regular", + 1: "italic", + 32: "bold", + 33: "bold italic", +} +_fontStyle2To1: dict[str, int] = _flipDict(_fontStyle1To2) # Some UFO 1 files have 0 _fontStyle1To2[0] = "regular" -_widthName1To2 = { +_widthName1To2: dict[str, int] = { "Ultra-condensed": 1, "Extra-condensed": 2, "Condensed": 3, @@ -2233,7 +2328,7 @@ _widthName1To2 = { "Extra-expanded": 8, "Ultra-expanded": 9, } -_widthName2To1 = _flipDict(_widthName1To2) +_widthName2To1: dict[int, str] = _flipDict(_widthName1To2) # FontLab's default width value is "Normal". # Many format version 1 UFOs will have this. _widthName1To2["Normal"] = 5 @@ -2245,7 +2340,7 @@ _widthName1To2["medium"] = 5 # "Medium" appears in a lot of UFO 1 files. _widthName1To2["Medium"] = 5 -_msCharSet1To2 = { +_msCharSet1To2: dict[int, int] = { 0: 1, 1: 2, 2: 3, @@ -2267,12 +2362,14 @@ _msCharSet1To2 = { 238: 19, 255: 20, } -_msCharSet2To1 = _flipDict(_msCharSet1To2) +_msCharSet2To1: dict[int, int] = _flipDict(_msCharSet1To2) # 1 <-> 2 -def convertFontInfoValueForAttributeFromVersion1ToVersion2(attr, value): +def convertFontInfoValueForAttributeFromVersion1ToVersion2( + attr: str, value: Any +) -> tuple[str, Any]: """ Convert value from version 1 to version 2 format. Returns the new attribute name and the converted value. 
@@ -2284,7 +2381,7 @@ def convertFontInfoValueForAttributeFromVersion1ToVersion2(attr, value): value = int(value) if value is not None: if attr == "fontStyle": - v = _fontStyle1To2.get(value) + v: Optional[Union[str, int]] = _fontStyle1To2.get(value) if v is None: raise UFOLibError( f"Cannot convert value ({value!r}) for attribute {attr}." @@ -2308,7 +2405,9 @@ def convertFontInfoValueForAttributeFromVersion1ToVersion2(attr, value): return attr, value -def convertFontInfoValueForAttributeFromVersion2ToVersion1(attr, value): +def convertFontInfoValueForAttributeFromVersion2ToVersion1( + attr: str, value: Any +) -> tuple[str, Any]: """ Convert value from version 2 to version 1 format. Returns the new attribute name and the converted value. @@ -2325,7 +2424,7 @@ def convertFontInfoValueForAttributeFromVersion2ToVersion1(attr, value): return attr, value -def _convertFontInfoDataVersion1ToVersion2(data): +def _convertFontInfoDataVersion1ToVersion2(data: dict[str, Any]) -> dict[str, Any]: converted = {} for attr, value in list(data.items()): # FontLab gives -1 for the weightValue @@ -2349,7 +2448,7 @@ def _convertFontInfoDataVersion1ToVersion2(data): return converted -def _convertFontInfoDataVersion2ToVersion1(data): +def _convertFontInfoDataVersion2ToVersion1(data: dict[str, Any]) -> dict[str, Any]: converted = {} for attr, value in list(data.items()): newAttr, newValue = convertFontInfoValueForAttributeFromVersion2ToVersion1( @@ -2370,16 +2469,16 @@ def _convertFontInfoDataVersion2ToVersion1(data): # 2 <-> 3 -_ufo2To3NonNegativeInt = { +_ufo2To3NonNegativeInt: set[str] = { "versionMinor", "openTypeHeadLowestRecPPEM", "openTypeOS2WinAscent", "openTypeOS2WinDescent", } -_ufo2To3NonNegativeIntOrFloat = { +_ufo2To3NonNegativeIntOrFloat: set[str] = { "unitsPerEm", } -_ufo2To3FloatToInt = { +_ufo2To3FloatToInt: set[str] = { "openTypeHeadLowestRecPPEM", "openTypeHheaAscender", "openTypeHheaDescender", @@ -2407,7 +2506,9 @@ _ufo2To3FloatToInt = { } -def 
convertFontInfoValueForAttributeFromVersion2ToVersion3(attr, value): +def convertFontInfoValueForAttributeFromVersion2ToVersion3( + attr: str, value: Any +) -> tuple[str, Any]: """ Convert value from version 2 to version 3 format. Returns the new attribute name and the converted value. @@ -2435,7 +2536,9 @@ def convertFontInfoValueForAttributeFromVersion2ToVersion3(attr, value): return attr, value -def convertFontInfoValueForAttributeFromVersion3ToVersion2(attr, value): +def convertFontInfoValueForAttributeFromVersion3ToVersion2( + attr: str, value: Any +) -> tuple[str, Any]: """ Convert value from version 3 to version 2 format. Returns the new attribute name and the converted value. @@ -2444,7 +2547,7 @@ def convertFontInfoValueForAttributeFromVersion3ToVersion2(attr, value): return attr, value -def _convertFontInfoDataVersion3ToVersion2(data): +def _convertFontInfoDataVersion3ToVersion2(data: dict[str, Any]) -> dict[str, Any]: converted = {} for attr, value in list(data.items()): newAttr, newValue = convertFontInfoValueForAttributeFromVersion3ToVersion2( @@ -2456,7 +2559,7 @@ def _convertFontInfoDataVersion3ToVersion2(data): return converted -def _convertFontInfoDataVersion2ToVersion3(data): +def _convertFontInfoDataVersion2ToVersion3(data: dict[str, Any]) -> dict[str, Any]: converted = {} for attr, value in list(data.items()): attr, value = convertFontInfoValueForAttributeFromVersion2ToVersion3( diff --git a/contrib/python/fonttools/fontTools/ufoLib/converters.py b/contrib/python/fonttools/fontTools/ufoLib/converters.py index 4ee6b05e338..94a229f48d2 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/converters.py +++ b/contrib/python/fonttools/fontTools/ufoLib/converters.py @@ -1,3 +1,10 @@ +from __future__ import annotations + +from typing import Mapping, Any +from collections.abc import Container + +from fontTools.annotations import KerningNested + """ Functions for converting UFO1 or UFO2 files into UFO3 format. @@ -9,7 +16,9 @@ or UFO2, and _to_ UFO3. 
# adapted from the UFO spec -def convertUFO1OrUFO2KerningToUFO3Kerning(kerning, groups, glyphSet=()): +def convertUFO1OrUFO2KerningToUFO3Kerning( + kerning: KerningNested, groups: dict[str, list[str]], glyphSet: Container[str] = () +) -> tuple[KerningNested, dict[str, list[str]], dict[str, dict[str, str]]]: """Convert kerning data in UFO1 or UFO2 syntax into UFO3 syntax. Args: @@ -40,7 +49,7 @@ def convertUFO1OrUFO2KerningToUFO3Kerning(kerning, groups, glyphSet=()): if not second.startswith("public.kern2."): secondReferencedGroups.add(second) # Create new names for these groups. - firstRenamedGroups = {} + firstRenamedGroups: dict[str, str] = {} for first in firstReferencedGroups: # Make a list of existing group names. existingGroupNames = list(groups.keys()) + list(firstRenamedGroups.keys()) @@ -52,7 +61,7 @@ def convertUFO1OrUFO2KerningToUFO3Kerning(kerning, groups, glyphSet=()): newName = makeUniqueGroupName(newName, existingGroupNames) # Store for use later. firstRenamedGroups[first] = newName - secondRenamedGroups = {} + secondRenamedGroups: dict[str, str] = {} for second in secondReferencedGroups: # Make a list of existing group names. existingGroupNames = list(groups.keys()) + list(secondRenamedGroups.keys()) @@ -84,7 +93,7 @@ def convertUFO1OrUFO2KerningToUFO3Kerning(kerning, groups, glyphSet=()): return newKerning, groups, dict(side1=firstRenamedGroups, side2=secondRenamedGroups) -def findKnownKerningGroups(groups): +def findKnownKerningGroups(groups: Mapping[str, Any]) -> tuple[set[str], set[str]]: """Find all kerning groups in a UFO1 or UFO2 font that use known prefixes. In some cases, not all kerning groups will be referenced @@ -150,7 +159,7 @@ def findKnownKerningGroups(groups): return firstGroups, secondGroups -def makeUniqueGroupName(name, groupNames, counter=0): +def makeUniqueGroupName(name: str, groupNames: list[str], counter: int = 0) -> str: """Make a kerning group name that will be unique within the set of group names. 
If the requested kerning group name already exists within the set, this diff --git a/contrib/python/fonttools/fontTools/ufoLib/filenames.py b/contrib/python/fonttools/fontTools/ufoLib/filenames.py index 83442f1c8ce..6a4090a7553 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/filenames.py +++ b/contrib/python/fonttools/fontTools/ufoLib/filenames.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from collections.abc import Iterable + """ Convert user-provided internal UFO names to spec-compliant filenames. @@ -27,7 +31,7 @@ by Tal Leming and is copyright (c) 2005-2016, The RoboFab Developers: # inclusive. # 3. Various characters that (mostly) Windows and POSIX-y filesystems don't # allow, plus "(" and ")", as per the specification. -illegalCharacters = { +illegalCharacters: set[str] = { "\x00", "\x01", "\x02", @@ -76,7 +80,7 @@ illegalCharacters = { "|", "\x7f", } -reservedFileNames = { +reservedFileNames: set[str] = { "aux", "clock$", "com1", @@ -101,14 +105,16 @@ reservedFileNames = { "nul", "prn", } -maxFileNameLength = 255 +maxFileNameLength: int = 255 class NameTranslationError(Exception): pass -def userNameToFileName(userName: str, existing=(), prefix="", suffix=""): +def userNameToFileName( + userName: str, existing: Iterable[str] = (), prefix: str = "", suffix: str = "" +) -> str: """Converts from a user name to a file name. Takes care to avoid illegal characters, reserved file names, ambiguity between @@ -212,7 +218,9 @@ def userNameToFileName(userName: str, existing=(), prefix="", suffix=""): return fullName -def handleClash1(userName, existing=[], prefix="", suffix=""): +def handleClash1( + userName: str, existing: Iterable[str] = [], prefix: str = "", suffix: str = "" +) -> str: """A helper function that resolves collisions with existing names when choosing a filename. This function attempts to append an unused integer counter to the filename. 
@@ -278,7 +286,9 @@ def handleClash1(userName, existing=[], prefix="", suffix=""): return finalName -def handleClash2(existing=[], prefix="", suffix=""): +def handleClash2( + existing: Iterable[str] = [], prefix: str = "", suffix: str = "" +) -> str: """A helper function that resolves collisions with existing names when choosing a filename. This function is a fallback to :func:`handleClash1`. It attempts to append an unused integer counter to the filename. diff --git a/contrib/python/fonttools/fontTools/ufoLib/glifLib.py b/contrib/python/fonttools/fontTools/ufoLib/glifLib.py index 028d38c36b6..040c31c4990 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/glifLib.py +++ b/contrib/python/fonttools/fontTools/ufoLib/glifLib.py @@ -12,9 +12,9 @@ glyph data. See the class doc string for details. from __future__ import annotations -import enum import logging from collections import OrderedDict +from typing import TYPE_CHECKING, Any, Optional, Union, cast from warnings import warn import fontTools.misc.filesystem as fs @@ -24,7 +24,11 @@ from fontTools.pens.pointPen import AbstractPointPen, PointToSegmentPen from fontTools.ufoLib import UFOFormatVersion, _UFOBaseIO from fontTools.ufoLib.errors import GlifLibError from fontTools.ufoLib.filenames import userNameToFileName -from fontTools.ufoLib.utils import _VersionTupleEnumMixin, numberTypes +from fontTools.ufoLib.utils import ( + BaseFormatVersion, + normalizeFormatVersion, + numberTypes, +) from fontTools.ufoLib.validators import ( anchorsValidator, colorValidator, @@ -35,7 +39,24 @@ from fontTools.ufoLib.validators import ( imageValidator, ) -__all__ = [ +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Set + from logging import Logger + + from fontTools.annotations import ( + ElementType, + FormatVersion, + FormatVersions, + GLIFFormatVersionInput, + GlyphNameToFileNameFunc, + IntFloat, + PathOrFS, + UFOFormatVersionInput, + ) + from fontTools.misc.filesystem._base import FS + + +__all__: 
list[str] = [ "GlyphSet", "GlifLibError", "readGlyphFromString", @@ -43,7 +64,7 @@ __all__ = [ "glyphNameToFileName", ] -logger = logging.getLogger(__name__) +logger: Logger = logging.getLogger(__name__) # --------- @@ -54,7 +75,7 @@ CONTENTS_FILENAME = "contents.plist" LAYERINFO_FILENAME = "layerinfo.plist" -class GLIFFormatVersion(tuple, _VersionTupleEnumMixin, enum.Enum): +class GLIFFormatVersion(BaseFormatVersion): """Class representing the versions of the .glif format supported by the UFO version in use. For a given :mod:`fontTools.ufoLib.UFOFormatVersion`, the :func:`supported_versions` method will @@ -66,13 +87,17 @@ class GLIFFormatVersion(tuple, _VersionTupleEnumMixin, enum.Enum): FORMAT_2_0 = (2, 0) @classmethod - def default(cls, ufoFormatVersion=None): + def default( + cls, ufoFormatVersion: Optional[UFOFormatVersion] = None + ) -> GLIFFormatVersion: if ufoFormatVersion is not None: return max(cls.supported_versions(ufoFormatVersion)) return super().default() @classmethod - def supported_versions(cls, ufoFormatVersion=None): + def supported_versions( + cls, ufoFormatVersion: Optional[UFOFormatVersion] = None + ) -> frozenset[GLIFFormatVersion]: if ufoFormatVersion is None: # if ufo format unspecified, return all the supported GLIF formats return super().supported_versions() @@ -83,10 +108,6 @@ class GLIFFormatVersion(tuple, _VersionTupleEnumMixin, enum.Enum): return frozenset(versions) -# workaround for py3.11, see https://github.com/fonttools/fonttools/pull/2655 -GLIFFormatVersion.__str__ = _VersionTupleEnumMixin.__str__ - - # ------------ # Simple Glyph # ------------ @@ -98,11 +119,11 @@ class Glyph: the draw() or the drawPoints() method has been called. 
""" - def __init__(self, glyphName, glyphSet): - self.glyphName = glyphName - self.glyphSet = glyphSet + def __init__(self, glyphName: str, glyphSet: GlyphSet) -> None: + self.glyphName: str = glyphName + self.glyphSet: GlyphSet = glyphSet - def draw(self, pen, outputImpliedClosingLine=False): + def draw(self, pen: Any, outputImpliedClosingLine: bool = False) -> None: """ Draw this glyph onto a *FontTools* Pen. """ @@ -111,7 +132,7 @@ class Glyph: ) self.drawPoints(pointPen) - def drawPoints(self, pointPen): + def drawPoints(self, pointPen: AbstractPointPen) -> None: """ Draw this glyph onto a PointPen. """ @@ -141,13 +162,13 @@ class GlyphSet(_UFOBaseIO): def __init__( self, - path, - glyphNameToFileNameFunc=None, - ufoFormatVersion=None, - validateRead=True, - validateWrite=True, - expectContentsFile=False, - ): + path: PathOrFS, + glyphNameToFileNameFunc: GlyphNameToFileNameFunc = None, + ufoFormatVersion: UFOFormatVersionInput = None, + validateRead: bool = True, + validateWrite: bool = True, + expectContentsFile: bool = False, + ) -> None: """ 'path' should be a path (string) to an existing local directory, or an instance of fs.base.FS class. @@ -165,7 +186,9 @@ class GlyphSet(_UFOBaseIO): are reading an existing UFO and ``False`` if you create a fresh glyph set. 
""" try: - ufoFormatVersion = UFOFormatVersion(ufoFormatVersion) + ufoFormatVersion = normalizeFormatVersion( + ufoFormatVersion, UFOFormatVersion + ) except ValueError as e: from fontTools.ufoLib.errors import UnsupportedUFOFormat @@ -178,10 +201,10 @@ class GlyphSet(_UFOBaseIO): if isinstance(path, str): try: - filesystem = fs.osfs.OSFS(path) + filesystem: FS = fs.osfs.OSFS(path) except fs.errors.CreateFailed: raise GlifLibError("No glyphs directory '%s'" % path) - self._shouldClose = True + self._shouldClose: bool = True elif isinstance(path, fs.base.FS): filesystem = path try: @@ -201,26 +224,28 @@ class GlyphSet(_UFOBaseIO): # 'dirName' is kept for backward compatibility only, but it's DEPRECATED # as it's not guaranteed that it maps to an existing OSFS directory. # Client could use the FS api via the `self.fs` attribute instead. - self.dirName = fs.path.basename(path) - self.fs = filesystem + self.dirName: str = fs.path.basename(path) + self.fs: FS = filesystem # if glyphSet contains no 'contents.plist', we consider it empty - self._havePreviousFile = filesystem.exists(CONTENTS_FILENAME) + self._havePreviousFile: bool = filesystem.exists(CONTENTS_FILENAME) if expectContentsFile and not self._havePreviousFile: raise GlifLibError(f"{CONTENTS_FILENAME} is missing.") # attribute kept for backward compatibility - self.ufoFormatVersion = ufoFormatVersion.major - self.ufoFormatVersionTuple = ufoFormatVersion + self.ufoFormatVersion: int = ufoFormatVersion.major + self.ufoFormatVersionTuple: UFOFormatVersion = ufoFormatVersion if glyphNameToFileNameFunc is None: glyphNameToFileNameFunc = glyphNameToFileName - self.glyphNameToFileName = glyphNameToFileNameFunc - self._validateRead = validateRead - self._validateWrite = validateWrite + self.glyphNameToFileName: Callable[[str, set[str]], str] = ( + glyphNameToFileNameFunc + ) + self._validateRead: bool = validateRead + self._validateWrite: bool = validateWrite self._existingFileNames: set[str] | None = None - 
self._reverseContents = None + self._reverseContents: Optional[dict[str, str]] = None self.rebuildContents() - def rebuildContents(self, validateRead=None): + def rebuildContents(self, validateRead: bool = False) -> None: """ Rebuild the contents dict by loading contents.plist. @@ -248,11 +273,11 @@ class GlyphSet(_UFOBaseIO): ) if invalidFormat: raise GlifLibError("%s is not properly formatted" % CONTENTS_FILENAME) - self.contents = contents + self.contents: dict[str, str] = contents self._existingFileNames = None self._reverseContents = None - def getReverseContents(self): + def getReverseContents(self) -> dict[str, str]: """ Return a reversed dict of self.contents, mapping file names to glyph names. This is primarily an aid for custom glyph name to file @@ -268,7 +293,7 @@ class GlyphSet(_UFOBaseIO): self._reverseContents = d return self._reverseContents - def writeContents(self): + def writeContents(self) -> None: """ Write the contents.plist file out to disk. Call this method when you're done writing glyphs. @@ -277,7 +302,7 @@ class GlyphSet(_UFOBaseIO): # layer info - def readLayerInfo(self, info, validateRead=None): + def readLayerInfo(self, info: Any, validateRead: Optional[bool] = None) -> None: """ ``validateRead`` will validate the data, by default it is set to the class's ``validateRead`` value, can be overridden. @@ -299,7 +324,7 @@ class GlyphSet(_UFOBaseIO): % attr ) - def writeLayerInfo(self, info, validateWrite=None): + def writeLayerInfo(self, info: Any, validateWrite: Optional[bool] = None) -> None: """ ``validateWrite`` will validate the data, by default it is set to the class's ``validateWrite`` value, can be overridden. @@ -335,7 +360,7 @@ class GlyphSet(_UFOBaseIO): # data empty, remove existing file self.fs.remove(LAYERINFO_FILENAME) - def getGLIF(self, glyphName): + def getGLIF(self, glyphName: str) -> bytes: """ Get the raw GLIF text for a given glyph name. This only works for GLIF files that are already on disk. 
@@ -356,7 +381,7 @@ class GlyphSet(_UFOBaseIO): "does not exist on %s" % (fileName, glyphName, self.fs) ) - def getGLIFModificationTime(self, glyphName): + def getGLIFModificationTime(self, glyphName: str) -> Optional[float]: """ Returns the modification time for the GLIF file with 'glyphName', as a floating point number giving the number of seconds since the epoch. @@ -369,7 +394,13 @@ class GlyphSet(_UFOBaseIO): # reading/writing API - def readGlyph(self, glyphName, glyphObject=None, pointPen=None, validate=None): + def readGlyph( + self, + glyphName: str, + glyphObject: Optional[Any] = None, + pointPen: Optional[AbstractPointPen] = None, + validate: Optional[bool] = None, + ) -> None: """ Read a .glif file for 'glyphName' from the glyph set. The 'glyphObject' argument can be any kind of object (even None); @@ -446,12 +477,12 @@ class GlyphSet(_UFOBaseIO): def writeGlyph( self, - glyphName, - glyphObject=None, - drawPointsFunc=None, - formatVersion=None, - validate=None, - ): + glyphName: str, + glyphObject: Optional[Any] = None, + drawPointsFunc: Optional[Callable[[AbstractPointPen], None]] = None, + formatVersion: GLIFFormatVersionInput = None, + validate: Optional[bool] = None, + ) -> None: """ Write a .glif file for 'glyphName' to the glyph set. The 'glyphObject' argument can be any kind of object (even None); @@ -501,7 +532,7 @@ class GlyphSet(_UFOBaseIO): formatVersion = GLIFFormatVersion.default(self.ufoFormatVersionTuple) else: try: - formatVersion = GLIFFormatVersion(formatVersion) + formatVersion = normalizeFormatVersion(formatVersion, GLIFFormatVersion) except ValueError as e: from fontTools.ufoLib.errors import UnsupportedGLIFFormat @@ -545,7 +576,7 @@ class GlyphSet(_UFOBaseIO): return self.fs.writebytes(fileName, data) - def deleteGlyph(self, glyphName): + def deleteGlyph(self, glyphName: str) -> None: """Permanently delete the glyph from the glyph set on disk. Will raise KeyError if the glyph is not present in the glyph set. 
""" @@ -559,25 +590,27 @@ class GlyphSet(_UFOBaseIO): # dict-like support - def keys(self): + def keys(self) -> list[str]: return list(self.contents.keys()) - def has_key(self, glyphName): + def has_key(self, glyphName: str) -> bool: return glyphName in self.contents __contains__ = has_key - def __len__(self): + def __len__(self) -> int: return len(self.contents) - def __getitem__(self, glyphName): + def __getitem__(self, glyphName: str) -> Any: if glyphName not in self.contents: raise KeyError(glyphName) return self.glyphClass(glyphName, self) # quickly fetch unicode values - def getUnicodes(self, glyphNames=None): + def getUnicodes( + self, glyphNames: Optional[Iterable[str]] = None + ) -> dict[str, list[int]]: """ Return a dictionary that maps glyph names to lists containing the unicode value[s] for that glyph, if any. This parses the .glif @@ -592,7 +625,9 @@ class GlyphSet(_UFOBaseIO): unicodes[glyphName] = _fetchUnicodes(text) return unicodes - def getComponentReferences(self, glyphNames=None): + def getComponentReferences( + self, glyphNames: Optional[Iterable[str]] = None + ) -> dict[str, list[str]]: """ Return a dictionary that maps glyph names to lists containing the base glyph name of components in the glyph. This parses the .glif @@ -607,7 +642,9 @@ class GlyphSet(_UFOBaseIO): components[glyphName] = _fetchComponentBases(text) return components - def getImageReferences(self, glyphNames=None): + def getImageReferences( + self, glyphNames: Optional[Iterable[str]] = None + ) -> dict[str, Optional[str]]: """ Return a dictionary that maps glyph names to the file name of the image referenced by the glyph. 
This parses the .glif files partially, so it is a @@ -622,14 +659,14 @@ class GlyphSet(_UFOBaseIO): images[glyphName] = _fetchImageFileName(text) return images - def close(self): + def close(self) -> None: if self._shouldClose: self.fs.close() - def __enter__(self): + def __enter__(self) -> GlyphSet: return self - def __exit__(self, exc_type, exc_value, exc_tb): + def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None: self.close() @@ -638,7 +675,7 @@ class GlyphSet(_UFOBaseIO): # ----------------------- -def glyphNameToFileName(glyphName, existingFileNames): +def glyphNameToFileName(glyphName: str, existingFileNames: Optional[set[str]]) -> str: """ Wrapper around the userNameToFileName function in filenames.py @@ -656,12 +693,12 @@ def glyphNameToFileName(glyphName, existingFileNames): def readGlyphFromString( - aString, - glyphObject=None, - pointPen=None, - formatVersions=None, - validate=True, -): + aString: Union[str, bytes], + glyphObject: Optional[Any] = None, + pointPen: Optional[Any] = None, + formatVersions: FormatVersions = None, + validate: bool = True, +) -> None: """ Read .glif data from a string into a glyph object. @@ -702,7 +739,7 @@ def readGlyphFromString( The formatVersions optional argument define the GLIF format versions that are allowed to be read. - The type is Optional[Iterable[Tuple[int, int], int]]. It can contain + The type is Optional[Iterable[tuple[int, int], int]]. It can contain either integers (for the major versions to be allowed, with minor digits defaulting to 0), or tuples of integers to specify both (major, minor) versions. 
@@ -714,12 +751,14 @@ def readGlyphFromString( tree = _glifTreeFromString(aString) if formatVersions is None: - validFormatVersions = GLIFFormatVersion.supported_versions() + validFormatVersions: Set[GLIFFormatVersion] = ( + GLIFFormatVersion.supported_versions() + ) else: validFormatVersions, invalidFormatVersions = set(), set() for v in formatVersions: try: - formatVersion = GLIFFormatVersion(v) + formatVersion = normalizeFormatVersion(v, GLIFFormatVersion) except ValueError: invalidFormatVersions.add(v) else: @@ -740,16 +779,16 @@ def readGlyphFromString( def _writeGlyphToBytes( - glyphName, - glyphObject=None, - drawPointsFunc=None, - writer=None, - formatVersion=None, - validate=True, -): + glyphName: str, + glyphObject: Optional[Any] = None, + drawPointsFunc: Optional[Callable[[Any], None]] = None, + writer: Optional[Any] = None, + formatVersion: Optional[FormatVersion] = None, + validate: bool = True, +) -> bytes: """Return .glif data for a glyph as a UTF-8 encoded bytes string.""" try: - formatVersion = GLIFFormatVersion(formatVersion) + formatVersion = normalizeFormatVersion(formatVersion, GLIFFormatVersion) except ValueError: from fontTools.ufoLib.errors import UnsupportedGLIFFormat @@ -767,7 +806,7 @@ def _writeGlyphToBytes( if formatVersion.minor != 0: glyphAttrs["formatMinor"] = repr(formatVersion.minor) root = etree.Element("glyph", glyphAttrs) - identifiers = set() + identifiers: set[str] = set() # advance _writeAdvance(glyphObject, root, validate) # unicodes @@ -807,12 +846,12 @@ def _writeGlyphToBytes( def writeGlyphToString( - glyphName, - glyphObject=None, - drawPointsFunc=None, - formatVersion=None, - validate=True, -): + glyphName: str, + glyphObject: Optional[Any] = None, + drawPointsFunc: Optional[Callable[[Any], None]] = None, + formatVersion: Optional[FormatVersion] = None, + validate: bool = True, +) -> str: """ Return .glif data for a glyph as a string. The XML declaration's encoding is always set to "UTF-8". 
@@ -867,7 +906,7 @@ def writeGlyphToString( return data.decode("utf-8") -def _writeAdvance(glyphObject, element, validate): +def _writeAdvance(glyphObject: Any, element: ElementType, validate: bool) -> None: width = getattr(glyphObject, "width", None) if width is not None: if validate and not isinstance(width, numberTypes): @@ -892,8 +931,8 @@ def _writeAdvance(glyphObject, element, validate): etree.SubElement(element, "advance", dict(height=repr(height))) -def _writeUnicodes(glyphObject, element, validate): - unicodes = getattr(glyphObject, "unicodes", None) +def _writeUnicodes(glyphObject: Any, element: ElementType, validate: bool) -> None: + unicodes = getattr(glyphObject, "unicodes", []) if validate and isinstance(unicodes, int): unicodes = [unicodes] seen = set() @@ -907,17 +946,21 @@ def _writeUnicodes(glyphObject, element, validate): etree.SubElement(element, "unicode", dict(hex=hexCode)) -def _writeNote(glyphObject, element, validate): +def _writeNote(glyphObject: Any, element: ElementType, validate: bool) -> None: note = getattr(glyphObject, "note", None) if validate and not isinstance(note, str): raise GlifLibError("note attribute must be str") - note = note.strip() - note = "\n" + note + "\n" - etree.SubElement(element, "note").text = note + if isinstance(note, str): + note = note.strip() + note = "\n" + note + "\n" + etree.SubElement(element, "note").text = note -def _writeImage(glyphObject, element, validate): +def _writeImage(glyphObject: Any, element: ElementType, validate: bool) -> None: image = getattr(glyphObject, "image", None) + if image is None: + return + if validate and not imageValidator(image): raise GlifLibError( "image attribute must be a dict or dict-like object with the proper structure." 
@@ -933,7 +976,9 @@ def _writeImage(glyphObject, element, validate): etree.SubElement(element, "image", attrs) -def _writeGuidelines(glyphObject, element, identifiers, validate): +def _writeGuidelines( + glyphObject: Any, element: ElementType, identifiers: set[str], validate: bool +) -> None: guidelines = getattr(glyphObject, "guidelines", []) if validate and not guidelinesValidator(guidelines): raise GlifLibError("guidelines attribute does not have the proper structure.") @@ -963,7 +1008,7 @@ def _writeGuidelines(glyphObject, element, identifiers, validate): etree.SubElement(element, "guideline", attrs) -def _writeAnchorsFormat1(pen, anchors, validate): +def _writeAnchorsFormat1(pen: Any, anchors: Any, validate: bool) -> None: if validate and not anchorsValidator(anchors): raise GlifLibError("anchors attribute does not have the proper structure.") for anchor in anchors: @@ -980,7 +1025,12 @@ def _writeAnchorsFormat1(pen, anchors, validate): pen.endPath() -def _writeAnchors(glyphObject, element, identifiers, validate): +def _writeAnchors( + glyphObject: Any, + element: ElementType, + identifiers: set[str], + validate: bool, +) -> None: anchors = getattr(glyphObject, "anchors", []) if validate and not anchorsValidator(anchors): raise GlifLibError("anchors attribute does not have the proper structure.") @@ -1005,7 +1055,7 @@ def _writeAnchors(glyphObject, element, identifiers, validate): etree.SubElement(element, "anchor", attrs) -def _writeLib(glyphObject, element, validate): +def _writeLib(glyphObject: Any, element: ElementType, validate: bool) -> None: lib = getattr(glyphObject, "lib", None) if not lib: # don't write empty lib @@ -1031,7 +1081,7 @@ layerInfoVersion3ValueData = { } -def validateLayerInfoVersion3ValueForAttribute(attr, value): +def validateLayerInfoVersion3ValueForAttribute(attr: str, value: Any) -> bool: """ This performs very basic validation of the value for attribute following the UFO 3 fontinfo.plist specification. 
The results @@ -1048,6 +1098,7 @@ def validateLayerInfoVersion3ValueForAttribute(attr, value): validator = dataValidationDict.get("valueValidator") valueOptions = dataValidationDict.get("valueOptions") # have specific options for the validator + assert callable(validator) if valueOptions is not None: isValidValue = validator(value, valueOptions) # no specific options @@ -1059,7 +1110,7 @@ def validateLayerInfoVersion3ValueForAttribute(attr, value): return isValidValue -def validateLayerInfoVersion3Data(infoData): +def validateLayerInfoVersion3Data(infoData: dict[str, Any]) -> dict[str, Any]: """ This performs very basic validation of the value for infoData following the UFO 3 layerinfo.plist specification. The results @@ -1083,7 +1134,7 @@ def validateLayerInfoVersion3Data(infoData): # ----------------- -def _glifTreeFromFile(aFile): +def _glifTreeFromFile(aFile: Union[str, bytes, int]) -> ElementType: if etree._have_lxml: tree = etree.parse(aFile, parser=etree.XMLParser(remove_comments=True)) else: @@ -1096,7 +1147,7 @@ def _glifTreeFromFile(aFile): return root -def _glifTreeFromString(aString): +def _glifTreeFromString(aString: Union[str, bytes]) -> ElementType: data = tobytes(aString, encoding="utf-8") try: if etree._have_lxml: @@ -1114,16 +1165,18 @@ def _glifTreeFromString(aString): def _readGlyphFromTree( - tree, - glyphObject=None, - pointPen=None, - formatVersions=GLIFFormatVersion.supported_versions(), - validate=True, -): + tree: ElementType, + glyphObject: Optional[Any] = None, + pointPen: Optional[AbstractPointPen] = None, + formatVersions: Set[GLIFFormatVersion] = GLIFFormatVersion.supported_versions(), + validate: bool = True, +) -> None: # check the format version formatVersionMajor = tree.get("format") - if validate and formatVersionMajor is None: - raise GlifLibError("Unspecified format version in GLIF.") + if formatVersionMajor is None: + if validate: + raise GlifLibError("Unspecified format version in GLIF.") + formatVersionMajor = 0 
formatVersionMinor = tree.get("formatMinor", 0) try: formatVersion = GLIFFormatVersion( @@ -1165,14 +1218,21 @@ def _readGlyphFromTree( def _readGlyphFromTreeFormat1( - tree, glyphObject=None, pointPen=None, validate=None, **kwargs -): + tree: ElementType, + glyphObject: Optional[Any] = None, + pointPen: Optional[AbstractPointPen] = None, + validate: bool = False, + **kwargs: Any, +) -> None: # get the name _readName(glyphObject, tree, validate) # populate the sub elements unicodes = [] haveSeenAdvance = haveSeenOutline = haveSeenLib = haveSeenNote = False for element in tree: + if glyphObject is None: + continue + if element.tag == "outline": if validate: if haveSeenOutline: @@ -1185,8 +1245,6 @@ def _readGlyphFromTreeFormat1( raise GlifLibError("Invalid outline structure.") haveSeenOutline = True buildOutlineFormat1(glyphObject, pointPen, element, validate) - elif glyphObject is None: - continue elif element.tag == "advance": if validate and haveSeenAdvance: raise GlifLibError("The advance element occurs more than once.") @@ -1224,8 +1282,12 @@ def _readGlyphFromTreeFormat1( def _readGlyphFromTreeFormat2( - tree, glyphObject=None, pointPen=None, validate=None, formatMinor=0 -): + tree: ElementType, + glyphObject: Optional[Any] = None, + pointPen: Optional[AbstractPointPen] = None, + validate: bool = False, + formatMinor: int = 0, +) -> None: # get the name _readName(glyphObject, tree, validate) # populate the sub elements @@ -1235,8 +1297,10 @@ def _readGlyphFromTreeFormat2( haveSeenAdvance = haveSeenImage = haveSeenOutline = haveSeenLib = haveSeenNote = ( False ) - identifiers = set() + identifiers: set[str] = set() for element in tree: + if glyphObject is None: + continue if element.tag == "outline": if validate: if haveSeenOutline: @@ -1252,8 +1316,6 @@ def _readGlyphFromTreeFormat2( buildOutlineFormat2( glyphObject, pointPen, element, identifiers, validate ) - elif glyphObject is None: - continue elif element.tag == "advance": if validate and haveSeenAdvance: 
raise GlifLibError("The advance element occurs more than once.") @@ -1324,13 +1386,13 @@ def _readGlyphFromTreeFormat2( _relaxedSetattr(glyphObject, "anchors", anchors) -_READ_GLYPH_FROM_TREE_FUNCS = { +_READ_GLYPH_FROM_TREE_FUNCS: dict[GLIFFormatVersion, Callable[..., Any]] = { GLIFFormatVersion.FORMAT_1_0: _readGlyphFromTreeFormat1, GLIFFormatVersion.FORMAT_2_0: _readGlyphFromTreeFormat2, } -def _readName(glyphObject, root, validate): +def _readName(glyphObject: Optional[Any], root: ElementType, validate: bool) -> None: glyphName = root.get("name") if validate and not glyphName: raise GlifLibError("Empty glyph name in GLIF.") @@ -1338,20 +1400,22 @@ def _readName(glyphObject, root, validate): _relaxedSetattr(glyphObject, "name", glyphName) -def _readAdvance(glyphObject, advance): +def _readAdvance(glyphObject: Optional[Any], advance: ElementType) -> None: width = _number(advance.get("width", 0)) _relaxedSetattr(glyphObject, "width", width) height = _number(advance.get("height", 0)) _relaxedSetattr(glyphObject, "height", height) -def _readNote(glyphObject, note): +def _readNote(glyphObject: Optional[Any], note: ElementType) -> None: + if note.text is None: + return lines = note.text.split("\n") note = "\n".join(line.strip() for line in lines if line.strip()) _relaxedSetattr(glyphObject, "note", note) -def _readLib(glyphObject, lib, validate): +def _readLib(glyphObject: Optional[Any], lib: ElementType, validate: bool) -> None: assert len(lib) == 1 child = lib[0] plist = plistlib.fromtree(child) @@ -1362,7 +1426,7 @@ def _readLib(glyphObject, lib, validate): _relaxedSetattr(glyphObject, "lib", plist) -def _readImage(glyphObject, image, validate): +def _readImage(glyphObject: Optional[Any], image: ElementType, validate: bool) -> None: imageData = dict(image.attrib) for attr, default in _transformationInfo: value = imageData.get(attr, default) @@ -1376,8 +1440,8 @@ def _readImage(glyphObject, image, validate): # GLIF to PointPen # ---------------- 
-contourAttributesFormat2 = {"identifier"} -componentAttributesFormat1 = { +contourAttributesFormat2: set[str] = {"identifier"} +componentAttributesFormat1: set[str] = { "base", "xScale", "xyScale", @@ -1386,16 +1450,21 @@ componentAttributesFormat1 = { "xOffset", "yOffset", } -componentAttributesFormat2 = componentAttributesFormat1 | {"identifier"} -pointAttributesFormat1 = {"x", "y", "type", "smooth", "name"} -pointAttributesFormat2 = pointAttributesFormat1 | {"identifier"} -pointSmoothOptions = {"no", "yes"} -pointTypeOptions = {"move", "line", "offcurve", "curve", "qcurve"} +componentAttributesFormat2: set[str] = componentAttributesFormat1 | {"identifier"} +pointAttributesFormat1: set[str] = {"x", "y", "type", "smooth", "name"} +pointAttributesFormat2: set[str] = pointAttributesFormat1 | {"identifier"} +pointSmoothOptions: set[str] = {"no", "yes"} +pointTypeOptions: set[str] = {"move", "line", "offcurve", "curve", "qcurve"} # format 1 -def buildOutlineFormat1(glyphObject, pen, outline, validate): +def buildOutlineFormat1( + glyphObject: Any, + pen: Optional[AbstractPointPen], + outline: Iterable[ElementType], + validate: bool, +) -> None: anchors = [] for element in outline: if element.tag == "contour": @@ -1419,7 +1488,7 @@ def buildOutlineFormat1(glyphObject, pen, outline, validate): _relaxedSetattr(glyphObject, "anchors", anchors) -def _buildAnchorFormat1(point, validate): +def _buildAnchorFormat1(point: ElementType, validate: bool) -> Optional[dict[str, Any]]: if point.get("type") != "move": return None name = point.get("name") @@ -1429,15 +1498,19 @@ def _buildAnchorFormat1(point, validate): y = point.get("y") if validate and x is None: raise GlifLibError("Required x attribute is missing in point element.") + assert x is not None if validate and y is None: raise GlifLibError("Required y attribute is missing in point element.") + assert y is not None x = _number(x) y = _number(y) anchor = dict(x=x, y=y, name=name) return anchor -def 
_buildOutlineContourFormat1(pen, contour, validate): +def _buildOutlineContourFormat1( + pen: AbstractPointPen, contour: ElementType, validate: bool +) -> None: if validate and contour.attrib: raise GlifLibError("Unknown attributes in contour element.") pen.beginPath() @@ -1452,7 +1525,9 @@ def _buildOutlineContourFormat1(pen, contour, validate): pen.endPath() -def _buildOutlinePointsFormat1(pen, contour): +def _buildOutlinePointsFormat1( + pen: AbstractPointPen, contour: list[dict[str, Any]] +) -> None: for point in contour: x = point["x"] y = point["y"] @@ -1462,7 +1537,9 @@ def _buildOutlinePointsFormat1(pen, contour): pen.addPoint((x, y), segmentType=segmentType, smooth=smooth, name=name) -def _buildOutlineComponentFormat1(pen, component, validate): +def _buildOutlineComponentFormat1( + pen: AbstractPointPen, component: ElementType, validate: bool +) -> None: if validate: if len(component): raise GlifLibError("Unknown child elements of component element.") @@ -1472,21 +1549,26 @@ def _buildOutlineComponentFormat1(pen, component, validate): baseGlyphName = component.get("base") if validate and baseGlyphName is None: raise GlifLibError("The base attribute is not defined in the component.") - transformation = [] - for attr, default in _transformationInfo: - value = component.get(attr) - if value is None: - value = default - else: - value = _number(value) - transformation.append(value) - pen.addComponent(baseGlyphName, tuple(transformation)) + assert baseGlyphName is not None + transformation = tuple( + _number(component.get(attr) or default) for attr, default in _transformationInfo + ) + transformation = cast( + tuple[float, float, float, float, float, float], transformation + ) + pen.addComponent(baseGlyphName, transformation) # format 2 -def buildOutlineFormat2(glyphObject, pen, outline, identifiers, validate): +def buildOutlineFormat2( + glyphObject: Any, + pen: AbstractPointPen, + outline: Iterable[ElementType], + identifiers: set[str], + validate: bool, +) -> 
None: for element in outline: if element.tag == "contour": _buildOutlineContourFormat2(pen, element, identifiers, validate) @@ -1496,7 +1578,9 @@ def buildOutlineFormat2(glyphObject, pen, outline, identifiers, validate): raise GlifLibError("Unknown element in outline element: %s" % element.tag) -def _buildOutlineContourFormat2(pen, contour, identifiers, validate): +def _buildOutlineContourFormat2( + pen: AbstractPointPen, contour: ElementType, identifiers: set[str], validate: bool +) -> None: if validate: for attr in contour.attrib.keys(): if attr not in contourAttributesFormat2: @@ -1529,7 +1613,12 @@ def _buildOutlineContourFormat2(pen, contour, identifiers, validate): pen.endPath() -def _buildOutlinePointsFormat2(pen, contour, identifiers, validate): +def _buildOutlinePointsFormat2( + pen: AbstractPointPen, + contour: list[dict[str, Any]], + identifiers: set[str], + validate: bool, +) -> None: for point in contour: x = point["x"] y = point["y"] @@ -1562,7 +1651,9 @@ def _buildOutlinePointsFormat2(pen, contour, identifiers, validate): ) -def _buildOutlineComponentFormat2(pen, component, identifiers, validate): +def _buildOutlineComponentFormat2( + pen: AbstractPointPen, component: ElementType, identifiers: set[str], validate: bool +) -> None: if validate: if len(component): raise GlifLibError("Unknown child elements of component element.") @@ -1572,14 +1663,13 @@ def _buildOutlineComponentFormat2(pen, component, identifiers, validate): baseGlyphName = component.get("base") if validate and baseGlyphName is None: raise GlifLibError("The base attribute is not defined in the component.") - transformation = [] - for attr, default in _transformationInfo: - value = component.get(attr) - if value is None: - value = default - else: - value = _number(value) - transformation.append(value) + assert baseGlyphName is not None + transformation = tuple( + _number(component.get(attr) or default) for attr, default in _transformationInfo + ) + transformation = cast( + tuple[float, 
float, float, float, float, float], transformation + ) identifier = component.get("identifier") if identifier is not None: if validate: @@ -1591,9 +1681,9 @@ def _buildOutlineComponentFormat2(pen, component, identifiers, validate): raise GlifLibError("The identifier %s is not valid." % identifier) identifiers.add(identifier) try: - pen.addComponent(baseGlyphName, tuple(transformation), identifier=identifier) + pen.addComponent(baseGlyphName, transformation, identifier=identifier) except TypeError: - pen.addComponent(baseGlyphName, tuple(transformation)) + pen.addComponent(baseGlyphName, transformation) warn( "The addComponent method needs an identifier kwarg. The component's identifier value has been discarded.", DeprecationWarning, @@ -1712,14 +1802,14 @@ def _validateAndMassagePointStructures( # --------------------- -def _relaxedSetattr(object, attr, value): +def _relaxedSetattr(object: Any, attr: str, value: Any) -> None: try: setattr(object, attr, value) except AttributeError: pass -def _number(s): +def _number(s: Union[str, int, float]) -> IntFloat: """ Given a numeric string, return an integer or a float, whichever the string indicates. _number("1") will return the integer 1, @@ -1735,7 +1825,7 @@ def _number(s): GlifLibError: Could not convert a to an int or float. 
""" try: - n = int(s) + n: IntFloat = int(s) return n except ValueError: pass @@ -1758,21 +1848,21 @@ class _DoneParsing(Exception): class _BaseParser: - def __init__(self): - self._elementStack = [] + def __init__(self) -> None: + self._elementStack: list[str] = [] - def parse(self, text): + def parse(self, text: bytes): from xml.parsers.expat import ParserCreate parser = ParserCreate() parser.StartElementHandler = self.startElementHandler parser.EndElementHandler = self.endElementHandler - parser.Parse(text, 1) + parser.Parse(text, True) - def startElementHandler(self, name, attrs): + def startElementHandler(self, name: str, attrs: Any) -> None: self._elementStack.append(name) - def endElementHandler(self, name): + def endElementHandler(self, name: str) -> None: other = self._elementStack.pop(-1) assert other == name @@ -1780,7 +1870,7 @@ class _BaseParser: # unicodes -def _fetchUnicodes(glif): +def _fetchUnicodes(glif: bytes) -> list[int]: """ Get a list of unicodes listed in glif. """ @@ -1790,11 +1880,11 @@ def _fetchUnicodes(glif): class _FetchUnicodesParser(_BaseParser): - def __init__(self): - self.unicodes = [] + def __init__(self) -> None: + self.unicodes: list[int] = [] super().__init__() - def startElementHandler(self, name, attrs): + def startElementHandler(self, name: str, attrs: dict[str, str]) -> None: if ( name == "unicode" and self._elementStack @@ -1803,9 +1893,9 @@ class _FetchUnicodesParser(_BaseParser): value = attrs.get("hex") if value is not None: try: - value = int(value, 16) - if value not in self.unicodes: - self.unicodes.append(value) + intValue = int(value, 16) + if intValue not in self.unicodes: + self.unicodes.append(intValue) except ValueError: pass super().startElementHandler(name, attrs) @@ -1814,7 +1904,7 @@ class _FetchUnicodesParser(_BaseParser): # image -def _fetchImageFileName(glif): +def _fetchImageFileName(glif: bytes) -> Optional[str]: """ The image file name (if any) from glif. 
""" @@ -1827,11 +1917,11 @@ def _fetchImageFileName(glif): class _FetchImageFileNameParser(_BaseParser): - def __init__(self): - self.fileName = None + def __init__(self) -> None: + self.fileName: Optional[str] = None super().__init__() - def startElementHandler(self, name, attrs): + def startElementHandler(self, name: str, attrs: dict[str, str]) -> None: if name == "image" and self._elementStack and self._elementStack[-1] == "glyph": self.fileName = attrs.get("fileName") raise _DoneParsing @@ -1841,7 +1931,7 @@ class _FetchImageFileNameParser(_BaseParser): # component references -def _fetchComponentBases(glif): +def _fetchComponentBases(glif: bytes) -> list[str]: """ Get a list of component base glyphs listed in glif. """ @@ -1854,11 +1944,11 @@ def _fetchComponentBases(glif): class _FetchComponentBasesParser(_BaseParser): - def __init__(self): - self.bases = [] + def __init__(self) -> None: + self.bases: list[str] = [] super().__init__() - def startElementHandler(self, name, attrs): + def startElementHandler(self, name: str, attrs: dict[str, str]) -> None: if ( name == "component" and self._elementStack @@ -1869,7 +1959,7 @@ class _FetchComponentBasesParser(_BaseParser): self.bases.append(base) super().startElementHandler(name, attrs) - def endElementHandler(self, name): + def endElementHandler(self, name: str) -> None: if name == "outline": raise _DoneParsing super().endElementHandler(name) @@ -1879,7 +1969,7 @@ class _FetchComponentBasesParser(_BaseParser): # GLIF Point Pen # -------------- -_transformationInfo = [ +_transformationInfo: list[tuple[str, int]] = [ # field name, default value ("xScale", 1), ("xyScale", 0), @@ -1896,15 +1986,21 @@ class GLIFPointPen(AbstractPointPen): part of .glif files. 
""" - def __init__(self, element, formatVersion=None, identifiers=None, validate=True): + def __init__( + self, + element: ElementType, + formatVersion: Optional[FormatVersion] = None, + identifiers: Optional[set[str]] = None, + validate: bool = True, + ) -> None: if identifiers is None: identifiers = set() - self.formatVersion = GLIFFormatVersion(formatVersion) + self.formatVersion = normalizeFormatVersion(formatVersion, GLIFFormatVersion) self.identifiers = identifiers self.outline = element self.contour = None self.prevOffCurveCount = 0 - self.prevPointTypes = [] + self.prevPointTypes: list[str] = [] self.validate = validate def beginPath(self, identifier=None, **kwargs): diff --git a/contrib/python/fonttools/fontTools/ufoLib/kerning.py b/contrib/python/fonttools/fontTools/ufoLib/kerning.py index 5c84dd720af..01ae55c062c 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/kerning.py +++ b/contrib/python/fonttools/fontTools/ufoLib/kerning.py @@ -1,6 +1,20 @@ +from __future__ import annotations + +from typing import Optional + +from fontTools.annotations import KerningPair, KerningDict, KerningGroups, IntFloat + +StrDict = dict[str, str] + + def lookupKerningValue( - pair, kerning, groups, fallback=0, glyphToFirstGroup=None, glyphToSecondGroup=None -): + pair: KerningPair, + kerning: KerningDict, + groups: KerningGroups, + fallback: IntFloat = 0, + glyphToFirstGroup: Optional[StrDict] = None, + glyphToSecondGroup: Optional[StrDict] = None, +) -> IntFloat: """Retrieve the kerning value (if any) between a pair of elements. The elments can be either individual glyphs (by name) or kerning @@ -72,11 +86,12 @@ def lookupKerningValue( # quickly check to see if the pair is in the kerning dictionary if pair in kerning: return kerning[pair] + # ensure both or no glyph-to-group mappings are provided + if (glyphToFirstGroup is None) != (glyphToSecondGroup is None): + raise ValueError( + "Must provide both 'glyphToFirstGroup' and 'glyphToSecondGroup', or neither." 
+ ) # create glyph to group mapping - if glyphToFirstGroup is not None: - assert glyphToSecondGroup is not None - if glyphToSecondGroup is not None: - assert glyphToFirstGroup is not None if glyphToFirstGroup is None: glyphToFirstGroup = {} glyphToSecondGroup = {} @@ -87,25 +102,30 @@ def lookupKerningValue( elif group.startswith("public.kern2."): for glyph in groupMembers: glyphToSecondGroup[glyph] = group + # ensure type safety for mappings + assert glyphToFirstGroup is not None + assert glyphToSecondGroup is not None # get group names and make sure first and second are glyph names first, second = pair firstGroup = secondGroup = None if first.startswith("public.kern1."): firstGroup = first - first = None + firstGlyph = None else: firstGroup = glyphToFirstGroup.get(first) + firstGlyph = first if second.startswith("public.kern2."): secondGroup = second - second = None + secondGlyph = None else: secondGroup = glyphToSecondGroup.get(second) + secondGlyph = second # make an ordered list of pairs to look up pairs = [ - (first, second), - (first, secondGroup), - (firstGroup, second), - (firstGroup, secondGroup), + (a, b) + for a in (firstGlyph, firstGroup) + for b in (secondGlyph, secondGroup) + if a is not None and b is not None ] # look up the pairs and return any matches for pair in pairs: diff --git a/contrib/python/fonttools/fontTools/ufoLib/utils.py b/contrib/python/fonttools/fontTools/ufoLib/utils.py index 45ae5e812eb..3930d8931da 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/utils.py +++ b/contrib/python/fonttools/fontTools/ufoLib/utils.py @@ -5,14 +5,22 @@ define the :py:obj:`.deprecated` decorator that is used elsewhere in the module. 
""" -import warnings +from __future__ import annotations + +from typing import Optional, Type, TypeVar, Union, cast +from collections.abc import Callable +import enum import functools +import warnings +F = TypeVar("F", bound=Callable[..., object]) +FormatVersion = TypeVar("FormatVersion", bound="BaseFormatVersion") +FormatVersionInput = Optional[Union[int, tuple[int, int], FormatVersion]] numberTypes = (int, float) -def deprecated(msg=""): +def deprecated(msg: str = "") -> Callable[[F], F]: """Decorator factory to mark functions as deprecated with given message. >>> @deprecated("Enough!") @@ -25,7 +33,7 @@ def deprecated(msg=""): True """ - def deprecated_decorator(func): + def deprecated_decorator(func: F) -> F: @functools.wraps(func) def wrapper(*args, **kwargs): warnings.warn( @@ -35,41 +43,61 @@ def deprecated(msg=""): ) return func(*args, **kwargs) - return wrapper + return cast(F, wrapper) return deprecated_decorator -# To be mixed with enum.Enum in UFOFormatVersion and GLIFFormatVersion -class _VersionTupleEnumMixin: +def normalizeFormatVersion( + value: FormatVersionInput, cls: Type[FormatVersion] +) -> FormatVersion: + # Needed for type safety of UFOFormatVersion and GLIFFormatVersion input + if value is None: + return cls.default() + if isinstance(value, cls): + return value + if isinstance(value, int): + return cls((value, 0)) + if isinstance(value, tuple) and len(value) == 2: + return cls(value) + raise ValueError(f"Unsupported format version: {value!r}") + + +# Base class for UFOFormatVersion and GLIFFormatVersion +class BaseFormatVersion(tuple[int, int], enum.Enum): + value: tuple[int, int] + + def __new__(cls: Type[FormatVersion], value: tuple[int, int]) -> BaseFormatVersion: + return super().__new__(cls, value) + @property - def major(self): + def major(self) -> int: return self.value[0] @property - def minor(self): + def minor(self) -> int: return self.value[1] @classmethod - def _missing_(cls, value): + def _missing_(cls, value: object) -> 
BaseFormatVersion: # allow to initialize a version enum from a single (major) integer if isinstance(value, int): return cls((value, 0)) # or from None to obtain the current default version if value is None: return cls.default() - return super()._missing_(value) + raise ValueError(f"{value!r} is not a valid {cls.__name__}") - def __str__(self): + def __str__(self) -> str: return f"{self.major}.{self.minor}" @classmethod - def default(cls): + def default(cls: Type[FormatVersion]) -> FormatVersion: # get the latest defined version (i.e. the max of all versions) return max(cls.__members__.values()) @classmethod - def supported_versions(cls): + def supported_versions(cls: Type[FormatVersion]) -> frozenset[FormatVersion]: return frozenset(cls.__members__.values()) diff --git a/contrib/python/fonttools/fontTools/ufoLib/validators.py b/contrib/python/fonttools/fontTools/ufoLib/validators.py index 7d87cc93063..54c65fb6cf6 100644 --- a/contrib/python/fonttools/fontTools/ufoLib/validators.py +++ b/contrib/python/fonttools/fontTools/ufoLib/validators.py @@ -1,18 +1,25 @@ """Various low level data validators.""" +from __future__ import annotations + import calendar -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from io import open import fontTools.misc.filesystem as fs +from typing import Any, Type, Optional, Union + +from fontTools.annotations import IntFloat from fontTools.ufoLib.utils import numberTypes +GenericDict = dict[str, tuple[Union[type, tuple[Type[Any], ...]], bool]] + # ------- # Generic # ------- -def isDictEnough(value): +def isDictEnough(value: Any) -> bool: """ Some objects will likely come in that aren't dicts but are dict-ish enough. @@ -25,14 +32,14 @@ def isDictEnough(value): return True -def genericTypeValidator(value, typ): +def genericTypeValidator(value: Any, typ: Type[Any]) -> bool: """ Generic. (Added at version 2.) 
""" return isinstance(value, typ) -def genericIntListValidator(values, validValues): +def genericIntListValidator(values: Any, validValues: Sequence[int]) -> bool: """ Generic. (Added at version 2.) """ @@ -48,7 +55,7 @@ def genericIntListValidator(values, validValues): return True -def genericNonNegativeIntValidator(value): +def genericNonNegativeIntValidator(value: Any) -> bool: """ Generic. (Added at version 3.) """ @@ -59,7 +66,7 @@ def genericNonNegativeIntValidator(value): return True -def genericNonNegativeNumberValidator(value): +def genericNonNegativeNumberValidator(value: Any) -> bool: """ Generic. (Added at version 3.) """ @@ -70,7 +77,7 @@ def genericNonNegativeNumberValidator(value): return True -def genericDictValidator(value, prototype): +def genericDictValidator(value: Any, prototype: GenericDict) -> bool: """ Generic. (Added at version 3.) """ @@ -104,7 +111,7 @@ def genericDictValidator(value, prototype): # Data Validators -def fontInfoStyleMapStyleNameValidator(value): +def fontInfoStyleMapStyleNameValidator(value: Any) -> bool: """ Version 2+. """ @@ -112,7 +119,7 @@ def fontInfoStyleMapStyleNameValidator(value): return value in options -def fontInfoOpenTypeGaspRangeRecordsValidator(value): +def fontInfoOpenTypeGaspRangeRecordsValidator(value: Any) -> bool: """ Version 3+. """ @@ -121,7 +128,9 @@ def fontInfoOpenTypeGaspRangeRecordsValidator(value): if len(value) == 0: return True validBehaviors = [0, 1, 2, 3] - dictPrototype = dict(rangeMaxPPEM=(int, True), rangeGaspBehavior=(list, True)) + dictPrototype: GenericDict = dict( + rangeMaxPPEM=(int, True), rangeGaspBehavior=(list, True) + ) ppemOrder = [] for rangeRecord in value: if not genericDictValidator(rangeRecord, dictPrototype): @@ -140,7 +149,7 @@ def fontInfoOpenTypeGaspRangeRecordsValidator(value): return True -def fontInfoOpenTypeHeadCreatedValidator(value): +def fontInfoOpenTypeHeadCreatedValidator(value: Any) -> bool: """ Version 2+. 
""" @@ -152,61 +161,61 @@ def fontInfoOpenTypeHeadCreatedValidator(value): return False if value.count(" ") != 1: return False - date, time = value.split(" ") - if date.count("/") != 2: + strDate, strTime = value.split(" ") + if strDate.count("/") != 2: return False - if time.count(":") != 2: + if strTime.count(":") != 2: return False # date - year, month, day = date.split("/") - if len(year) != 4: + strYear, strMonth, strDay = strDate.split("/") + if len(strYear) != 4: return False - if len(month) != 2: + if len(strMonth) != 2: return False - if len(day) != 2: + if len(strDay) != 2: return False try: - year = int(year) - month = int(month) - day = int(day) + intYear = int(strYear) + intMonth = int(strMonth) + intDay = int(strDay) except ValueError: return False - if month < 1 or month > 12: + if intMonth < 1 or intMonth > 12: return False - monthMaxDay = calendar.monthrange(year, month)[1] - if day < 1 or day > monthMaxDay: + monthMaxDay = calendar.monthrange(intYear, intMonth)[1] + if intDay < 1 or intDay > monthMaxDay: return False # time - hour, minute, second = time.split(":") - if len(hour) != 2: + strHour, strMinute, strSecond = strTime.split(":") + if len(strHour) != 2: return False - if len(minute) != 2: + if len(strMinute) != 2: return False - if len(second) != 2: + if len(strSecond) != 2: return False try: - hour = int(hour) - minute = int(minute) - second = int(second) + intHour = int(strHour) + intMinute = int(strMinute) + intSecond = int(strSecond) except ValueError: return False - if hour < 0 or hour > 23: + if intHour < 0 or intHour > 23: return False - if minute < 0 or minute > 59: + if intMinute < 0 or intMinute > 59: return False - if second < 0 or second > 59: + if intSecond < 0 or intSecond > 59: return False # fallback return True -def fontInfoOpenTypeNameRecordsValidator(value): +def fontInfoOpenTypeNameRecordsValidator(value: Any) -> bool: """ Version 3+. 
""" if not isinstance(value, list): return False - dictPrototype = dict( + dictPrototype: GenericDict = dict( nameID=(int, True), platformID=(int, True), encodingID=(int, True), @@ -219,7 +228,7 @@ def fontInfoOpenTypeNameRecordsValidator(value): return True -def fontInfoOpenTypeOS2WeightClassValidator(value): +def fontInfoOpenTypeOS2WeightClassValidator(value: Any) -> bool: """ Version 2+. """ @@ -230,7 +239,7 @@ def fontInfoOpenTypeOS2WeightClassValidator(value): return True -def fontInfoOpenTypeOS2WidthClassValidator(value): +def fontInfoOpenTypeOS2WidthClassValidator(value: Any) -> bool: """ Version 2+. """ @@ -243,7 +252,7 @@ def fontInfoOpenTypeOS2WidthClassValidator(value): return True -def fontInfoVersion2OpenTypeOS2PanoseValidator(values): +def fontInfoVersion2OpenTypeOS2PanoseValidator(values: Any) -> bool: """ Version 2. """ @@ -258,7 +267,7 @@ def fontInfoVersion2OpenTypeOS2PanoseValidator(values): return True -def fontInfoVersion3OpenTypeOS2PanoseValidator(values): +def fontInfoVersion3OpenTypeOS2PanoseValidator(values: Any) -> bool: """ Version 3+. """ @@ -275,7 +284,7 @@ def fontInfoVersion3OpenTypeOS2PanoseValidator(values): return True -def fontInfoOpenTypeOS2FamilyClassValidator(values): +def fontInfoOpenTypeOS2FamilyClassValidator(values: Any) -> bool: """ Version 2+. """ @@ -294,7 +303,7 @@ def fontInfoOpenTypeOS2FamilyClassValidator(values): return True -def fontInfoPostscriptBluesValidator(values): +def fontInfoPostscriptBluesValidator(values: Any) -> bool: """ Version 2+. """ @@ -310,7 +319,7 @@ def fontInfoPostscriptBluesValidator(values): return True -def fontInfoPostscriptOtherBluesValidator(values): +def fontInfoPostscriptOtherBluesValidator(values: Any) -> bool: """ Version 2+. """ @@ -326,7 +335,7 @@ def fontInfoPostscriptOtherBluesValidator(values): return True -def fontInfoPostscriptStemsValidator(values): +def fontInfoPostscriptStemsValidator(values: Any) -> bool: """ Version 2+. 
""" @@ -340,7 +349,7 @@ def fontInfoPostscriptStemsValidator(values): return True -def fontInfoPostscriptWindowsCharacterSetValidator(value): +def fontInfoPostscriptWindowsCharacterSetValidator(value: Any) -> bool: """ Version 2+. """ @@ -350,21 +359,21 @@ def fontInfoPostscriptWindowsCharacterSetValidator(value): return True -def fontInfoWOFFMetadataUniqueIDValidator(value): +def fontInfoWOFFMetadataUniqueIDValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(id=(str, True)) + dictPrototype: GenericDict = dict(id=(str, True)) if not genericDictValidator(value, dictPrototype): return False return True -def fontInfoWOFFMetadataVendorValidator(value): +def fontInfoWOFFMetadataVendorValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = { + dictPrototype: GenericDict = { "name": (str, True), "url": (str, False), "dir": (str, False), @@ -377,11 +386,11 @@ def fontInfoWOFFMetadataVendorValidator(value): return True -def fontInfoWOFFMetadataCreditsValidator(value): +def fontInfoWOFFMetadataCreditsValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(credits=(list, True)) + dictPrototype: GenericDict = dict(credits=(list, True)) if not genericDictValidator(value, dictPrototype): return False if not len(value["credits"]): @@ -401,11 +410,11 @@ def fontInfoWOFFMetadataCreditsValidator(value): return True -def fontInfoWOFFMetadataDescriptionValidator(value): +def fontInfoWOFFMetadataDescriptionValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(url=(str, False), text=(list, True)) + dictPrototype: GenericDict = dict(url=(str, False), text=(list, True)) if not genericDictValidator(value, dictPrototype): return False for text in value["text"]: @@ -414,11 +423,13 @@ def fontInfoWOFFMetadataDescriptionValidator(value): return True -def fontInfoWOFFMetadataLicenseValidator(value): +def fontInfoWOFFMetadataLicenseValidator(value: Any) -> bool: """ Version 3+. 
""" - dictPrototype = dict(url=(str, False), text=(list, False), id=(str, False)) + dictPrototype: GenericDict = dict( + url=(str, False), text=(list, False), id=(str, False) + ) if not genericDictValidator(value, dictPrototype): return False if "text" in value: @@ -428,11 +439,11 @@ def fontInfoWOFFMetadataLicenseValidator(value): return True -def fontInfoWOFFMetadataTrademarkValidator(value): +def fontInfoWOFFMetadataTrademarkValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(text=(list, True)) + dictPrototype: GenericDict = dict(text=(list, True)) if not genericDictValidator(value, dictPrototype): return False for text in value["text"]: @@ -441,11 +452,11 @@ def fontInfoWOFFMetadataTrademarkValidator(value): return True -def fontInfoWOFFMetadataCopyrightValidator(value): +def fontInfoWOFFMetadataCopyrightValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(text=(list, True)) + dictPrototype: GenericDict = dict(text=(list, True)) if not genericDictValidator(value, dictPrototype): return False for text in value["text"]: @@ -454,11 +465,15 @@ def fontInfoWOFFMetadataCopyrightValidator(value): return True -def fontInfoWOFFMetadataLicenseeValidator(value): +def fontInfoWOFFMetadataLicenseeValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = {"name": (str, True), "dir": (str, False), "class": (str, False)} + dictPrototype: GenericDict = { + "name": (str, True), + "dir": (str, False), + "class": (str, False), + } if not genericDictValidator(value, dictPrototype): return False if "dir" in value and value.get("dir") not in ("ltr", "rtl"): @@ -466,11 +481,11 @@ def fontInfoWOFFMetadataLicenseeValidator(value): return True -def fontInfoWOFFMetadataTextValue(value): +def fontInfoWOFFMetadataTextValue(value: Any) -> bool: """ Version 3+. 
""" - dictPrototype = { + dictPrototype: GenericDict = { "text": (str, True), "language": (str, False), "dir": (str, False), @@ -483,7 +498,7 @@ def fontInfoWOFFMetadataTextValue(value): return True -def fontInfoWOFFMetadataExtensionsValidator(value): +def fontInfoWOFFMetadataExtensionsValidator(value: Any) -> bool: """ Version 3+. """ @@ -497,11 +512,13 @@ def fontInfoWOFFMetadataExtensionsValidator(value): return True -def fontInfoWOFFMetadataExtensionValidator(value): +def fontInfoWOFFMetadataExtensionValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(names=(list, False), items=(list, True), id=(str, False)) + dictPrototype: GenericDict = dict( + names=(list, False), items=(list, True), id=(str, False) + ) if not genericDictValidator(value, dictPrototype): return False if "names" in value: @@ -514,11 +531,13 @@ def fontInfoWOFFMetadataExtensionValidator(value): return True -def fontInfoWOFFMetadataExtensionItemValidator(value): +def fontInfoWOFFMetadataExtensionItemValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = dict(id=(str, False), names=(list, True), values=(list, True)) + dictPrototype: GenericDict = dict( + id=(str, False), names=(list, True), values=(list, True) + ) if not genericDictValidator(value, dictPrototype): return False for name in value["names"]: @@ -530,11 +549,11 @@ def fontInfoWOFFMetadataExtensionItemValidator(value): return True -def fontInfoWOFFMetadataExtensionNameValidator(value): +def fontInfoWOFFMetadataExtensionNameValidator(value: Any) -> bool: """ Version 3+. """ - dictPrototype = { + dictPrototype: GenericDict = { "text": (str, True), "language": (str, False), "dir": (str, False), @@ -547,11 +566,11 @@ def fontInfoWOFFMetadataExtensionNameValidator(value): return True -def fontInfoWOFFMetadataExtensionValueValidator(value): +def fontInfoWOFFMetadataExtensionValueValidator(value: Any) -> bool: """ Version 3+. 
""" - dictPrototype = { + dictPrototype: GenericDict = { "text": (str, True), "language": (str, False), "dir": (str, False), @@ -569,7 +588,7 @@ def fontInfoWOFFMetadataExtensionValueValidator(value): # ---------- -def guidelinesValidator(value, identifiers=None): +def guidelinesValidator(value: Any, identifiers: Optional[set[str]] = None) -> bool: """ Version 3+. """ @@ -588,7 +607,7 @@ def guidelinesValidator(value, identifiers=None): return True -_guidelineDictPrototype = dict( +_guidelineDictPrototype: GenericDict = dict( x=((int, float), False), y=((int, float), False), angle=((int, float), False), @@ -598,7 +617,7 @@ _guidelineDictPrototype = dict( ) -def guidelineValidator(value): +def guidelineValidator(value: Any) -> bool: """ Version 3+. """ @@ -639,7 +658,7 @@ def guidelineValidator(value): # ------- -def anchorsValidator(value, identifiers=None): +def anchorsValidator(value: Any, identifiers: Optional[set[str]] = None) -> bool: """ Version 3+. """ @@ -658,7 +677,7 @@ def anchorsValidator(value, identifiers=None): return True -_anchorDictPrototype = dict( +_anchorDictPrototype: GenericDict = dict( x=((int, float), False), y=((int, float), False), name=(str, False), @@ -667,7 +686,7 @@ _anchorDictPrototype = dict( ) -def anchorValidator(value): +def anchorValidator(value: Any) -> bool: """ Version 3+. """ @@ -694,7 +713,7 @@ def anchorValidator(value): # ---------- -def identifierValidator(value): +def identifierValidator(value: Any) -> bool: """ Version 3+. @@ -714,8 +733,8 @@ def identifierValidator(value): if len(value) > 100: return False for c in value: - c = ord(c) - if c < validCharactersMin or c > validCharactersMax: + i = ord(c) + if i < validCharactersMin or i > validCharactersMax: return False return True @@ -725,7 +744,7 @@ def identifierValidator(value): # ----- -def colorValidator(value): +def colorValidator(value: Any) -> bool: """ Version 3+. 
@@ -776,22 +795,21 @@ def colorValidator(value): for part in parts: part = part.strip() converted = False + number: IntFloat try: - part = int(part) + number = int(part) converted = True except ValueError: pass if not converted: try: - part = float(part) + number = float(part) converted = True except ValueError: pass if not converted: return False - if part < 0: - return False - if part > 1: + if not 0 <= number <= 1: return False return True @@ -800,9 +818,9 @@ def colorValidator(value): # image # ----- -pngSignature = b"\x89PNG\r\n\x1a\n" +pngSignature: bytes = b"\x89PNG\r\n\x1a\n" -_imageDictPrototype = dict( +_imageDictPrototype: GenericDict = dict( fileName=(str, True), xScale=((int, float), False), xyScale=((int, float), False), @@ -830,7 +848,11 @@ def imageValidator(value): return True -def pngValidator(path=None, data=None, fileObj=None): +def pngValidator( + path: Optional[str] = None, + data: Optional[bytes] = None, + fileObj: Optional[Any] = None, +) -> tuple[bool, Any]: """ Version 3+. @@ -856,7 +878,9 @@ def pngValidator(path=None, data=None, fileObj=None): # ------------------- -def layerContentsValidator(value, ufoPathOrFileSystem): +def layerContentsValidator( + value: Any, ufoPathOrFileSystem: Union[str, fs.base.FS] +) -> tuple[bool, Optional[str]]: """ Check the validity of layercontents.plist. Version 3+. @@ -930,7 +954,7 @@ def layerContentsValidator(value, ufoPathOrFileSystem): # ------------ -def groupsValidator(value): +def groupsValidator(value: Any) -> tuple[bool, Optional[str]]: """ Check the validity of the groups. Version 3+ (though it's backwards compatible with UFO 1 and UFO 2). @@ -977,8 +1001,8 @@ def groupsValidator(value): bogusFormatMessage = "The group data is not in the correct format." 
if not isDictEnough(value): return False, bogusFormatMessage - firstSideMapping = {} - secondSideMapping = {} + firstSideMapping: dict[str, str] = {} + secondSideMapping: dict[str, str] = {} for groupName, glyphList in value.items(): if not isinstance(groupName, (str)): return False, bogusFormatMessage @@ -1022,7 +1046,7 @@ def groupsValidator(value): # ------------- -def kerningValidator(data): +def kerningValidator(data: Any) -> tuple[bool, Optional[str]]: """ Check the validity of the kerning data structure. Version 3+ (though it's backwards compatible with UFO 1 and UFO 2). @@ -1068,7 +1092,7 @@ def kerningValidator(data): _bogusLibFormatMessage = "The lib data is not in the correct format: %s" -def fontLibValidator(value): +def fontLibValidator(value: Any) -> tuple[bool, Optional[str]]: """ Check the validity of the lib. Version 3+ (though it's backwards compatible with UFO 1 and UFO 2). @@ -1140,7 +1164,7 @@ def fontLibValidator(value): # -------- -def glyphLibValidator(value): +def glyphLibValidator(value: Any) -> tuple[bool, Optional[str]]: """ Check the validity of the lib. Version 3+ (though it's backwards compatible with UFO 1 and UFO 2). 
diff --git a/contrib/python/fonttools/fontTools/varLib/avar/__init__.py b/contrib/python/fonttools/fontTools/varLib/avar/__init__.py new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/contrib/python/fonttools/fontTools/varLib/avar/__init__.py diff --git a/contrib/python/fonttools/fontTools/varLib/avar/__main__.py b/contrib/python/fonttools/fontTools/varLib/avar/__main__.py new file mode 100644 index 00000000000..1bb27dbfd53 --- /dev/null +++ b/contrib/python/fonttools/fontTools/varLib/avar/__main__.py @@ -0,0 +1,72 @@ +import logging + +log = logging.getLogger("fontTools.varLib.avar") + + +def main(args=None): + from fontTools.ttLib import TTFont + from fontTools.misc.cliTools import makeOutputFileName + from fontTools import configLogger + import argparse + import sys + + print( + "WARNING: This script is deprecated. Use `fonttools varLib.avar.build` " + "or `fonttools varLib.avar.unbuild` instead.\n", + file=sys.stderr, + ) + + if args is None: + args = sys.argv[1:] + + parser = argparse.ArgumentParser( + "fonttools varLib.avar", + description="Add `avar` table from designspace file to variable font.", + ) + parser.add_argument("font", metavar="varfont.ttf", help="Variable-font file.") + parser.add_argument( + "designspace", + metavar="family.designspace", + help="Designspace file.", + nargs="?", + default=None, + ) + parser.add_argument( + "-o", + "--output-file", + type=str, + help="Output font file name.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Run more verbosely." 
+ ) + + options = parser.parse_args(args) + + configLogger(level=("INFO" if options.verbose else "WARNING")) + + font = TTFont(options.font) + + if options.designspace is None: + from .unbuild import unbuild + + unbuild(font) + return 0 + + from .build import build + + build(font, options.designspace) + + if options.output_file is None: + outfile = makeOutputFileName(options.font, overWrite=True, suffix=".avar") + else: + outfile = options.output_file + if outfile: + log.info("Saving %s", outfile) + font.save(outfile) + + +if __name__ == "__main__": + import sys + + sys.exit(main()) diff --git a/contrib/python/fonttools/fontTools/varLib/avar/build.py b/contrib/python/fonttools/fontTools/varLib/avar/build.py new file mode 100644 index 00000000000..e70925cbd74 --- /dev/null +++ b/contrib/python/fonttools/fontTools/varLib/avar/build.py @@ -0,0 +1,79 @@ +from fontTools.varLib import _add_fvar, _add_avar, load_designspace +from fontTools.ttLib import newTable +import logging + +log = logging.getLogger("fontTools.varLib.avar") + + +def build(font, designspace_file): + ds = load_designspace(designspace_file, require_sources=False) + + if not "fvar" in font: + # if "name" not in font: + font["name"] = newTable("name") + _add_fvar(font, ds.axes, ds.instances) + + axisTags = [a.axisTag for a in font["fvar"].axes] + + if "avar" in font: + log.warning("avar table already present, overwriting.") + del font["avar"] + + _add_avar(font, ds.axes, ds.axisMappings, axisTags) + + +def main(args=None): + """Add `avar` table from designspace file to variable font.""" + + from fontTools.ttLib import TTFont + from fontTools.misc.cliTools import makeOutputFileName + from fontTools import configLogger + import argparse + + if args is None: + import sys + + args = sys.argv[1:] + + parser = argparse.ArgumentParser( + "fonttools varLib.avar.build", + description="Add `avar` table from designspace file to variable font.", + ) + parser.add_argument("font", metavar="varfont.ttf", 
help="Variable-font file.") + parser.add_argument( + "designspace", + metavar="family.designspace", + help="Designspace file.", + default=None, + ) + parser.add_argument( + "-o", + "--output-file", + type=str, + help="Output font file name.", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Run more verbosely." + ) + + options = parser.parse_args(args) + + configLogger(level=("INFO" if options.verbose else "WARNING")) + + font = TTFont(options.font) + + build(font, options.designspace) + + if options.output_file is None: + outfile = makeOutputFileName(options.font, overWrite=True, suffix=".avar") + else: + outfile = options.output_file + if outfile: + log.info("Saving %s", outfile) + font.save(outfile) + + +if __name__ == "__main__": + import sys + + sys.exit(main()) diff --git a/contrib/python/fonttools/fontTools/varLib/avar/map.py b/contrib/python/fonttools/fontTools/varLib/avar/map.py new file mode 100644 index 00000000000..68eed6c83eb --- /dev/null +++ b/contrib/python/fonttools/fontTools/varLib/avar/map.py @@ -0,0 +1,108 @@ +from fontTools.varLib.models import normalizeValue + + +def _denormalize(v, triplet): + if v >= 0: + return triplet[1] + v * (triplet[2] - triplet[1]) + else: + return triplet[1] + v * (triplet[1] - triplet[0]) + + +def map( + font, location, *, inputNormalized=False, outputNormalized=False, dropZeroes=False +): + if "fvar" not in font: + return None + + fvar = font["fvar"] + axes = {a.axisTag: (a.minValue, a.defaultValue, a.maxValue) for a in fvar.axes} + + if not inputNormalized: + location = { + tag: normalizeValue(value, axes[tag]) for tag, value in location.items() + } + + if "avar" in font: + location = font["avar"].renormalizeLocation(location, font, dropZeroes) + + if not outputNormalized: + location = { + tag: _denormalize(value, axes[tag]) for tag, value in location.items() + } + + return location + + +def main(args=None): + """Map variation coordinates through the `avar` table.""" + + from 
fontTools.ttLib import TTFont + import argparse + + if args is None: + import sys + + args = sys.argv[1:] + + parser = argparse.ArgumentParser( + "fonttools varLib.avar.map", + description="Map variation coordinates through the `avar` table.", + ) + parser.add_argument("font", metavar="varfont.ttf", help="Variable-font file.") + parser.add_argument( + "coords", + metavar="[AXIS=value...]", + help="Coordinates to map, e.g. 'wght=700 wdth=75'.", + nargs="*", + default=None, + ) + parser.add_argument( + "-f", action="store_true", help="Do not omit axes at default location." + ) + parser.add_argument( + "-i", action="store_true", help="Input coordinates are normalized (-1..1)." + ) + parser.add_argument( + "-o", action="store_true", help="Output coordinates as normalized (-1..1)." + ) + + options = parser.parse_args(args) + + if not options.coords: + parser.error( + "No coordinates provided. Please specify at least one axis coordinate (e.g., wght=500)" + ) + + if options.font.endswith(".designspace"): + from .build import build + + font = TTFont() + build(font, options.font) + else: + font = TTFont(options.font) + if "fvar" not in font: + parser.error(f"Font '{options.font}' does not contain an 'fvar' table.") + + location = { + tag: float(value) for tag, value in (item.split("=") for item in options.coords) + } + + mapped = map( + font, + location, + inputNormalized=options.i, + outputNormalized=options.o, + dropZeroes=not options.f, + ) + assert mapped is not None + + for tag in mapped: + v = mapped[tag] + v = int(v) if v == int(v) else v + print(f"{tag}={v:g}") + + +if __name__ == "__main__": + import sys + + sys.exit(main()) diff --git a/contrib/python/fonttools/fontTools/varLib/avar/plan.py b/contrib/python/fonttools/fontTools/varLib/avar/plan.py new file mode 100644 index 00000000000..c211dd24626 --- /dev/null +++ b/contrib/python/fonttools/fontTools/varLib/avar/plan.py @@ -0,0 +1,1004 @@ +from fontTools.ttLib import newTable +from fontTools.ttLib.tables._f_v_a_r 
import Axis as fvarAxis +from fontTools.pens.areaPen import AreaPen +from fontTools.pens.basePen import NullPen +from fontTools.pens.statisticsPen import StatisticsPen +from fontTools.varLib.models import piecewiseLinearMap, normalizeValue +from fontTools.misc.cliTools import makeOutputFileName +import math +import logging +from pprint import pformat + +__all__ = [ + "planWeightAxis", + "planWidthAxis", + "planSlantAxis", + "planOpticalSizeAxis", + "planAxis", + "sanitizeWeight", + "sanitizeWidth", + "sanitizeSlant", + "measureWeight", + "measureWidth", + "measureSlant", + "normalizeLinear", + "normalizeLog", + "normalizeDegrees", + "interpolateLinear", + "interpolateLog", + "processAxis", + "makeDesignspaceSnippet", + "addEmptyAvar", + "main", +] + +log = logging.getLogger("fontTools.varLib.avar.plan") + +WEIGHTS = [ + 50, + 100, + 150, + 200, + 250, + 300, + 350, + 400, + 450, + 500, + 550, + 600, + 650, + 700, + 750, + 800, + 850, + 900, + 950, +] + +WIDTHS = [ + 25.0, + 37.5, + 50.0, + 62.5, + 75.0, + 87.5, + 100.0, + 112.5, + 125.0, + 137.5, + 150.0, + 162.5, + 175.0, + 187.5, + 200.0, +] + +SLANTS = list(math.degrees(math.atan(d / 20.0)) for d in range(-20, 21)) + +SIZES = [ + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 18, + 24, + 30, + 36, + 48, + 60, + 72, + 96, + 120, + 144, + 192, + 240, + 288, +] + + +SAMPLES = 8 + + +def normalizeLinear(value, rangeMin, rangeMax): + """Linearly normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" + return (value - rangeMin) / (rangeMax - rangeMin) + + +def interpolateLinear(t, a, b): + """Linear interpolation between a and b, with t typically in [0, 1].""" + return a + t * (b - a) + + +def normalizeLog(value, rangeMin, rangeMax): + """Logarithmically normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" + logMin = math.log(rangeMin) + logMax = math.log(rangeMax) + return (math.log(value) - logMin) / (logMax - logMin) + + +def interpolateLog(t, a, b): + """Logarithmic 
interpolation between a and b, with t typically in [0, 1].""" + logA = math.log(a) + logB = math.log(b) + return math.exp(logA + t * (logB - logA)) + + +def normalizeDegrees(value, rangeMin, rangeMax): + """Angularly normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" + tanMin = math.tan(math.radians(rangeMin)) + tanMax = math.tan(math.radians(rangeMax)) + return (math.tan(math.radians(value)) - tanMin) / (tanMax - tanMin) + + +def measureWeight(glyphset, glyphs=None): + """Measure the perceptual average weight of the given glyphs.""" + if isinstance(glyphs, dict): + frequencies = glyphs + else: + frequencies = {g: 1 for g in glyphs} + + wght_sum = wdth_sum = 0 + for glyph_name in glyphs: + if frequencies is not None: + frequency = frequencies.get(glyph_name, 0) + if frequency == 0: + continue + else: + frequency = 1 + + glyph = glyphset[glyph_name] + + pen = AreaPen(glyphset=glyphset) + glyph.draw(pen) + + mult = glyph.width * frequency + wght_sum += mult * abs(pen.value) + wdth_sum += mult + + return wght_sum / wdth_sum + + +def measureWidth(glyphset, glyphs=None): + """Measure the average width of the given glyphs.""" + if isinstance(glyphs, dict): + frequencies = glyphs + else: + frequencies = {g: 1 for g in glyphs} + + wdth_sum = 0 + freq_sum = 0 + for glyph_name in glyphs: + if frequencies is not None: + frequency = frequencies.get(glyph_name, 0) + if frequency == 0: + continue + else: + frequency = 1 + + glyph = glyphset[glyph_name] + + pen = NullPen() + glyph.draw(pen) + + wdth_sum += glyph.width * frequency + freq_sum += frequency + + return wdth_sum / freq_sum + + +def measureSlant(glyphset, glyphs=None): + """Measure the perceptual average slant angle of the given glyphs.""" + if isinstance(glyphs, dict): + frequencies = glyphs + else: + frequencies = {g: 1 for g in glyphs} + + slnt_sum = 0 + freq_sum = 0 + for glyph_name in glyphs: + if frequencies is not None: + frequency = frequencies.get(glyph_name, 0) + if frequency == 0: + 
continue + else: + frequency = 1 + + glyph = glyphset[glyph_name] + + pen = StatisticsPen(glyphset=glyphset) + glyph.draw(pen) + + mult = glyph.width * frequency + slnt_sum += mult * pen.slant + freq_sum += mult + + return -math.degrees(math.atan(slnt_sum / freq_sum)) + + +def sanitizeWidth(userTriple, designTriple, pins, measurements): + """Sanitize the width axis limits.""" + + minVal, defaultVal, maxVal = ( + measurements[designTriple[0]], + measurements[designTriple[1]], + measurements[designTriple[2]], + ) + + calculatedMinVal = userTriple[1] * (minVal / defaultVal) + calculatedMaxVal = userTriple[1] * (maxVal / defaultVal) + + log.info("Original width axis limits: %g:%g:%g", *userTriple) + log.info( + "Calculated width axis limits: %g:%g:%g", + calculatedMinVal, + userTriple[1], + calculatedMaxVal, + ) + + if ( + abs(calculatedMinVal - userTriple[0]) / userTriple[1] > 0.05 + or abs(calculatedMaxVal - userTriple[2]) / userTriple[1] > 0.05 + ): + log.warning("Calculated width axis min/max do not match user input.") + log.warning( + " Current width axis limits: %g:%g:%g", + *userTriple, + ) + log.warning( + " Suggested width axis limits: %g:%g:%g", + calculatedMinVal, + userTriple[1], + calculatedMaxVal, + ) + + return False + + return True + + +def sanitizeWeight(userTriple, designTriple, pins, measurements): + """Sanitize the weight axis limits.""" + + if len(set(userTriple)) < 3: + return True + + minVal, defaultVal, maxVal = ( + measurements[designTriple[0]], + measurements[designTriple[1]], + measurements[designTriple[2]], + ) + + logMin = math.log(minVal) + logDefault = math.log(defaultVal) + logMax = math.log(maxVal) + + t = (userTriple[1] - userTriple[0]) / (userTriple[2] - userTriple[0]) + y = math.exp(logMin + t * (logMax - logMin)) + t = (y - minVal) / (maxVal - minVal) + calculatedDefaultVal = userTriple[0] + t * (userTriple[2] - userTriple[0]) + + log.info("Original weight axis limits: %g:%g:%g", *userTriple) + log.info( + "Calculated weight axis 
limits: %g:%g:%g", + userTriple[0], + calculatedDefaultVal, + userTriple[2], + ) + + if abs(calculatedDefaultVal - userTriple[1]) / userTriple[1] > 0.05: + log.warning("Calculated weight axis default does not match user input.") + + log.warning( + " Current weight axis limits: %g:%g:%g", + *userTriple, + ) + + log.warning( + " Suggested weight axis limits, changing default: %g:%g:%g", + userTriple[0], + calculatedDefaultVal, + userTriple[2], + ) + + t = (userTriple[2] - userTriple[0]) / (userTriple[1] - userTriple[0]) + y = math.exp(logMin + t * (logDefault - logMin)) + t = (y - minVal) / (defaultVal - minVal) + calculatedMaxVal = userTriple[0] + t * (userTriple[1] - userTriple[0]) + log.warning( + " Suggested weight axis limits, changing maximum: %g:%g:%g", + userTriple[0], + userTriple[1], + calculatedMaxVal, + ) + + t = (userTriple[0] - userTriple[2]) / (userTriple[1] - userTriple[2]) + y = math.exp(logMax + t * (logDefault - logMax)) + t = (y - maxVal) / (defaultVal - maxVal) + calculatedMinVal = userTriple[2] + t * (userTriple[1] - userTriple[2]) + log.warning( + " Suggested weight axis limits, changing minimum: %g:%g:%g", + calculatedMinVal, + userTriple[1], + userTriple[2], + ) + + return False + + return True + + +def sanitizeSlant(userTriple, designTriple, pins, measurements): + """Sanitize the slant axis limits.""" + + log.info("Original slant axis limits: %g:%g:%g", *userTriple) + log.info( + "Calculated slant axis limits: %g:%g:%g", + measurements[designTriple[0]], + measurements[designTriple[1]], + measurements[designTriple[2]], + ) + + if ( + abs(measurements[designTriple[0]] - userTriple[0]) > 1 + or abs(measurements[designTriple[1]] - userTriple[1]) > 1 + or abs(measurements[designTriple[2]] - userTriple[2]) > 1 + ): + log.warning("Calculated slant axis min/default/max do not match user input.") + log.warning( + " Current slant axis limits: %g:%g:%g", + *userTriple, + ) + log.warning( + " Suggested slant axis limits: %g:%g:%g", + 
measurements[designTriple[0]], + measurements[designTriple[1]], + measurements[designTriple[2]], + ) + + return False + + return True + + +def planAxis( + measureFunc, + normalizeFunc, + interpolateFunc, + glyphSetFunc, + axisTag, + axisLimits, + values, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitizeFunc=None, +): + """Plan an axis. + + measureFunc: callable that takes a glyphset and an optional + list of glyphnames, and returns the glyphset-wide measurement + to be used for the axis. + + normalizeFunc: callable that takes a measurement and a minimum + and maximum, and normalizes the measurement into the range 0..1, + possibly extrapolating too. + + interpolateFunc: callable that takes a normalized t value, and a + minimum and maximum, and returns the interpolated value, + possibly extrapolating too. + + glyphSetFunc: callable that takes a variations "location" dictionary, + and returns a glyphset. + + axisTag: the axis tag string. + + axisLimits: a triple of minimum, default, and maximum values for + the axis. Or an `fvar` Axis object. + + values: a list of output values to map for this axis. + + samples: the number of samples to use when sampling. Default 8. + + glyphs: a list of glyph names to use when sampling. Defaults to None, + which will process all glyphs. + + designLimits: an optional triple of minimum, default, and maximum values + represenging the "design" limits for the axis. If not provided, the + axisLimits will be used. + + pins: an optional dictionary of before/after mapping entries to pin in + the output. + + sanitizeFunc: an optional callable to call to sanitize the axis limits. 
+ """ + + if isinstance(axisLimits, fvarAxis): + axisLimits = (axisLimits.minValue, axisLimits.defaultValue, axisLimits.maxValue) + minValue, defaultValue, maxValue = axisLimits + + if samples is None: + samples = SAMPLES + if glyphs is None: + glyphs = glyphSetFunc({}).keys() + if pins is None: + pins = {} + else: + pins = pins.copy() + + log.info( + "Axis limits min %g / default %g / max %g", minValue, defaultValue, maxValue + ) + triple = (minValue, defaultValue, maxValue) + + if designLimits is not None: + log.info("Axis design-limits min %g / default %g / max %g", *designLimits) + else: + designLimits = triple + + if pins: + log.info("Pins %s", sorted(pins.items())) + pins.update( + { + minValue: designLimits[0], + defaultValue: designLimits[1], + maxValue: designLimits[2], + } + ) + + out = {} + outNormalized = {} + + axisMeasurements = {} + for value in sorted({minValue, defaultValue, maxValue} | set(pins.keys())): + glyphset = glyphSetFunc(location={axisTag: value}) + designValue = pins[value] + axisMeasurements[designValue] = measureFunc(glyphset, glyphs) + + if sanitizeFunc is not None: + log.info("Sanitizing axis limit values for the `%s` axis.", axisTag) + sanitizeFunc(triple, designLimits, pins, axisMeasurements) + + log.debug("Calculated average value:\n%s", pformat(axisMeasurements)) + + for (rangeMin, targetMin), (rangeMax, targetMax) in zip( + list(sorted(pins.items()))[:-1], + list(sorted(pins.items()))[1:], + ): + targetValues = {w for w in values if rangeMin < w < rangeMax} + if not targetValues: + continue + + normalizedMin = normalizeValue(rangeMin, triple) + normalizedMax = normalizeValue(rangeMax, triple) + normalizedTargetMin = normalizeValue(targetMin, designLimits) + normalizedTargetMax = normalizeValue(targetMax, designLimits) + + log.info("Planning target values %s.", sorted(targetValues)) + log.info("Sampling %u points in range %g,%g.", samples, rangeMin, rangeMax) + valueMeasurements = axisMeasurements.copy() + for sample in range(1, 
samples + 1): + value = rangeMin + (rangeMax - rangeMin) * sample / (samples + 1) + log.debug("Sampling value %g.", value) + glyphset = glyphSetFunc(location={axisTag: value}) + designValue = piecewiseLinearMap(value, pins) + valueMeasurements[designValue] = measureFunc(glyphset, glyphs) + log.debug("Sampled average value:\n%s", pformat(valueMeasurements)) + + measurementValue = {} + for value in sorted(valueMeasurements): + measurementValue[valueMeasurements[value]] = value + + out[rangeMin] = targetMin + outNormalized[normalizedMin] = normalizedTargetMin + for value in sorted(targetValues): + t = normalizeFunc(value, rangeMin, rangeMax) + targetMeasurement = interpolateFunc( + t, valueMeasurements[targetMin], valueMeasurements[targetMax] + ) + targetValue = piecewiseLinearMap(targetMeasurement, measurementValue) + log.debug("Planned mapping value %g to %g." % (value, targetValue)) + out[value] = targetValue + valueNormalized = normalizedMin + (value - rangeMin) / ( + rangeMax - rangeMin + ) * (normalizedMax - normalizedMin) + outNormalized[valueNormalized] = normalizedTargetMin + ( + targetValue - targetMin + ) / (targetMax - targetMin) * (normalizedTargetMax - normalizedTargetMin) + out[rangeMax] = targetMax + outNormalized[normalizedMax] = normalizedTargetMax + + log.info("Planned mapping for the `%s` axis:\n%s", axisTag, pformat(out)) + log.info( + "Planned normalized mapping for the `%s` axis:\n%s", + axisTag, + pformat(outNormalized), + ) + + if all(abs(k - v) < 0.01 for k, v in outNormalized.items()): + log.info("Detected identity mapping for the `%s` axis. Dropping.", axisTag) + out = {} + outNormalized = {} + + return out, outNormalized + + +def planWeightAxis( + glyphSetFunc, + axisLimits, + weights=None, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitize=False, +): + """Plan a weight (`wght`) axis. + + weights: A list of weight values to plan for. If None, the default + values are used. 
+ + This function simply calls planAxis with values=weights, and the appropriate + arguments. See documenation for planAxis for more information. + """ + + if weights is None: + weights = WEIGHTS + + return planAxis( + measureWeight, + normalizeLinear, + interpolateLog, + glyphSetFunc, + "wght", + axisLimits, + values=weights, + samples=samples, + glyphs=glyphs, + designLimits=designLimits, + pins=pins, + sanitizeFunc=sanitizeWeight if sanitize else None, + ) + + +def planWidthAxis( + glyphSetFunc, + axisLimits, + widths=None, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitize=False, +): + """Plan a width (`wdth`) axis. + + widths: A list of width values (percentages) to plan for. If None, the default + values are used. + + This function simply calls planAxis with values=widths, and the appropriate + arguments. See documenation for planAxis for more information. + """ + + if widths is None: + widths = WIDTHS + + return planAxis( + measureWidth, + normalizeLinear, + interpolateLinear, + glyphSetFunc, + "wdth", + axisLimits, + values=widths, + samples=samples, + glyphs=glyphs, + designLimits=designLimits, + pins=pins, + sanitizeFunc=sanitizeWidth if sanitize else None, + ) + + +def planSlantAxis( + glyphSetFunc, + axisLimits, + slants=None, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitize=False, +): + """Plan a slant (`slnt`) axis. + + slants: A list slant angles to plan for. If None, the default + values are used. + + This function simply calls planAxis with values=slants, and the appropriate + arguments. See documenation for planAxis for more information. 
+ """ + + if slants is None: + slants = SLANTS + + return planAxis( + measureSlant, + normalizeDegrees, + interpolateLinear, + glyphSetFunc, + "slnt", + axisLimits, + values=slants, + samples=samples, + glyphs=glyphs, + designLimits=designLimits, + pins=pins, + sanitizeFunc=sanitizeSlant if sanitize else None, + ) + + +def planOpticalSizeAxis( + glyphSetFunc, + axisLimits, + sizes=None, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitize=False, +): + """Plan a optical-size (`opsz`) axis. + + sizes: A list of optical size values to plan for. If None, the default + values are used. + + This function simply calls planAxis with values=sizes, and the appropriate + arguments. See documenation for planAxis for more information. + """ + + if sizes is None: + sizes = SIZES + + return planAxis( + measureWeight, + normalizeLog, + interpolateLog, + glyphSetFunc, + "opsz", + axisLimits, + values=sizes, + samples=samples, + glyphs=glyphs, + designLimits=designLimits, + pins=pins, + ) + + +def makeDesignspaceSnippet(axisTag, axisName, axisLimit, mapping): + """Make a designspace snippet for a single axis.""" + + designspaceSnippet = ( + ' <axis tag="%s" name="%s" minimum="%g" default="%g" maximum="%g"' + % ((axisTag, axisName) + axisLimit) + ) + if mapping: + designspaceSnippet += ">\n" + else: + designspaceSnippet += "/>" + + for key, value in mapping.items(): + designspaceSnippet += ' <map input="%g" output="%g"/>\n' % (key, value) + + if mapping: + designspaceSnippet += " </axis>" + + return designspaceSnippet + + +def addEmptyAvar(font): + """Add an empty `avar` table to the font.""" + font["avar"] = avar = newTable("avar") + for axis in font["fvar"].axes: + avar.segments[axis.axisTag] = {} + + +def processAxis( + font, + planFunc, + axisTag, + axisName, + values, + samples=None, + glyphs=None, + designLimits=None, + pins=None, + sanitize=False, + plot=False, +): + """Process a single axis.""" + + axisLimits = None + for axis in font["fvar"].axes: + if 
axis.axisTag == axisTag: + axisLimits = axis + break + if axisLimits is None: + return "" + axisLimits = (axisLimits.minValue, axisLimits.defaultValue, axisLimits.maxValue) + + log.info("Planning %s axis.", axisName) + + if "avar" in font: + existingMapping = font["avar"].segments[axisTag] + font["avar"].segments[axisTag] = {} + else: + existingMapping = None + + if values is not None and isinstance(values, str): + values = [float(w) for w in values.split()] + + if designLimits is not None and isinstance(designLimits, str): + designLimits = [float(d) for d in designLimits.split(":")] + assert ( + len(designLimits) == 3 + and designLimits[0] <= designLimits[1] <= designLimits[2] + ) + else: + designLimits = None + + if pins is not None and isinstance(pins, str): + newPins = {} + for pin in pins.split(): + before, after = pin.split(":") + newPins[float(before)] = float(after) + pins = newPins + del newPins + + mapping, mappingNormalized = planFunc( + font.getGlyphSet, + axisLimits, + values, + samples=samples, + glyphs=glyphs, + designLimits=designLimits, + pins=pins, + sanitize=sanitize, + ) + + if plot: + from matplotlib import pyplot + + pyplot.plot( + sorted(mappingNormalized), + [mappingNormalized[k] for k in sorted(mappingNormalized)], + ) + pyplot.show() + + if existingMapping is not None: + log.info("Existing %s mapping:\n%s", axisName, pformat(existingMapping)) + + if mapping: + if "avar" not in font: + addEmptyAvar(font) + font["avar"].segments[axisTag] = mappingNormalized + else: + if "avar" in font: + font["avar"].segments[axisTag] = {} + + designspaceSnippet = makeDesignspaceSnippet( + axisTag, + axisName, + axisLimits, + mapping, + ) + return designspaceSnippet + + +def main(args=None): + """Plan the standard axis mappings for a variable font""" + + if args is None: + import sys + + args = sys.argv[1:] + + from fontTools import configLogger + from fontTools.ttLib import TTFont + import argparse + + parser = argparse.ArgumentParser( + "fonttools 
varLib.avar.plan", + description="Plan `avar` table for variable font", + ) + parser.add_argument("font", metavar="varfont.ttf", help="Variable-font file.") + parser.add_argument( + "-o", + "--output-file", + type=str, + help="Output font file name.", + ) + parser.add_argument( + "--weights", type=str, help="Space-separate list of weights to generate." + ) + parser.add_argument( + "--widths", type=str, help="Space-separate list of widths to generate." + ) + parser.add_argument( + "--slants", type=str, help="Space-separate list of slants to generate." + ) + parser.add_argument( + "--sizes", type=str, help="Space-separate list of optical-sizes to generate." + ) + parser.add_argument("--samples", type=int, help="Number of samples.") + parser.add_argument( + "-s", "--sanitize", action="store_true", help="Sanitize axis limits" + ) + parser.add_argument( + "-g", + "--glyphs", + type=str, + help="Space-separate list of glyphs to use for sampling.", + ) + parser.add_argument( + "--weight-design-limits", + type=str, + help="min:default:max in design units for the `wght` axis.", + ) + parser.add_argument( + "--width-design-limits", + type=str, + help="min:default:max in design units for the `wdth` axis.", + ) + parser.add_argument( + "--slant-design-limits", + type=str, + help="min:default:max in design units for the `slnt` axis.", + ) + parser.add_argument( + "--optical-size-design-limits", + type=str, + help="min:default:max in design units for the `opsz` axis.", + ) + parser.add_argument( + "--weight-pins", + type=str, + help="Space-separate list of before:after pins for the `wght` axis.", + ) + parser.add_argument( + "--width-pins", + type=str, + help="Space-separate list of before:after pins for the `wdth` axis.", + ) + parser.add_argument( + "--slant-pins", + type=str, + help="Space-separate list of before:after pins for the `slnt` axis.", + ) + parser.add_argument( + "--optical-size-pins", + type=str, + help="Space-separate list of before:after pins for the `opsz` 
axis.", + ) + parser.add_argument( + "-p", "--plot", action="store_true", help="Plot the resulting mapping." + ) + + logging_group = parser.add_mutually_exclusive_group(required=False) + logging_group.add_argument( + "-v", "--verbose", action="store_true", help="Run more verbosely." + ) + logging_group.add_argument( + "-q", "--quiet", action="store_true", help="Turn verbosity off." + ) + + options = parser.parse_args(args) + + configLogger( + level=("DEBUG" if options.verbose else "WARNING" if options.quiet else "INFO") + ) + + font = TTFont(options.font) + if not "fvar" in font: + log.error("Not a variable font.") + return 1 + + if options.glyphs is not None: + glyphs = options.glyphs.split() + if ":" in options.glyphs: + glyphs = {} + for g in options.glyphs.split(): + if ":" in g: + glyph, frequency = g.split(":") + glyphs[glyph] = float(frequency) + else: + glyphs[g] = 1.0 + else: + glyphs = None + + designspaceSnippets = [] + + designspaceSnippets.append( + processAxis( + font, + planWeightAxis, + "wght", + "Weight", + values=options.weights, + samples=options.samples, + glyphs=glyphs, + designLimits=options.weight_design_limits, + pins=options.weight_pins, + sanitize=options.sanitize, + plot=options.plot, + ) + ) + designspaceSnippets.append( + processAxis( + font, + planWidthAxis, + "wdth", + "Width", + values=options.widths, + samples=options.samples, + glyphs=glyphs, + designLimits=options.width_design_limits, + pins=options.width_pins, + sanitize=options.sanitize, + plot=options.plot, + ) + ) + designspaceSnippets.append( + processAxis( + font, + planSlantAxis, + "slnt", + "Slant", + values=options.slants, + samples=options.samples, + glyphs=glyphs, + designLimits=options.slant_design_limits, + pins=options.slant_pins, + sanitize=options.sanitize, + plot=options.plot, + ) + ) + designspaceSnippets.append( + processAxis( + font, + planOpticalSizeAxis, + "opsz", + "OpticalSize", + values=options.sizes, + samples=options.samples, + glyphs=glyphs, + 
designLimits=options.optical_size_design_limits, + pins=options.optical_size_pins, + sanitize=options.sanitize, + plot=options.plot, + ) + ) + + log.info("Designspace snippet:") + for snippet in designspaceSnippets: + if snippet: + print(snippet) + + if options.output_file is None: + outfile = makeOutputFileName(options.font, overWrite=True, suffix=".avar") + else: + outfile = options.output_file + if outfile: + log.info("Saving %s", outfile) + font.save(outfile) + + +if __name__ == "__main__": + import sys + + sys.exit(main()) diff --git a/contrib/python/fonttools/fontTools/varLib/avar.py b/contrib/python/fonttools/fontTools/varLib/avar/unbuild.py index 164a4a80587..d592bd73104 100644 --- a/contrib/python/fonttools/fontTools/varLib/avar.py +++ b/contrib/python/fonttools/fontTools/varLib/avar/unbuild.py @@ -1,12 +1,8 @@ -from fontTools.varLib import _add_avar, load_designspace from fontTools.varLib.models import VariationModel from fontTools.varLib.varStore import VarStoreInstancer from fontTools.misc.fixedTools import fixedToFloat as fi2fl -from fontTools.misc.cliTools import makeOutputFileName from itertools import product -import logging - -log = logging.getLogger("fontTools.varLib.avar") +import sys def _denormalize(v, axis): @@ -182,76 +178,91 @@ def mappings_from_avar(font, denormalize=True): return axisMaps, mappings +def unbuild(font, f=sys.stdout): + fvar = font["fvar"] + axes = fvar.axes + segments, mappings = mappings_from_avar(font) + + if "name" in font: + name = font["name"] + axisNames = {axis.axisTag: name.getDebugName(axis.axisNameID) for axis in axes} + else: + axisNames = {a.axisTag: a.axisTag for a in axes} + + print("<?xml version='1.0' encoding='UTF-8'?>", file=f) + print('<designspace format="5.1">', file=f) + print(" <axes>", file=f) + for axis in axes: + + axisName = axisNames[axis.axisTag] + + triplet = (axis.minValue, axis.defaultValue, axis.maxValue) + triplet = [int(v) if v == int(v) else v for v in triplet] + + axisMap = 
segments.get(axis.axisTag) + closing = "/>" if axisMap is None else ">" + + print( + f' <axis tag="{axis.axisTag}" name="{axisName}" minimum="{triplet[0]}" maximum="{triplet[2]}" default="{triplet[1]}"{closing}', + file=f, + ) + if axisMap is not None: + for k in sorted(axisMap.keys()): + v = axisMap[k] + k = int(k) if k == int(k) else k + v = int(v) if v == int(v) else v + print(f' <map input="{k}" output="{v}"/>', file=f) + print(" </axis>", file=f) + if mappings: + print(" <mappings>", file=f) + for inputLoc, outputLoc in mappings: + print(" <mapping>", file=f) + print(" <input>", file=f) + for tag in sorted(inputLoc.keys()): + v = inputLoc[tag] + v = int(v) if v == int(v) else v + print( + f' <dimension name="{axisNames[tag]}" xvalue="{v}"/>', + file=f, + ) + print(" </input>", file=f) + print(" <output>", file=f) + for tag in sorted(outputLoc.keys()): + v = outputLoc[tag] + v = int(v) if v == int(v) else v + print( + f' <dimension name="{axisNames[tag]}" xvalue="{v}"/>', + file=f, + ) + print(" </output>", file=f) + print(" </mapping>", file=f) + print(" </mappings>", file=f) + print(" </axes>", file=f) + print("</designspace>", file=f) + + def main(args=None): - """Add `avar` table from designspace file to variable font.""" + """Print `avar` table as a designspace snippet.""" if args is None: - import sys - args = sys.argv[1:] - from fontTools import configLogger from fontTools.ttLib import TTFont - from fontTools.designspaceLib import DesignSpaceDocument import argparse parser = argparse.ArgumentParser( - "fonttools varLib.avar", - description="Add `avar` table from designspace file to variable font.", + "fonttools varLib.avar.unbuild", + description="Print `avar` table as a designspace snippet.", ) parser.add_argument("font", metavar="varfont.ttf", help="Variable-font file.") - parser.add_argument( - "designspace", - metavar="family.designspace", - help="Designspace file.", - nargs="?", - default=None, - ) - parser.add_argument( - "-o", - "--output-file", - 
type=str, - help="Output font file name.", - ) - parser.add_argument( - "-v", "--verbose", action="store_true", help="Run more verbosely." - ) - options = parser.parse_args(args) - configLogger(level=("INFO" if options.verbose else "WARNING")) - font = TTFont(options.font) - if not "fvar" in font: - log.error("Not a variable font.") + if "fvar" not in font: + print("Not a variable font.", file=sys.stderr) return 1 - if options.designspace is None: - from pprint import pprint - - segments, mappings = mappings_from_avar(font) - pprint(segments) - pprint(mappings) - print(len(mappings), "mappings") - return - - axisTags = [a.axisTag for a in font["fvar"].axes] - - ds = load_designspace(options.designspace, require_sources=False) - - if "avar" in font: - log.warning("avar table already present, overwriting.") - del font["avar"] - - _add_avar(font, ds.axes, ds.axisMappings, axisTags) - - if options.output_file is None: - outfile = makeOutputFileName(options.font, overWrite=True, suffix=".avar") - else: - outfile = options.output_file - if outfile: - log.info("Saving %s", outfile) - font.save(outfile) + unbuild(font) if __name__ == "__main__": diff --git a/contrib/python/fonttools/fontTools/varLib/avarPlanner.py b/contrib/python/fonttools/fontTools/varLib/avarPlanner.py index 2e173443a54..1fc63b1c1b3 100644 --- a/contrib/python/fonttools/fontTools/varLib/avarPlanner.py +++ b/contrib/python/fonttools/fontTools/varLib/avarPlanner.py @@ -1,1004 +1,8 @@ -from fontTools.ttLib import newTable -from fontTools.ttLib.tables._f_v_a_r import Axis as fvarAxis -from fontTools.pens.areaPen import AreaPen -from fontTools.pens.basePen import NullPen -from fontTools.pens.statisticsPen import StatisticsPen -from fontTools.varLib.models import piecewiseLinearMap, normalizeValue -from fontTools.misc.cliTools import makeOutputFileName -import math -import logging -from pprint import pformat - -__all__ = [ - "planWeightAxis", - "planWidthAxis", - "planSlantAxis", - "planOpticalSizeAxis", - 
"planAxis", - "sanitizeWeight", - "sanitizeWidth", - "sanitizeSlant", - "measureWeight", - "measureWidth", - "measureSlant", - "normalizeLinear", - "normalizeLog", - "normalizeDegrees", - "interpolateLinear", - "interpolateLog", - "processAxis", - "makeDesignspaceSnippet", - "addEmptyAvar", - "main", -] - -log = logging.getLogger("fontTools.varLib.avarPlanner") - -WEIGHTS = [ - 50, - 100, - 150, - 200, - 250, - 300, - 350, - 400, - 450, - 500, - 550, - 600, - 650, - 700, - 750, - 800, - 850, - 900, - 950, -] - -WIDTHS = [ - 25.0, - 37.5, - 50.0, - 62.5, - 75.0, - 87.5, - 100.0, - 112.5, - 125.0, - 137.5, - 150.0, - 162.5, - 175.0, - 187.5, - 200.0, -] - -SLANTS = list(math.degrees(math.atan(d / 20.0)) for d in range(-20, 21)) - -SIZES = [ - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 14, - 18, - 24, - 30, - 36, - 48, - 60, - 72, - 96, - 120, - 144, - 192, - 240, - 288, -] - - -SAMPLES = 8 - - -def normalizeLinear(value, rangeMin, rangeMax): - """Linearly normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" - return (value - rangeMin) / (rangeMax - rangeMin) - - -def interpolateLinear(t, a, b): - """Linear interpolation between a and b, with t typically in [0, 1].""" - return a + t * (b - a) - - -def normalizeLog(value, rangeMin, rangeMax): - """Logarithmically normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" - logMin = math.log(rangeMin) - logMax = math.log(rangeMax) - return (math.log(value) - logMin) / (logMax - logMin) - - -def interpolateLog(t, a, b): - """Logarithmic interpolation between a and b, with t typically in [0, 1].""" - logA = math.log(a) - logB = math.log(b) - return math.exp(logA + t * (logB - logA)) - - -def normalizeDegrees(value, rangeMin, rangeMax): - """Angularly normalize value in [rangeMin, rangeMax] to [0, 1], with extrapolation.""" - tanMin = math.tan(math.radians(rangeMin)) - tanMax = math.tan(math.radians(rangeMax)) - return (math.tan(math.radians(value)) - tanMin) / (tanMax - tanMin) - - -def 
measureWeight(glyphset, glyphs=None): - """Measure the perceptual average weight of the given glyphs.""" - if isinstance(glyphs, dict): - frequencies = glyphs - else: - frequencies = {g: 1 for g in glyphs} - - wght_sum = wdth_sum = 0 - for glyph_name in glyphs: - if frequencies is not None: - frequency = frequencies.get(glyph_name, 0) - if frequency == 0: - continue - else: - frequency = 1 - - glyph = glyphset[glyph_name] - - pen = AreaPen(glyphset=glyphset) - glyph.draw(pen) - - mult = glyph.width * frequency - wght_sum += mult * abs(pen.value) - wdth_sum += mult - - return wght_sum / wdth_sum - - -def measureWidth(glyphset, glyphs=None): - """Measure the average width of the given glyphs.""" - if isinstance(glyphs, dict): - frequencies = glyphs - else: - frequencies = {g: 1 for g in glyphs} - - wdth_sum = 0 - freq_sum = 0 - for glyph_name in glyphs: - if frequencies is not None: - frequency = frequencies.get(glyph_name, 0) - if frequency == 0: - continue - else: - frequency = 1 - - glyph = glyphset[glyph_name] - - pen = NullPen() - glyph.draw(pen) - - wdth_sum += glyph.width * frequency - freq_sum += frequency - - return wdth_sum / freq_sum - - -def measureSlant(glyphset, glyphs=None): - """Measure the perceptual average slant angle of the given glyphs.""" - if isinstance(glyphs, dict): - frequencies = glyphs - else: - frequencies = {g: 1 for g in glyphs} - - slnt_sum = 0 - freq_sum = 0 - for glyph_name in glyphs: - if frequencies is not None: - frequency = frequencies.get(glyph_name, 0) - if frequency == 0: - continue - else: - frequency = 1 - - glyph = glyphset[glyph_name] - - pen = StatisticsPen(glyphset=glyphset) - glyph.draw(pen) - - mult = glyph.width * frequency - slnt_sum += mult * pen.slant - freq_sum += mult - - return -math.degrees(math.atan(slnt_sum / freq_sum)) - - -def sanitizeWidth(userTriple, designTriple, pins, measurements): - """Sanitize the width axis limits.""" - - minVal, defaultVal, maxVal = ( - measurements[designTriple[0]], - 
measurements[designTriple[1]], - measurements[designTriple[2]], - ) - - calculatedMinVal = userTriple[1] * (minVal / defaultVal) - calculatedMaxVal = userTriple[1] * (maxVal / defaultVal) - - log.info("Original width axis limits: %g:%g:%g", *userTriple) - log.info( - "Calculated width axis limits: %g:%g:%g", - calculatedMinVal, - userTriple[1], - calculatedMaxVal, - ) - - if ( - abs(calculatedMinVal - userTriple[0]) / userTriple[1] > 0.05 - or abs(calculatedMaxVal - userTriple[2]) / userTriple[1] > 0.05 - ): - log.warning("Calculated width axis min/max do not match user input.") - log.warning( - " Current width axis limits: %g:%g:%g", - *userTriple, - ) - log.warning( - " Suggested width axis limits: %g:%g:%g", - calculatedMinVal, - userTriple[1], - calculatedMaxVal, - ) - - return False - - return True - - -def sanitizeWeight(userTriple, designTriple, pins, measurements): - """Sanitize the weight axis limits.""" - - if len(set(userTriple)) < 3: - return True - - minVal, defaultVal, maxVal = ( - measurements[designTriple[0]], - measurements[designTriple[1]], - measurements[designTriple[2]], - ) - - logMin = math.log(minVal) - logDefault = math.log(defaultVal) - logMax = math.log(maxVal) - - t = (userTriple[1] - userTriple[0]) / (userTriple[2] - userTriple[0]) - y = math.exp(logMin + t * (logMax - logMin)) - t = (y - minVal) / (maxVal - minVal) - calculatedDefaultVal = userTriple[0] + t * (userTriple[2] - userTriple[0]) - - log.info("Original weight axis limits: %g:%g:%g", *userTriple) - log.info( - "Calculated weight axis limits: %g:%g:%g", - userTriple[0], - calculatedDefaultVal, - userTriple[2], - ) - - if abs(calculatedDefaultVal - userTriple[1]) / userTriple[1] > 0.05: - log.warning("Calculated weight axis default does not match user input.") - - log.warning( - " Current weight axis limits: %g:%g:%g", - *userTriple, - ) - - log.warning( - " Suggested weight axis limits, changing default: %g:%g:%g", - userTriple[0], - calculatedDefaultVal, - userTriple[2], - ) - 
- t = (userTriple[2] - userTriple[0]) / (userTriple[1] - userTriple[0]) - y = math.exp(logMin + t * (logDefault - logMin)) - t = (y - minVal) / (defaultVal - minVal) - calculatedMaxVal = userTriple[0] + t * (userTriple[1] - userTriple[0]) - log.warning( - " Suggested weight axis limits, changing maximum: %g:%g:%g", - userTriple[0], - userTriple[1], - calculatedMaxVal, - ) - - t = (userTriple[0] - userTriple[2]) / (userTriple[1] - userTriple[2]) - y = math.exp(logMax + t * (logDefault - logMax)) - t = (y - maxVal) / (defaultVal - maxVal) - calculatedMinVal = userTriple[2] + t * (userTriple[1] - userTriple[2]) - log.warning( - " Suggested weight axis limits, changing minimum: %g:%g:%g", - calculatedMinVal, - userTriple[1], - userTriple[2], - ) - - return False - - return True - - -def sanitizeSlant(userTriple, designTriple, pins, measurements): - """Sanitize the slant axis limits.""" - - log.info("Original slant axis limits: %g:%g:%g", *userTriple) - log.info( - "Calculated slant axis limits: %g:%g:%g", - measurements[designTriple[0]], - measurements[designTriple[1]], - measurements[designTriple[2]], - ) - - if ( - abs(measurements[designTriple[0]] - userTriple[0]) > 1 - or abs(measurements[designTriple[1]] - userTriple[1]) > 1 - or abs(measurements[designTriple[2]] - userTriple[2]) > 1 - ): - log.warning("Calculated slant axis min/default/max do not match user input.") - log.warning( - " Current slant axis limits: %g:%g:%g", - *userTriple, - ) - log.warning( - " Suggested slant axis limits: %g:%g:%g", - measurements[designTriple[0]], - measurements[designTriple[1]], - measurements[designTriple[2]], - ) - - return False - - return True - - -def planAxis( - measureFunc, - normalizeFunc, - interpolateFunc, - glyphSetFunc, - axisTag, - axisLimits, - values, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitizeFunc=None, -): - """Plan an axis. 
- - measureFunc: callable that takes a glyphset and an optional - list of glyphnames, and returns the glyphset-wide measurement - to be used for the axis. - - normalizeFunc: callable that takes a measurement and a minimum - and maximum, and normalizes the measurement into the range 0..1, - possibly extrapolating too. - - interpolateFunc: callable that takes a normalized t value, and a - minimum and maximum, and returns the interpolated value, - possibly extrapolating too. - - glyphSetFunc: callable that takes a variations "location" dictionary, - and returns a glyphset. - - axisTag: the axis tag string. - - axisLimits: a triple of minimum, default, and maximum values for - the axis. Or an `fvar` Axis object. - - values: a list of output values to map for this axis. - - samples: the number of samples to use when sampling. Default 8. - - glyphs: a list of glyph names to use when sampling. Defaults to None, - which will process all glyphs. - - designLimits: an optional triple of minimum, default, and maximum values - represenging the "design" limits for the axis. If not provided, the - axisLimits will be used. - - pins: an optional dictionary of before/after mapping entries to pin in - the output. - - sanitizeFunc: an optional callable to call to sanitize the axis limits. 
- """ - - if isinstance(axisLimits, fvarAxis): - axisLimits = (axisLimits.minValue, axisLimits.defaultValue, axisLimits.maxValue) - minValue, defaultValue, maxValue = axisLimits - - if samples is None: - samples = SAMPLES - if glyphs is None: - glyphs = glyphSetFunc({}).keys() - if pins is None: - pins = {} - else: - pins = pins.copy() - - log.info( - "Axis limits min %g / default %g / max %g", minValue, defaultValue, maxValue - ) - triple = (minValue, defaultValue, maxValue) - - if designLimits is not None: - log.info("Axis design-limits min %g / default %g / max %g", *designLimits) - else: - designLimits = triple - - if pins: - log.info("Pins %s", sorted(pins.items())) - pins.update( - { - minValue: designLimits[0], - defaultValue: designLimits[1], - maxValue: designLimits[2], - } - ) - - out = {} - outNormalized = {} - - axisMeasurements = {} - for value in sorted({minValue, defaultValue, maxValue} | set(pins.keys())): - glyphset = glyphSetFunc(location={axisTag: value}) - designValue = pins[value] - axisMeasurements[designValue] = measureFunc(glyphset, glyphs) - - if sanitizeFunc is not None: - log.info("Sanitizing axis limit values for the `%s` axis.", axisTag) - sanitizeFunc(triple, designLimits, pins, axisMeasurements) - - log.debug("Calculated average value:\n%s", pformat(axisMeasurements)) - - for (rangeMin, targetMin), (rangeMax, targetMax) in zip( - list(sorted(pins.items()))[:-1], - list(sorted(pins.items()))[1:], - ): - targetValues = {w for w in values if rangeMin < w < rangeMax} - if not targetValues: - continue - - normalizedMin = normalizeValue(rangeMin, triple) - normalizedMax = normalizeValue(rangeMax, triple) - normalizedTargetMin = normalizeValue(targetMin, designLimits) - normalizedTargetMax = normalizeValue(targetMax, designLimits) - - log.info("Planning target values %s.", sorted(targetValues)) - log.info("Sampling %u points in range %g,%g.", samples, rangeMin, rangeMax) - valueMeasurements = axisMeasurements.copy() - for sample in range(1, 
samples + 1): - value = rangeMin + (rangeMax - rangeMin) * sample / (samples + 1) - log.debug("Sampling value %g.", value) - glyphset = glyphSetFunc(location={axisTag: value}) - designValue = piecewiseLinearMap(value, pins) - valueMeasurements[designValue] = measureFunc(glyphset, glyphs) - log.debug("Sampled average value:\n%s", pformat(valueMeasurements)) - - measurementValue = {} - for value in sorted(valueMeasurements): - measurementValue[valueMeasurements[value]] = value - - out[rangeMin] = targetMin - outNormalized[normalizedMin] = normalizedTargetMin - for value in sorted(targetValues): - t = normalizeFunc(value, rangeMin, rangeMax) - targetMeasurement = interpolateFunc( - t, valueMeasurements[targetMin], valueMeasurements[targetMax] - ) - targetValue = piecewiseLinearMap(targetMeasurement, measurementValue) - log.debug("Planned mapping value %g to %g." % (value, targetValue)) - out[value] = targetValue - valueNormalized = normalizedMin + (value - rangeMin) / ( - rangeMax - rangeMin - ) * (normalizedMax - normalizedMin) - outNormalized[valueNormalized] = normalizedTargetMin + ( - targetValue - targetMin - ) / (targetMax - targetMin) * (normalizedTargetMax - normalizedTargetMin) - out[rangeMax] = targetMax - outNormalized[normalizedMax] = normalizedTargetMax - - log.info("Planned mapping for the `%s` axis:\n%s", axisTag, pformat(out)) - log.info( - "Planned normalized mapping for the `%s` axis:\n%s", - axisTag, - pformat(outNormalized), - ) - - if all(abs(k - v) < 0.01 for k, v in outNormalized.items()): - log.info("Detected identity mapping for the `%s` axis. Dropping.", axisTag) - out = {} - outNormalized = {} - - return out, outNormalized - - -def planWeightAxis( - glyphSetFunc, - axisLimits, - weights=None, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitize=False, -): - """Plan a weight (`wght`) axis. - - weights: A list of weight values to plan for. If None, the default - values are used. 
- - This function simply calls planAxis with values=weights, and the appropriate - arguments. See documenation for planAxis for more information. - """ - - if weights is None: - weights = WEIGHTS - - return planAxis( - measureWeight, - normalizeLinear, - interpolateLog, - glyphSetFunc, - "wght", - axisLimits, - values=weights, - samples=samples, - glyphs=glyphs, - designLimits=designLimits, - pins=pins, - sanitizeFunc=sanitizeWeight if sanitize else None, - ) - - -def planWidthAxis( - glyphSetFunc, - axisLimits, - widths=None, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitize=False, -): - """Plan a width (`wdth`) axis. - - widths: A list of width values (percentages) to plan for. If None, the default - values are used. - - This function simply calls planAxis with values=widths, and the appropriate - arguments. See documenation for planAxis for more information. - """ - - if widths is None: - widths = WIDTHS - - return planAxis( - measureWidth, - normalizeLinear, - interpolateLinear, - glyphSetFunc, - "wdth", - axisLimits, - values=widths, - samples=samples, - glyphs=glyphs, - designLimits=designLimits, - pins=pins, - sanitizeFunc=sanitizeWidth if sanitize else None, - ) - - -def planSlantAxis( - glyphSetFunc, - axisLimits, - slants=None, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitize=False, -): - """Plan a slant (`slnt`) axis. - - slants: A list slant angles to plan for. If None, the default - values are used. - - This function simply calls planAxis with values=slants, and the appropriate - arguments. See documenation for planAxis for more information. 
- """ - - if slants is None: - slants = SLANTS - - return planAxis( - measureSlant, - normalizeDegrees, - interpolateLinear, - glyphSetFunc, - "slnt", - axisLimits, - values=slants, - samples=samples, - glyphs=glyphs, - designLimits=designLimits, - pins=pins, - sanitizeFunc=sanitizeSlant if sanitize else None, - ) - - -def planOpticalSizeAxis( - glyphSetFunc, - axisLimits, - sizes=None, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitize=False, -): - """Plan a optical-size (`opsz`) axis. - - sizes: A list of optical size values to plan for. If None, the default - values are used. - - This function simply calls planAxis with values=sizes, and the appropriate - arguments. See documenation for planAxis for more information. - """ - - if sizes is None: - sizes = SIZES - - return planAxis( - measureWeight, - normalizeLog, - interpolateLog, - glyphSetFunc, - "opsz", - axisLimits, - values=sizes, - samples=samples, - glyphs=glyphs, - designLimits=designLimits, - pins=pins, - ) - - -def makeDesignspaceSnippet(axisTag, axisName, axisLimit, mapping): - """Make a designspace snippet for a single axis.""" - - designspaceSnippet = ( - ' <axis tag="%s" name="%s" minimum="%g" default="%g" maximum="%g"' - % ((axisTag, axisName) + axisLimit) - ) - if mapping: - designspaceSnippet += ">\n" - else: - designspaceSnippet += "/>" - - for key, value in mapping.items(): - designspaceSnippet += ' <map input="%g" output="%g"/>\n' % (key, value) - - if mapping: - designspaceSnippet += " </axis>" - - return designspaceSnippet - - -def addEmptyAvar(font): - """Add an empty `avar` table to the font.""" - font["avar"] = avar = newTable("avar") - for axis in fvar.axes: - avar.segments[axis.axisTag] = {} - - -def processAxis( - font, - planFunc, - axisTag, - axisName, - values, - samples=None, - glyphs=None, - designLimits=None, - pins=None, - sanitize=False, - plot=False, -): - """Process a single axis.""" - - axisLimits = None - for axis in font["fvar"].axes: - if 
axis.axisTag == axisTag: - axisLimits = axis - break - if axisLimits is None: - return "" - axisLimits = (axisLimits.minValue, axisLimits.defaultValue, axisLimits.maxValue) - - log.info("Planning %s axis.", axisName) - - if "avar" in font: - existingMapping = font["avar"].segments[axisTag] - font["avar"].segments[axisTag] = {} - else: - existingMapping = None - - if values is not None and isinstance(values, str): - values = [float(w) for w in values.split()] - - if designLimits is not None and isinstance(designLimits, str): - designLimits = [float(d) for d in options.designLimits.split(":")] - assert ( - len(designLimits) == 3 - and designLimits[0] <= designLimits[1] <= designLimits[2] - ) - else: - designLimits = None - - if pins is not None and isinstance(pins, str): - newPins = {} - for pin in pins.split(): - before, after = pin.split(":") - newPins[float(before)] = float(after) - pins = newPins - del newPins - - mapping, mappingNormalized = planFunc( - font.getGlyphSet, - axisLimits, - values, - samples=samples, - glyphs=glyphs, - designLimits=designLimits, - pins=pins, - sanitize=sanitize, - ) - - if plot: - from matplotlib import pyplot - - pyplot.plot( - sorted(mappingNormalized), - [mappingNormalized[k] for k in sorted(mappingNormalized)], - ) - pyplot.show() - - if existingMapping is not None: - log.info("Existing %s mapping:\n%s", axisName, pformat(existingMapping)) - - if mapping: - if "avar" not in font: - addEmptyAvar(font) - font["avar"].segments[axisTag] = mappingNormalized - else: - if "avar" in font: - font["avar"].segments[axisTag] = {} - - designspaceSnippet = makeDesignspaceSnippet( - axisTag, - axisName, - axisLimits, - mapping, - ) - return designspaceSnippet - - def main(args=None): - """Plan the standard axis mappings for a variable font""" - - if args is None: - import sys - - args = sys.argv[1:] - - from fontTools import configLogger - from fontTools.ttLib import TTFont - import argparse - - parser = argparse.ArgumentParser( - "fonttools 
varLib.avarPlanner", - description="Plan `avar` table for variable font", - ) - parser.add_argument("font", metavar="varfont.ttf", help="Variable-font file.") - parser.add_argument( - "-o", - "--output-file", - type=str, - help="Output font file name.", - ) - parser.add_argument( - "--weights", type=str, help="Space-separate list of weights to generate." - ) - parser.add_argument( - "--widths", type=str, help="Space-separate list of widths to generate." - ) - parser.add_argument( - "--slants", type=str, help="Space-separate list of slants to generate." - ) - parser.add_argument( - "--sizes", type=str, help="Space-separate list of optical-sizes to generate." - ) - parser.add_argument("--samples", type=int, help="Number of samples.") - parser.add_argument( - "-s", "--sanitize", action="store_true", help="Sanitize axis limits" - ) - parser.add_argument( - "-g", - "--glyphs", - type=str, - help="Space-separate list of glyphs to use for sampling.", - ) - parser.add_argument( - "--weight-design-limits", - type=str, - help="min:default:max in design units for the `wght` axis.", - ) - parser.add_argument( - "--width-design-limits", - type=str, - help="min:default:max in design units for the `wdth` axis.", - ) - parser.add_argument( - "--slant-design-limits", - type=str, - help="min:default:max in design units for the `slnt` axis.", - ) - parser.add_argument( - "--optical-size-design-limits", - type=str, - help="min:default:max in design units for the `opsz` axis.", - ) - parser.add_argument( - "--weight-pins", - type=str, - help="Space-separate list of before:after pins for the `wght` axis.", - ) - parser.add_argument( - "--width-pins", - type=str, - help="Space-separate list of before:after pins for the `wdth` axis.", - ) - parser.add_argument( - "--slant-pins", - type=str, - help="Space-separate list of before:after pins for the `slnt` axis.", - ) - parser.add_argument( - "--optical-size-pins", - type=str, - help="Space-separate list of before:after pins for the `opsz` 
axis.", - ) - parser.add_argument( - "-p", "--plot", action="store_true", help="Plot the resulting mapping." - ) + from .avar.plan import main - logging_group = parser.add_mutually_exclusive_group(required=False) - logging_group.add_argument( - "-v", "--verbose", action="store_true", help="Run more verbosely." - ) - logging_group.add_argument( - "-q", "--quiet", action="store_true", help="Turn verbosity off." - ) - - options = parser.parse_args(args) - - configLogger( - level=("DEBUG" if options.verbose else "WARNING" if options.quiet else "INFO") - ) - - font = TTFont(options.font) - if not "fvar" in font: - log.error("Not a variable font.") - return 1 - - if options.glyphs is not None: - glyphs = options.glyphs.split() - if ":" in options.glyphs: - glyphs = {} - for g in options.glyphs.split(): - if ":" in g: - glyph, frequency = g.split(":") - glyphs[glyph] = float(frequency) - else: - glyphs[g] = 1.0 - else: - glyphs = None - - designspaceSnippets = [] - - designspaceSnippets.append( - processAxis( - font, - planWeightAxis, - "wght", - "Weight", - values=options.weights, - samples=options.samples, - glyphs=glyphs, - designLimits=options.weight_design_limits, - pins=options.weight_pins, - sanitize=options.sanitize, - plot=options.plot, - ) - ) - designspaceSnippets.append( - processAxis( - font, - planWidthAxis, - "wdth", - "Width", - values=options.widths, - samples=options.samples, - glyphs=glyphs, - designLimits=options.width_design_limits, - pins=options.width_pins, - sanitize=options.sanitize, - plot=options.plot, - ) - ) - designspaceSnippets.append( - processAxis( - font, - planSlantAxis, - "slnt", - "Slant", - values=options.slants, - samples=options.samples, - glyphs=glyphs, - designLimits=options.slant_design_limits, - pins=options.slant_pins, - sanitize=options.sanitize, - plot=options.plot, - ) - ) - designspaceSnippets.append( - processAxis( - font, - planOpticalSizeAxis, - "opsz", - "OpticalSize", - values=options.sizes, - samples=options.samples, 
- glyphs=glyphs, - designLimits=options.optical_size_design_limits, - pins=options.optical_size_pins, - sanitize=options.sanitize, - plot=options.plot, - ) - ) - - log.info("Designspace snippet:") - for snippet in designspaceSnippets: - if snippet: - print(snippet) - - if options.output_file is None: - outfile = makeOutputFileName(options.font, overWrite=True, suffix=".avar") - else: - outfile = options.output_file - if outfile: - log.info("Saving %s", outfile) - font.save(outfile) + main(args) if __name__ == "__main__": - import sys - - sys.exit(main()) + main() diff --git a/contrib/python/fonttools/fontTools/varLib/interpolatableHelpers.py b/contrib/python/fonttools/fontTools/varLib/interpolatableHelpers.py index 5cf22cf879a..67b9ea27c68 100644 --- a/contrib/python/fonttools/fontTools/varLib/interpolatableHelpers.py +++ b/contrib/python/fonttools/fontTools/varLib/interpolatableHelpers.py @@ -174,6 +174,9 @@ def min_cost_perfect_bipartite_matching_bruteforce(G): return best, best_cost +# Prefer `scipy.optimize.linear_sum_assignment` for performance. +# `Munkres` is also supported as a fallback for minimalistic systems +# where installing SciPy is not feasible. 
try: from scipy.optimize import linear_sum_assignment diff --git a/contrib/python/fonttools/ya.make b/contrib/python/fonttools/ya.make index a1876a3fa8a..de9c4ac87cf 100644 --- a/contrib/python/fonttools/ya.make +++ b/contrib/python/fonttools/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(4.59.2) +VERSION(4.60.0) LICENSE(MIT) @@ -23,6 +23,7 @@ PY_SRCS( fontTools/__main__.py fontTools/afmLib.py fontTools/agl.py + fontTools/annotations.py fontTools/cffLib/CFF2ToCFF.py fontTools/cffLib/CFFToCFF2.py fontTools/cffLib/__init__.py @@ -83,6 +84,7 @@ PY_SRCS( fontTools/misc/dictTools.py fontTools/misc/eexec.py fontTools/misc/encodingTools.py + fontTools/misc/enumTools.py fontTools/misc/etree.py fontTools/misc/filenames.py fontTools/misc/filesystem/__init__.py @@ -309,7 +311,12 @@ PY_SRCS( fontTools/unicodedata/__init__.py fontTools/varLib/__init__.py fontTools/varLib/__main__.py - fontTools/varLib/avar.py + fontTools/varLib/avar/__init__.py + fontTools/varLib/avar/__main__.py + fontTools/varLib/avar/build.py + fontTools/varLib/avar/map.py + fontTools/varLib/avar/plan.py + fontTools/varLib/avar/unbuild.py fontTools/varLib/avarPlanner.py fontTools/varLib/builder.py fontTools/varLib/cff.py diff --git a/contrib/python/xmltodict/py3/.dist-info/METADATA b/contrib/python/xmltodict/py3/.dist-info/METADATA index e820bae190d..7eacf2e1faa 100644 --- a/contrib/python/xmltodict/py3/.dist-info/METADATA +++ b/contrib/python/xmltodict/py3/.dist-info/METADATA @@ -1,12 +1,17 @@ Metadata-Version: 2.4 Name: xmltodict -Version: 1.0.0 +Version: 1.0.2 Summary: Makes working with XML feel like you are working with JSON -Home-page: https://github.com/martinblech/xmltodict Author: Martin Blech -Author-email: [email protected] -License: MIT -Platform: all +License: Copyright (C) 2012 Martin Blech and individual contributors. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +Project-URL: Homepage, https://github.com/martinblech/xmltodict Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent @@ -17,28 +22,22 @@ Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Text Processing :: Markup :: XML Requires-Python: >=3.9 Description-Content-Type: text/markdown License-File: LICENSE -Dynamic: author -Dynamic: author-email -Dynamic: classifier -Dynamic: description -Dynamic: description-content-type -Dynamic: home-page -Dynamic: license +Provides-Extra: test +Requires-Dist: pytest; extra == "test" +Requires-Dist: pytest-cov; extra == "test" Dynamic: license-file -Dynamic: platform -Dynamic: requires-python -Dynamic: summary # xmltodict `xmltodict` is a Python module that makes working with XML feel like you are working with [JSON](http://docs.python.org/library/json.html), as in this ["spec"](http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html): -[](https://app.travis-ci.com/martinblech/xmltodict) +[](https://github.com/martinblech/xmltodict/actions/workflows/test.yml) ```python >>> print(json.dumps(xmltodict.parse(""" @@ -250,7 +249,7 @@ Parse XML input into a Python dictionary. - `cdata_separator=''`: Separator string to join multiple text nodes. This joins adjacent text nodes. For example, set to a space to avoid concatenation. - `postprocessor=None`: Function to modify parsed items. - `dict_constructor=dict`: Constructor for dictionaries (e.g., dict). -- `strip_whitespace=True`: Remove leading/trailing whitespace in text nodes. Default is True; this trims whitespace in text nodes. Set to False to preserve whitespace exactly. 
+- `strip_whitespace=True`: Remove leading/trailing whitespace in text nodes. Default is True; this trims whitespace in text nodes. Set to False to preserve whitespace exactly. When `process_comments=True`, this same flag also trims comment text; disable `strip_whitespace` if you need to preserve comment indentation or padding. - `namespaces=None`: Mapping of namespaces to prefixes, or None to keep full URIs. - `force_list=None`: Force list values for specific elements. Can be a boolean (True/False), a tuple of element names to force lists for, or a callable function that receives (path, key, value) and returns True/False. Useful for elements that may appear once or multiple times to ensure consistent list output. - `item_depth=0`: Depth at which to call `item_callback`. @@ -273,6 +272,11 @@ Convert a Python dictionary back into XML. - `newl='\n'`: Newline character for pretty printing. - `expand_iter=None`: Tag name to use for items in nested lists (breaks roundtripping). +> **Note:** When building XML from dictionaries, keys whose values are empty +> lists are skipped. For example, `{'a': []}` produces no `<a>` element. Add a +> placeholder child (for example, `{'a': ['']}`) if an explicit empty container +> element is required in the output. + Note: xmltodict aims to cover the common 90% of cases. It does not preserve every XML nuance (attribute order, mixed content ordering, multiple top-level comments). For exact fidelity, use a full XML library such as lxml. 
## Examples diff --git a/contrib/python/xmltodict/py3/README.md b/contrib/python/xmltodict/py3/README.md index dec9d9bdabf..b1782a73e07 100644 --- a/contrib/python/xmltodict/py3/README.md +++ b/contrib/python/xmltodict/py3/README.md @@ -2,7 +2,7 @@ `xmltodict` is a Python module that makes working with XML feel like you are working with [JSON](http://docs.python.org/library/json.html), as in this ["spec"](http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html): -[](https://app.travis-ci.com/martinblech/xmltodict) +[](https://github.com/martinblech/xmltodict/actions/workflows/test.yml) ```python >>> print(json.dumps(xmltodict.parse(""" @@ -214,7 +214,7 @@ Parse XML input into a Python dictionary. - `cdata_separator=''`: Separator string to join multiple text nodes. This joins adjacent text nodes. For example, set to a space to avoid concatenation. - `postprocessor=None`: Function to modify parsed items. - `dict_constructor=dict`: Constructor for dictionaries (e.g., dict). -- `strip_whitespace=True`: Remove leading/trailing whitespace in text nodes. Default is True; this trims whitespace in text nodes. Set to False to preserve whitespace exactly. +- `strip_whitespace=True`: Remove leading/trailing whitespace in text nodes. Default is True; this trims whitespace in text nodes. Set to False to preserve whitespace exactly. When `process_comments=True`, this same flag also trims comment text; disable `strip_whitespace` if you need to preserve comment indentation or padding. - `namespaces=None`: Mapping of namespaces to prefixes, or None to keep full URIs. - `force_list=None`: Force list values for specific elements. Can be a boolean (True/False), a tuple of element names to force lists for, or a callable function that receives (path, key, value) and returns True/False. Useful for elements that may appear once or multiple times to ensure consistent list output. - `item_depth=0`: Depth at which to call `item_callback`. 
@@ -237,6 +237,11 @@ Convert a Python dictionary back into XML. - `newl='\n'`: Newline character for pretty printing. - `expand_iter=None`: Tag name to use for items in nested lists (breaks roundtripping). +> **Note:** When building XML from dictionaries, keys whose values are empty +> lists are skipped. For example, `{'a': []}` produces no `<a>` element. Add a +> placeholder child (for example, `{'a': ['']}`) if an explicit empty container +> element is required in the output. + Note: xmltodict aims to cover the common 90% of cases. It does not preserve every XML nuance (attribute order, mixed content ordering, multiple top-level comments). For exact fidelity, use a full XML library such as lxml. ## Examples diff --git a/contrib/python/xmltodict/py3/tests/test_dicttoxml.py b/contrib/python/xmltodict/py3/tests/test_dicttoxml.py index 87c3d256714..bd2300aa38e 100644 --- a/contrib/python/xmltodict/py3/tests/test_dicttoxml.py +++ b/contrib/python/xmltodict/py3/tests/test_dicttoxml.py @@ -1,6 +1,5 @@ from xmltodict import parse, unparse - -import unittest +import pytest import re from textwrap import dedent @@ -11,539 +10,587 @@ def _strip(fullxml): return _HEADER_RE.sub('', fullxml) -class DictToXMLTestCase(unittest.TestCase): - def test_root(self): - obj = {'a': None} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) +def test_root(): + obj = {'a': None} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) + + +def test_simple_cdata(): + obj = {'a': 'b'} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) + + +def test_cdata(): + obj = {'a': {'#text': 'y'}} + assert obj == parse(unparse(obj), force_cdata=True) + assert unparse(obj) == unparse(parse(unparse(obj))) + + +def test_attrib(): + obj = {'a': {'@href': 'x'}} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) + + +def 
test_attrib_and_cdata(): + obj = {'a': {'@href': 'x', '#text': 'y'}} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) + + +def test_list(): + obj = {'a': {'b': ['1', '2', '3']}} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) - def test_simple_cdata(self): - obj = {'a': 'b'} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) - def test_cdata(self): - obj = {'a': {'#text': 'y'}} - self.assertEqual(obj, parse(unparse(obj), force_cdata=True)) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) +def test_list_expand_iter(): + obj = {'a': {'b': [['1', '2'], ['3',]]}} + #assert obj == parse(unparse(obj, expand_iter="item"))) + exp_xml = dedent('''\ + <?xml version="1.0" encoding="utf-8"?> + <a><b><item>1</item><item>2</item></b><b><item>3</item></b></a>''') + assert exp_xml == unparse(obj, expand_iter="item") - def test_attrib(self): - obj = {'a': {'@href': 'x'}} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) - def test_attrib_and_cdata(self): - obj = {'a': {'@href': 'x', '#text': 'y'}} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) +def test_generator(): + obj = {'a': {'b': ['1', '2', '3']}} - def test_list(self): - obj = {'a': {'b': ['1', '2', '3']}} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) + def lazy_obj(): + return {'a': {'b': (i for i in ('1', '2', '3'))}} + assert obj == parse(unparse(lazy_obj())) + assert unparse(lazy_obj()) == unparse(parse(unparse(lazy_obj()))) - def test_list_expand_iter(self): - obj = {'a': {'b': [['1', '2'], ['3',]]}} - #self.assertEqual(obj, parse(unparse(obj, expand_iter="item"))) - exp_xml = dedent('''\ - <?xml version="1.0" encoding="utf-8"?> - 
<a><b><item>1</item><item>2</item></b><b><item>3</item></b></a>''') - self.assertEqual(exp_xml, unparse(obj, expand_iter="item")) - def test_generator(self): - obj = {'a': {'b': ['1', '2', '3']}} +def test_no_root(): + with pytest.raises(ValueError): + unparse({}) - def lazy_obj(): - return {'a': {'b': (i for i in ('1', '2', '3'))}} - self.assertEqual(obj, parse(unparse(lazy_obj()))) - self.assertEqual(unparse(lazy_obj()), - unparse(parse(unparse(lazy_obj())))) - def test_no_root(self): - self.assertRaises(ValueError, unparse, {}) +def test_multiple_roots(): + with pytest.raises(ValueError): + unparse({'a': '1', 'b': '2'}) + with pytest.raises(ValueError): + unparse({'a': ['1', '2', '3']}) - def test_multiple_roots(self): - self.assertRaises(ValueError, unparse, {'a': '1', 'b': '2'}) - self.assertRaises(ValueError, unparse, {'a': ['1', '2', '3']}) - def test_no_root_nofulldoc(self): - self.assertEqual(unparse({}, full_document=False), '') +def test_no_root_nofulldoc(): + assert unparse({}, full_document=False) == '' - def test_multiple_roots_nofulldoc(self): - obj = {"a": 1, "b": 2} - xml = unparse(obj, full_document=False) - self.assertEqual(xml, '<a>1</a><b>2</b>') - obj = {'a': [1, 2]} - xml = unparse(obj, full_document=False) - self.assertEqual(xml, '<a>1</a><a>2</a>') - def test_nested(self): - obj = {'a': {'b': '1', 'c': '2'}} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) - obj = {'a': {'b': {'c': {'@a': 'x', '#text': 'y'}}}} - self.assertEqual(obj, parse(unparse(obj))) - self.assertEqual(unparse(obj), unparse(parse(unparse(obj)))) +def test_multiple_roots_nofulldoc(): + obj = {"a": 1, "b": 2} + xml = unparse(obj, full_document=False) + assert xml == '<a>1</a><b>2</b>' + obj = {'a': [1, 2]} + xml = unparse(obj, full_document=False) + assert xml == '<a>1</a><a>2</a>' - def test_semistructured(self): - xml = '<a>abc<d/>efg</a>' - self.assertEqual(_strip(unparse(parse(xml))), - '<a><d></d>abcefg</a>') 
- def test_preprocessor(self): - obj = {"a": {"b:int": [1, 2], "b": "c"}} +def test_nested(): + obj = {'a': {'b': '1', 'c': '2'}} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) + obj = {'a': {'b': {'c': {'@a': 'x', '#text': 'y'}}}} + assert obj == parse(unparse(obj)) + assert unparse(obj) == unparse(parse(unparse(obj))) - def p(key, value): - try: - key, _ = key.split(':') - except ValueError: - pass - return key, value - self.assertEqual(_strip(unparse(obj, preprocessor=p)), - '<a><b>1</b><b>2</b><b>c</b></a>') +def test_semistructured(): + xml = '<a>abc<d/>efg</a>' + assert _strip(unparse(parse(xml))) == '<a><d></d>abcefg</a>' - def test_preprocessor_skipkey(self): - obj = {'a': {'b': 1, 'c': 2}} - def p(key, value): - if key == 'b': - return None - return key, value +def test_preprocessor(): + obj = {"a": {"b:int": [1, 2], "b": "c"}} - self.assertEqual(_strip(unparse(obj, preprocessor=p)), - '<a><c>2</c></a>') + def p(key, value): + try: + key, _ = key.split(':') + except ValueError: + pass + return key, value - def test_attr_order_roundtrip(self): - xml = '<root a="1" b="2" c="3"></root>' - self.assertEqual(xml, _strip(unparse(parse(xml)))) + assert _strip(unparse(obj, preprocessor=p)) == '<a><b>1</b><b>2</b><b>c</b></a>' - def test_pretty_print(self): - obj = { - "a": { - "b": [{"c": [1, 2]}, 3], - "x": "y", - } + +def test_preprocessor_skipkey(): + obj = {'a': {'b': 1, 'c': 2}} + + def p(key, value): + if key == 'b': + return None + return key, value + + assert _strip(unparse(obj, preprocessor=p)) == '<a><c>2</c></a>' + + +def test_attr_order_roundtrip(): + xml = '<root a="1" b="2" c="3"></root>' + assert xml == _strip(unparse(parse(xml))) + + +def test_pretty_print(): + obj = { + "a": { + "b": [{"c": [1, 2]}, 3], + "x": "y", } - newl = '\n' - indent = '....' 
- xml = dedent('''\ - <?xml version="1.0" encoding="utf-8"?> - <a> - ....<b> - ........<c>1</c> - ........<c>2</c> - ....</b> - ....<b>3</b> - ....<x>y</x> - </a>''') - self.assertEqual(xml, unparse(obj, pretty=True, - newl=newl, indent=indent)) + } + newl = '\n' + indent = '....' + xml = dedent('''\ + <?xml version="1.0" encoding="utf-8"?> + <a> + ....<b> + ........<c>1</c> + ........<c>2</c> + ....</b> + ....<b>3</b> + ....<x>y</x> + </a>''') + assert xml == unparse(obj, pretty=True, newl=newl, indent=indent) + + +def test_unparse_with_element_comment(): + obj = {"a": {"#comment": "note", "b": "1"}} + xml = _strip(unparse(obj, full_document=True)) + assert xml == "<a><!--note--><b>1</b></a>" + + +def test_unparse_with_multiple_element_comments(): + obj = {"a": {"#comment": ["n1", "n2"], "b": "1"}} + xml = _strip(unparse(obj, full_document=True)) + assert xml == "<a><!--n1--><!--n2--><b>1</b></a>" - def test_unparse_with_element_comment(self): - obj = {"a": {"#comment": "note", "b": "1"}} - xml = _strip(unparse(obj, full_document=True)) - self.assertEqual(xml, "<a><!--note--><b>1</b></a>") - def test_unparse_with_multiple_element_comments(self): - obj = {"a": {"#comment": ["n1", "n2"], "b": "1"}} - xml = _strip(unparse(obj, full_document=True)) - self.assertEqual(xml, "<a><!--n1--><!--n2--><b>1</b></a>") +def test_unparse_with_top_level_comment(): + obj = {"#comment": "top", "a": "1"} + xml = _strip(unparse(obj, full_document=True)) + assert xml == "<!--top--><a>1</a>" - def test_unparse_with_top_level_comment(self): - obj = {"#comment": "top", "a": "1"} - xml = _strip(unparse(obj, full_document=True)) - self.assertEqual(xml, "<!--top--><a>1</a>") - def test_unparse_with_multiple_top_level_comments(self): - obj = {"#comment": ["t1", "t2"], "a": "1"} - xml = _strip(unparse(obj, full_document=True)) - self.assertEqual(xml, "<!--t1--><!--t2--><a>1</a>") +def test_unparse_with_multiple_top_level_comments(): + obj = {"#comment": ["t1", "t2"], "a": "1"} + xml = 
_strip(unparse(obj, full_document=True)) + assert xml == "<!--t1--><!--t2--><a>1</a>" - def test_pretty_print_with_int_indent(self): - obj = { - "a": { - "b": [{"c": [1, 2]}, 3], - "x": "y", - } + +def test_unparse_rejects_comment_with_double_hyphen(): + obj = {"#comment": "bad--comment", "a": "1"} + with pytest.raises(ValueError, match="cannot contain '--'"): + unparse(obj, full_document=True) + + +def test_unparse_rejects_comment_ending_with_hyphen(): + obj = {"#comment": "trailing-", "a": "1"} + with pytest.raises(ValueError, match="cannot end with '-'"): + unparse(obj, full_document=True) + + +def test_pretty_print_with_int_indent(): + obj = { + "a": { + "b": [{"c": [1, 2]}, 3], + "x": "y", } - newl = '\n' - indent = 2 - xml = dedent('''\ - <?xml version="1.0" encoding="utf-8"?> - <a> - <b> - <c>1</c> - <c>2</c> - </b> - <b>3</b> - <x>y</x> - </a>''') - self.assertEqual(xml, unparse(obj, pretty=True, - newl=newl, indent=indent)) + } + newl = '\n' + indent = 2 + xml = dedent('''\ + <?xml version="1.0" encoding="utf-8"?> + <a> + <b> + <c>1</c> + <c>2</c> + </b> + <b>3</b> + <x>y</x> + </a>''') + assert xml == unparse(obj, pretty=True, newl=newl, indent=indent) + + +def test_comment_roundtrip_limited(): + # Input with top-level comments and an element-level comment + xml = """ + <!--top1--><a><b>1</b><!--e1--></a><!--top2--> + """ + # Parse with comment processing enabled + parsed1 = parse(xml, process_comments=True) + # Unparse and parse again (roundtrip) + xml2 = unparse(parsed1) + parsed2 = parse(xml2, process_comments=True) + + # Content preserved + assert 'a' in parsed2 + assert parsed2['a']['b'] == '1' + + # Element-level comment preserved under '#comment' + assert parsed2['a']['#comment'] == 'e1' + + # Top-level comments preserved as a list (order not guaranteed) + top = parsed2.get('#comment') + assert top is not None + top_list = top if isinstance(top, list) else [top] + assert set(top_list) == {'top1', 'top2'} + - def 
test_comment_roundtrip_limited(self): - # Input with top-level comments and an element-level comment - xml = """ - <!--top1--><a><b>1</b><!--e1--></a><!--top2--> - """ - # Parse with comment processing enabled - parsed1 = parse(xml, process_comments=True) - # Unparse and parse again (roundtrip) - xml2 = unparse(parsed1) - parsed2 = parse(xml2, process_comments=True) +def test_encoding(): + value = chr(39321) + obj = {'a': value} + utf8doc = unparse(obj, encoding='utf-8') + latin1doc = unparse(obj, encoding='iso-8859-1') + assert parse(utf8doc) == parse(latin1doc) + assert parse(utf8doc) == obj - # Content preserved - self.assertIn('a', parsed2) - self.assertEqual(parsed2['a']['b'], '1') - # Element-level comment preserved under '#comment' - self.assertEqual(parsed2['a']['#comment'], 'e1') +def test_fulldoc(): + xml_declaration_re = re.compile( + '^' + re.escape('<?xml version="1.0" encoding="utf-8"?>')) + assert xml_declaration_re.match(unparse({'a': 1})) + assert not xml_declaration_re.match(unparse({'a': 1}, full_document=False)) - # Top-level comments preserved as a list (order not guaranteed) - top = parsed2.get('#comment') - self.assertIsNotNone(top) - top_list = top if isinstance(top, list) else [top] - self.assertEqual(set(top_list), {'top1', 'top2'}) - def test_encoding(self): - value = chr(39321) - obj = {'a': value} - utf8doc = unparse(obj, encoding='utf-8') - latin1doc = unparse(obj, encoding='iso-8859-1') - self.assertEqual(parse(utf8doc), parse(latin1doc)) - self.assertEqual(parse(utf8doc), obj) +def test_non_string_value(): + obj = {'a': 1} + assert '<a>1</a>' == _strip(unparse(obj)) - def test_fulldoc(self): - xml_declaration_re = re.compile( - '^' + re.escape('<?xml version="1.0" encoding="utf-8"?>')) - self.assertTrue(xml_declaration_re.match(unparse({'a': 1}))) - self.assertFalse( - xml_declaration_re.match(unparse({'a': 1}, full_document=False))) - def test_non_string_value(self): - obj = {'a': 1} - self.assertEqual('<a>1</a>', 
_strip(unparse(obj))) +def test_non_string_attr(): + obj = {'a': {'@attr': 1}} + assert '<a attr="1"></a>' == _strip(unparse(obj)) - def test_non_string_attr(self): - obj = {'a': {'@attr': 1}} - self.assertEqual('<a attr="1"></a>', _strip(unparse(obj))) - def test_short_empty_elements(self): - obj = {'a': None} - self.assertEqual('<a/>', _strip(unparse(obj, short_empty_elements=True))) +def test_short_empty_elements(): + obj = {'a': None} + assert '<a/>' == _strip(unparse(obj, short_empty_elements=True)) - def test_namespace_support(self): - obj = { - "http://defaultns.com/:root": { - "@xmlns": { - "": "http://defaultns.com/", - "a": "http://a.com/", - "b": "http://b.com/", - }, - "http://defaultns.com/:x": { - "@http://a.com/:attr": "val", - "#text": "1", - }, - "http://a.com/:y": "2", - "http://b.com/:z": "3", + +def test_namespace_support(): + obj = { + "http://defaultns.com/:root": { + "@xmlns": { + "": "http://defaultns.com/", + "a": "http://a.com/", + "b": "http://b.com/", }, - } - ns = { - 'http://defaultns.com/': '', - 'http://a.com/': 'a', - 'http://b.com/': 'b', - } + "http://defaultns.com/:x": { + "@http://a.com/:attr": "val", + "#text": "1", + }, + "http://a.com/:y": "2", + "http://b.com/:z": "3", + }, + } + ns = { + 'http://defaultns.com/': '', + 'http://a.com/': 'a', + 'http://b.com/': 'b', + } - expected_xml = '''<?xml version="1.0" encoding="utf-8"?> + expected_xml = '''<?xml version="1.0" encoding="utf-8"?> <root xmlns="http://defaultns.com/" xmlns:a="http://a.com/" \ xmlns:b="http://b.com/"><x a:attr="val">1</x><a:y>2</a:y><b:z>3</b:z></root>''' - xml = unparse(obj, namespaces=ns) + xml = unparse(obj, namespaces=ns) - self.assertEqual(xml, expected_xml) + assert xml == expected_xml - def test_boolean_unparse(self): - expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>true</x>' - xml = unparse(dict(x=True)) - self.assertEqual(xml, expected_xml) - expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>' - xml = 
unparse(dict(x=False)) - self.assertEqual(xml, expected_xml) +def test_boolean_unparse(): + expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>true</x>' + xml = unparse(dict(x=True)) + assert xml == expected_xml - def test_rejects_tag_name_with_angle_brackets(self): - # Minimal guard: disallow '<' or '>' to prevent breaking tag context - with self.assertRaises(ValueError): - unparse({"m><tag>content</tag": "unsafe"}, full_document=False) + expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>' + xml = unparse(dict(x=False)) + assert xml == expected_xml - def test_rejects_attribute_name_with_angle_brackets(self): - # Now we expect bad attribute names to be rejected - with self.assertRaises(ValueError): - unparse( - {"a": {"@m><tag>content</tag": "unsafe", "#text": "x"}}, - full_document=False, - ) - def test_rejects_malicious_xmlns_prefix(self): - # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix - with self.assertRaises(ValueError): - unparse( - { - "a": { - "@xmlns": {"m><bad": "http://example.com/"}, - "#text": "x", - } - }, - full_document=False, - ) +def test_rejects_tag_name_with_angle_brackets(): + # Minimal guard: disallow '<' or '>' to prevent breaking tag context + with pytest.raises(ValueError): + unparse({"m><tag>content</tag": "unsafe"}, full_document=False) - def test_attribute_values_with_angle_brackets_are_escaped(self): - # Attribute values should be escaped by XMLGenerator - xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False) - # The generated XML should contain escaped '<' and '>' within the attribute value - self.assertIn('attr="1<middle>2"', xml) - def test_rejects_tag_name_starting_with_question(self): - with self.assertRaises(ValueError): - unparse({"?pi": "data"}, full_document=False) +def test_rejects_attribute_name_with_angle_brackets(): + # Now we expect bad attribute names to be rejected + with pytest.raises(ValueError): + unparse( + {"a": {"@m><tag>content</tag": 
"unsafe", "#text": "x"}}, + full_document=False, + ) - def test_rejects_tag_name_starting_with_bang(self): - with self.assertRaises(ValueError): - unparse({"!decl": "data"}, full_document=False) - def test_rejects_attribute_name_starting_with_question(self): - with self.assertRaises(ValueError): - unparse({"a": {"@?weird": "x"}}, full_document=False) +def test_rejects_malicious_xmlns_prefix(): + # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix + with pytest.raises(ValueError): + unparse( + { + "a": { + "@xmlns": {"m><bad": "http://example.com/"}, + "#text": "x", + } + }, + full_document=False, + ) - def test_rejects_attribute_name_starting_with_bang(self): - with self.assertRaises(ValueError): - unparse({"a": {"@!weird": "x"}}, full_document=False) - def test_rejects_xmlns_prefix_starting_with_question_or_bang(self): - with self.assertRaises(ValueError): - unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False) - with self.assertRaises(ValueError): - unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False) +def test_attribute_values_with_angle_brackets_are_escaped(): + # Attribute values should be escaped by XMLGenerator + xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False) + # The generated XML should contain escaped '<' and '>' within the attribute value + assert 'attr="1<middle>2"' in xml - def test_rejects_non_string_names(self): - class Weird: - def __str__(self): - return "bad>name" - # Non-string element key - with self.assertRaises(ValueError): - unparse({Weird(): "x"}, full_document=False) - # Non-string attribute key - with self.assertRaises(ValueError): - unparse({"a": {Weird(): "x"}}, full_document=False) +def test_rejects_tag_name_starting_with_question(): + with pytest.raises(ValueError): + unparse({"?pi": "data"}, full_document=False) - def test_rejects_tag_name_with_slash(self): - with self.assertRaises(ValueError): - unparse({"bad/name": "x"}, full_document=False) - def 
test_rejects_tag_name_with_whitespace(self): - for name in ["bad name", "bad\tname", "bad\nname"]: - with self.assertRaises(ValueError): - unparse({name: "x"}, full_document=False) +def test_rejects_tag_name_starting_with_bang(): + with pytest.raises(ValueError): + unparse({"!decl": "data"}, full_document=False) - def test_rejects_attribute_name_with_slash(self): - with self.assertRaises(ValueError): - unparse({"a": {"@bad/name": "x"}}, full_document=False) - def test_rejects_attribute_name_with_whitespace(self): - for name in ["@bad name", "@bad\tname", "@bad\nname"]: - with self.assertRaises(ValueError): - unparse({"a": {name: "x"}}, full_document=False) +def test_rejects_attribute_name_starting_with_question(): + with pytest.raises(ValueError): + unparse({"a": {"@?weird": "x"}}, full_document=False) - def test_rejects_xmlns_prefix_with_slash_or_whitespace(self): - # Slash - with self.assertRaises(ValueError): - unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False) - # Whitespace - with self.assertRaises(ValueError): - unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False) - def test_rejects_names_with_quotes_and_equals(self): - # Element names - for name in ['a"b', "a'b", "a=b"]: - with self.assertRaises(ValueError): - unparse({name: "x"}, full_document=False) - # Attribute names - for name in ['@a"b', "@a'b", "@a=b"]: - with self.assertRaises(ValueError): - unparse({"a": {name: "x"}}, full_document=False) - # xmlns prefixes - for prefix in ['a"b', "a'b", "a=b"]: - with self.assertRaises(ValueError): - unparse({"a": {"@xmlns": {prefix: "http://e/"}}}, full_document=False) +def test_rejects_attribute_name_starting_with_bang(): + with pytest.raises(ValueError): + unparse({"a": {"@!weird": "x"}}, full_document=False) - def test_pretty_print_and_short_empty_elements_consistency(self): - """Test that pretty and compact modes produce equivalent results when stripped. 
- This test covers issue #352: Edge case with pretty_print and short_empty_elements. - When short_empty_elements=True, empty elements should be written as <tag/> - regardless of whether pretty printing is enabled. - """ - # Test case from issue #352: empty list child - input_dict = {"Foos": {"Foo": []}} +def test_rejects_xmlns_prefix_starting_with_question_or_bang(): + with pytest.raises(ValueError): + unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False) + with pytest.raises(ValueError): + unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False) - compact = unparse( - input_dict, pretty=False, short_empty_elements=True, full_document=False - ) - pretty = unparse( - input_dict, pretty=True, short_empty_elements=True, full_document=False - ) - pretty_compacted = pretty.replace("\n", "").replace("\t", "") - # They should be equal when pretty formatting is stripped - self.assertEqual(pretty_compacted, compact) - self.assertEqual(compact, "<Foos/>") - self.assertEqual(pretty_compacted, "<Foos/>") +def test_rejects_non_string_names(): + class Weird: + def __str__(self): + return "bad>name" - def test_empty_list_filtering(self): - """Test that empty lists are filtered out and don't create empty child elements.""" - # Test various cases with empty lists - test_cases = [ - # Case 1: Single empty list child - ({"Foos": {"Foo": []}}, "<Foos/>"), - # Case 2: Multiple empty list children - ({"Foos": {"Foo": [], "Bar": []}}, "<Foos/>"), - # Case 3: Mixed empty and non-empty children - ({"Foos": {"Foo": [], "Bar": "value"}}, "<Foos><Bar>value</Bar></Foos>"), - # Case 4: Nested empty lists - ({"Foos": {"Foo": {"Bar": []}}}, "<Foos><Foo/></Foos>"), - # Case 5: Empty list with attributes - ({"Foos": {"@attr": "value", "Foo": []}}, '<Foos attr="value"/>'), - ] + # Non-string element key + with pytest.raises(ValueError): + unparse({Weird(): "x"}, full_document=False) + # Non-string attribute key + with pytest.raises(ValueError): + unparse({"a": {Weird(): 
"x"}}, full_document=False) - for input_dict, expected_compact in test_cases: - with self.subTest(input_dict=input_dict): - # Test compact mode - compact = unparse( - input_dict, - pretty=False, - short_empty_elements=True, - full_document=False, - ) - self.assertEqual(compact, expected_compact) - # Test pretty mode - pretty = unparse( - input_dict, - pretty=True, - short_empty_elements=True, - full_document=False, - ) - pretty_compacted = pretty.replace("\n", "").replace("\t", "") - self.assertEqual(pretty_compacted, expected_compact) +def test_rejects_tag_name_with_slash(): + with pytest.raises(ValueError): + unparse({"bad/name": "x"}, full_document=False) - def test_empty_list_filtering_with_short_empty_elements_false(self): - """Test that empty lists are still filtered when short_empty_elements=False.""" - input_dict = {"Foos": {"Foo": []}} - # With short_empty_elements=False, empty elements should be <tag></tag> - compact = unparse( - input_dict, pretty=False, short_empty_elements=False, full_document=False - ) - pretty = unparse( - input_dict, pretty=True, short_empty_elements=False, full_document=False - ) - pretty_compacted = pretty.replace("\n", "").replace("\t", "") +def test_rejects_tag_name_with_whitespace(): + for name in ["bad name", "bad\tname", "bad\nname"]: + with pytest.raises(ValueError): + unparse({name: "x"}, full_document=False) - # They should be equal when pretty formatting is stripped - self.assertEqual(pretty_compacted, compact) - self.assertEqual(compact, "<Foos></Foos>") - self.assertEqual(pretty_compacted, "<Foos></Foos>") - def test_non_empty_lists_are_not_filtered(self): - """Test that non-empty lists are not filtered out.""" - # Test with non-empty lists - input_dict = {"Foos": {"Foo": ["item1", "item2"]}} +def test_rejects_attribute_name_with_slash(): + with pytest.raises(ValueError): + unparse({"a": {"@bad/name": "x"}}, full_document=False) - compact = unparse( - input_dict, pretty=False, short_empty_elements=True, 
full_document=False - ) - pretty = unparse( - input_dict, pretty=True, short_empty_elements=True, full_document=False - ) - pretty_compacted = pretty.replace("\n", "").replace("\t", "") - # The lists should be processed normally - self.assertEqual(pretty_compacted, compact) - self.assertEqual(compact, "<Foos><Foo>item1</Foo><Foo>item2</Foo></Foos>") - self.assertEqual( - pretty_compacted, "<Foos><Foo>item1</Foo><Foo>item2</Foo></Foos>" - ) +def test_rejects_attribute_name_with_whitespace(): + for name in ["@bad name", "@bad\tname", "@bad\nname"]: + with pytest.raises(ValueError): + unparse({"a": {name: "x"}}, full_document=False) + + +def test_rejects_xmlns_prefix_with_slash_or_whitespace(): + # Slash + with pytest.raises(ValueError): + unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False) + # Whitespace + with pytest.raises(ValueError): + unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False) + + +def test_rejects_names_with_quotes_and_equals(): + # Element names + for name in ['a"b', "a'b", "a=b"]: + with pytest.raises(ValueError): + unparse({name: "x"}, full_document=False) + # Attribute names + for name in ['@a"b', "@a'b", "@a=b"]: + with pytest.raises(ValueError): + unparse({"a": {name: "x"}}, full_document=False) + # xmlns prefixes + for prefix in ['a"b', "a'b", "a=b"]: + with pytest.raises(ValueError): + unparse({"a": {"@xmlns": {prefix: "http://e/"}}}, full_document=False) + + +def test_pretty_print_and_short_empty_elements_consistency(): + """Test that pretty and compact modes produce equivalent results when stripped. - def test_empty_dict_vs_empty_list_behavior(self): - """Test the difference between empty dicts and empty lists.""" - # Empty dict should create a child element - input_dict_dict = {"Foos": {"Foo": {}}} - compact_dict = unparse( - input_dict_dict, + This test covers issue #352: Edge case with pretty_print and short_empty_elements. 
+ When short_empty_elements=True, empty elements should be written as <tag/> + regardless of whether pretty printing is enabled. + """ + # Test case from issue #352: empty list child + input_dict = {"Foos": {"Foo": []}} + + compact = unparse( + input_dict, pretty=False, short_empty_elements=True, full_document=False + ) + pretty = unparse( + input_dict, pretty=True, short_empty_elements=True, full_document=False + ) + pretty_compacted = pretty.replace("\n", "").replace("\t", "") + + # They should be equal when pretty formatting is stripped + assert pretty_compacted == compact + assert compact == "<Foos/>" + assert pretty_compacted == "<Foos/>" + + +def test_empty_list_filtering(): + """Test that empty lists are filtered out and don't create empty child elements.""" + # Test various cases with empty lists + test_cases = [ + # Case 1: Single empty list child + ({"Foos": {"Foo": []}}, "<Foos/>"), + # Case 2: Multiple empty list children + ({"Foos": {"Foo": [], "Bar": []}}, "<Foos/>"), + # Case 3: Mixed empty and non-empty children + ({"Foos": {"Foo": [], "Bar": "value"}}, "<Foos><Bar>value</Bar></Foos>"), + # Case 4: Nested empty lists + ({"Foos": {"Foo": {"Bar": []}}}, "<Foos><Foo/></Foos>"), + # Case 5: Empty list with attributes + ({"Foos": {"@attr": "value", "Foo": []}}, '<Foos attr="value"/>'), + ] + + for input_dict, expected_compact in test_cases: + # Test compact mode + compact = unparse( + input_dict, pretty=False, short_empty_elements=True, full_document=False, ) - self.assertEqual(compact_dict, "<Foos><Foo/></Foos>") + assert compact == expected_compact - # Empty list should be filtered out - input_dict_list = {"Foos": {"Foo": []}} - compact_list = unparse( - input_dict_list, - pretty=False, + # Test pretty mode + pretty = unparse( + input_dict, + pretty=True, short_empty_elements=True, full_document=False, ) - self.assertEqual(compact_list, "<Foos/>") + pretty_compacted = pretty.replace("\n", "").replace("\t", "") + assert pretty_compacted == 
expected_compact + + +def test_empty_list_filtering_with_short_empty_elements_false(): + """Test that empty lists are still filtered when short_empty_elements=False.""" + input_dict = {"Foos": {"Foo": []}} + + # With short_empty_elements=False, empty elements should be <tag></tag> + compact = unparse( + input_dict, pretty=False, short_empty_elements=False, full_document=False + ) + pretty = unparse( + input_dict, pretty=True, short_empty_elements=False, full_document=False + ) + pretty_compacted = pretty.replace("\n", "").replace("\t", "") + + # They should be equal when pretty formatting is stripped + assert pretty_compacted == compact + assert compact == "<Foos></Foos>" + assert pretty_compacted == "<Foos></Foos>" + + +def test_non_empty_lists_are_not_filtered(): + """Test that non-empty lists are not filtered out.""" + # Test with non-empty lists + input_dict = {"Foos": {"Foo": ["item1", "item2"]}} + + compact = unparse( + input_dict, pretty=False, short_empty_elements=True, full_document=False + ) + pretty = unparse( + input_dict, pretty=True, short_empty_elements=True, full_document=False + ) + pretty_compacted = pretty.replace("\n", "").replace("\t", "") + + # The lists should be processed normally + assert pretty_compacted == compact + assert compact == "<Foos><Foo>item1</Foo><Foo>item2</Foo></Foos>" + assert ( + pretty_compacted == "<Foos><Foo>item1</Foo><Foo>item2</Foo></Foos>" + ) + + +def test_empty_dict_vs_empty_list_behavior(): + """Test the difference between empty dicts and empty lists.""" + # Empty dict should create a child element + input_dict_dict = {"Foos": {"Foo": {}}} + compact_dict = unparse( + input_dict_dict, + pretty=False, + short_empty_elements=True, + full_document=False, + ) + assert compact_dict == "<Foos><Foo/></Foos>" + + # Empty list should be filtered out + input_dict_list = {"Foos": {"Foo": []}} + compact_list = unparse( + input_dict_list, + pretty=False, + short_empty_elements=True, + full_document=False, + ) + assert 
compact_list == "<Foos/>" + + # They should be different + assert compact_dict != compact_list - # They should be different - self.assertNotEqual(compact_dict, compact_list) - def test_non_string_text_with_attributes(self): - """Test that non-string #text values work when tag has attributes. +def test_non_string_text_with_attributes(): + """Test that non-string #text values work when tag has attributes. - This test covers GitHub issue #366: Tag value (#text) must be a string - when tag has additional parameters - unparse. + This test covers GitHub issue #366: Tag value (#text) must be a string + when tag has additional parameters - unparse. - Also tests that plain values and explicit #text values are treated - consistently (both go through the same conversion logic). - """ - # Test cases for explicit #text values with attributes - self.assertEqual(unparse({"a": {"@param": "test", "#text": 1}}, full_document=False), - '<a param="test">1</a>') + Also tests that plain values and explicit #text values are treated + consistently (both go through the same conversion logic). 
+ """ + # Test cases for explicit #text values with attributes + assert unparse({"a": {"@param": "test", "#text": 1}}, full_document=False) == '<a param="test">1</a>' - self.assertEqual(unparse({"a": {"@param": 42, "#text": 3.14}}, full_document=False), - '<a param="42">3.14</a>') + assert unparse({"a": {"@param": 42, "#text": 3.14}}, full_document=False) == '<a param="42">3.14</a>' - self.assertEqual(unparse({"a": {"@param": "flag", "#text": True}}, full_document=False), - '<a param="flag">true</a>') + assert unparse({"a": {"@param": "flag", "#text": True}}, full_document=False) == '<a param="flag">true</a>' - self.assertEqual(unparse({"a": {"@param": "test", "#text": None}}, full_document=False), - '<a param="test">None</a>') + assert unparse({"a": {"@param": "test", "#text": None}}, full_document=False) == '<a param="test">None</a>' - self.assertEqual(unparse({"a": {"@param": "test", "#text": "string"}}, full_document=False), - '<a param="test">string</a>') + assert unparse({"a": {"@param": "test", "#text": "string"}}, full_document=False) == '<a param="test">string</a>' - self.assertEqual(unparse({"a": {"@attr1": "value1", "@attr2": 2, "#text": 100}}, full_document=False), - '<a attr1="value1" attr2="2">100</a>') + assert unparse({"a": {"@attr1": "value1", "@attr2": 2, "#text": 100}}, full_document=False) == '<a attr1="value1" attr2="2">100</a>' - # Test cases for plain values (should be treated the same as #text) - self.assertEqual(unparse({"a": 1}, full_document=False), '<a>1</a>') - self.assertEqual(unparse({"a": 3.14}, full_document=False), '<a>3.14</a>') - self.assertEqual(unparse({"a": True}, full_document=False), '<a>true</a>') - self.assertEqual(unparse({"a": "hello"}, full_document=False), '<a>hello</a>') - self.assertEqual(unparse({"a": None}, full_document=False), '<a></a>') + # Test cases for plain values (should be treated the same as #text) + assert unparse({"a": 1}, full_document=False) == '<a>1</a>' + assert unparse({"a": 3.14}, 
full_document=False) == '<a>3.14</a>' + assert unparse({"a": True}, full_document=False) == '<a>true</a>' + assert unparse({"a": "hello"}, full_document=False) == '<a>hello</a>' + assert unparse({"a": None}, full_document=False) == '<a></a>' - # Consistency tests: plain values should match explicit #text values - self.assertEqual(unparse({"a": 42}, full_document=False), - unparse({"a": {"#text": 42}}, full_document=False)) + # Consistency tests: plain values should match explicit #text values + assert unparse({"a": 42}, full_document=False) == unparse({"a": {"#text": 42}}, full_document=False) - self.assertEqual(unparse({"a": 3.14}, full_document=False), - unparse({"a": {"#text": 3.14}}, full_document=False)) + assert unparse({"a": 3.14}, full_document=False) == unparse({"a": {"#text": 3.14}}, full_document=False) - self.assertEqual(unparse({"a": True}, full_document=False), - unparse({"a": {"#text": True}}, full_document=False)) + assert unparse({"a": True}, full_document=False) == unparse({"a": {"#text": True}}, full_document=False) - self.assertEqual(unparse({"a": "hello"}, full_document=False), - unparse({"a": {"#text": "hello"}}, full_document=False)) + assert unparse({"a": "hello"}, full_document=False) == unparse({"a": {"#text": "hello"}}, full_document=False) diff --git a/contrib/python/xmltodict/py3/tests/test_xmltodict.py b/contrib/python/xmltodict/py3/tests/test_xmltodict.py index 5c2bbe7d243..7be2eb5c31d 100644 --- a/contrib/python/xmltodict/py3/tests/test_xmltodict.py +++ b/contrib/python/xmltodict/py3/tests/test_xmltodict.py @@ -1,378 +1,399 @@ from xmltodict import parse, ParsingInterrupted import collections -import unittest - -try: - from io import BytesIO as StringIO -except ImportError: - from xmltodict import StringIO +import pytest +from io import BytesIO from xml.parsers.expat import ParserCreate from xml.parsers import expat -def _encode(s): - try: - return bytes(s, 'ascii') - except (NameError, TypeError): - return s +def 
test_string_vs_file(): + xml = '<a>data</a>' + assert parse(xml) == parse(BytesIO(xml.encode('ascii'))) -class XMLToDictTestCase(unittest.TestCase): +def test_minimal(): + assert parse('<a/>') == {'a': None} + assert parse('<a/>', force_cdata=True) == {'a': None} - def test_string_vs_file(self): - xml = '<a>data</a>' - self.assertEqual(parse(xml), - parse(StringIO(_encode(xml)))) - def test_minimal(self): - self.assertEqual(parse('<a/>'), - {'a': None}) - self.assertEqual(parse('<a/>', force_cdata=True), - {'a': None}) +def test_simple(): + assert parse('<a>data</a>') == {'a': 'data'} - def test_simple(self): - self.assertEqual(parse('<a>data</a>'), - {'a': 'data'}) - def test_force_cdata(self): - self.assertEqual(parse('<a>data</a>', force_cdata=True), - {'a': {'#text': 'data'}}) +def test_force_cdata(): + assert parse('<a>data</a>', force_cdata=True) == {'a': {'#text': 'data'}} - def test_selective_force_cdata_tuple(self): - xml = "<a><b>data1</b><c>data2</c><d>data3</d></a>" - # Test with tuple of specific element names - result = parse(xml, force_cdata=("b", "d")) - expected = { - "a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}} - } - self.assertEqual(result, expected) - def test_selective_force_cdata_single_element(self): - xml = "<a><b>data1</b><c>data2</c></a>" - # Test with single element name - result = parse(xml, force_cdata=("b",)) - expected = {"a": {"b": {"#text": "data1"}, "c": "data2"}} - self.assertEqual(result, expected) +def test_selective_force_cdata_tuple(): + xml = "<a><b>data1</b><c>data2</c><d>data3</d></a>" + # Test with tuple of specific element names + result = parse(xml, force_cdata=("b", "d")) + expected = { + "a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}} + } + assert result == expected - def test_selective_force_cdata_empty_tuple(self): - xml = "<a><b>data1</b><c>data2</c></a>" - # Test with empty tuple (should behave like force_cdata=False) - result = parse(xml, force_cdata=()) - expected = 
{"a": {"b": "data1", "c": "data2"}} - self.assertEqual(result, expected) - def test_selective_force_cdata_callable(self): - xml = "<a><b>data1</b><c>data2</c><d>data3</d></a>" +def test_selective_force_cdata_single_element(): + xml = "<a><b>data1</b><c>data2</c></a>" + # Test with single element name + result = parse(xml, force_cdata=("b",)) + expected = {"a": {"b": {"#text": "data1"}, "c": "data2"}} + assert result == expected - # Test with callable function - def should_force_cdata(path, key, value): - return key in ["b", "d"] - result = parse(xml, force_cdata=should_force_cdata) - expected = { - "a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}} - } - self.assertEqual(result, expected) +def test_selective_force_cdata_empty_tuple(): + xml = "<a><b>data1</b><c>data2</c></a>" + # Test with empty tuple (should behave like force_cdata=False) + result = parse(xml, force_cdata=()) + expected = {"a": {"b": "data1", "c": "data2"}} + assert result == expected - def test_selective_force_cdata_nested_elements(self): - xml = "<a><b><c>data1</c></b><d>data2</d></a>" - # Test with nested elements - only 'c' should be forced - result = parse(xml, force_cdata=("c",)) - expected = {"a": {"b": {"c": {"#text": "data1"}}, "d": "data2"}} - self.assertEqual(result, expected) - def test_selective_force_cdata_with_attributes(self): - xml = '<a><b attr="value">data1</b><c>data2</c></a>' - # Test with attributes - force_cdata should still work - result = parse(xml, force_cdata=("b",)) - expected = {"a": {"b": {"@attr": "value", "#text": "data1"}, "c": "data2"}} - self.assertEqual(result, expected) +def test_selective_force_cdata_callable(): + xml = "<a><b>data1</b><c>data2</c><d>data3</d></a>" - def test_selective_force_cdata_backwards_compatibility(self): - xml = "<a><b>data1</b><c>data2</c></a>" - # Test that boolean True still works (backwards compatibility) - result_true = parse(xml, force_cdata=True) - expected_true = {"a": {"b": {"#text": "data1"}, "c": {"#text": 
"data2"}}} - self.assertEqual(result_true, expected_true) + # Test with callable function + def should_force_cdata(path, key, value): + return key in ["b", "d"] - # Test that boolean False still works (backwards compatibility) - result_false = parse(xml, force_cdata=False) - expected_false = {"a": {"b": "data1", "c": "data2"}} - self.assertEqual(result_false, expected_false) + result = parse(xml, force_cdata=should_force_cdata) + expected = { + "a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}} + } + assert result == expected - def test_custom_cdata(self): - self.assertEqual(parse('<a>data</a>', - force_cdata=True, - cdata_key='_CDATA_'), - {'a': {'_CDATA_': 'data'}}) - def test_list(self): - self.assertEqual(parse('<a><b>1</b><b>2</b><b>3</b></a>'), - {'a': {'b': ['1', '2', '3']}}) +def test_selective_force_cdata_nested_elements(): + xml = "<a><b><c>data1</c></b><d>data2</d></a>" + # Test with nested elements - only 'c' should be forced + result = parse(xml, force_cdata=("c",)) + expected = {"a": {"b": {"c": {"#text": "data1"}}, "d": "data2"}} + assert result == expected - def test_attrib(self): - self.assertEqual(parse('<a href="xyz"/>'), - {'a': {'@href': 'xyz'}}) - def test_skip_attrib(self): - self.assertEqual(parse('<a href="xyz"/>', xml_attribs=False), - {'a': None}) +def test_selective_force_cdata_with_attributes(): + xml = '<a><b attr="value">data1</b><c>data2</c></a>' + # Test with attributes - force_cdata should still work + result = parse(xml, force_cdata=("b",)) + expected = {"a": {"b": {"@attr": "value", "#text": "data1"}, "c": "data2"}} + assert result == expected - def test_custom_attrib(self): - self.assertEqual(parse('<a href="xyz"/>', - attr_prefix='!'), - {'a': {'!href': 'xyz'}}) - def test_attrib_and_cdata(self): - self.assertEqual(parse('<a href="xyz">123</a>'), - {'a': {'@href': 'xyz', '#text': '123'}}) +def test_selective_force_cdata_backwards_compatibility(): + xml = "<a><b>data1</b><c>data2</c></a>" + # Test that boolean 
True still works (backwards compatibility) + result_true = parse(xml, force_cdata=True) + expected_true = {"a": {"b": {"#text": "data1"}, "c": {"#text": "data2"}}} + assert result_true == expected_true - def test_semi_structured(self): - self.assertEqual(parse('<a>abc<b/>def</a>'), - {'a': {'b': None, '#text': 'abcdef'}}) - self.assertEqual(parse('<a>abc<b/>def</a>', - cdata_separator='\n'), - {'a': {'b': None, '#text': 'abc\ndef'}}) + # Test that boolean False still works (backwards compatibility) + result_false = parse(xml, force_cdata=False) + expected_false = {"a": {"b": "data1", "c": "data2"}} + assert result_false == expected_false - def test_nested_semi_structured(self): - self.assertEqual(parse('<a>abc<b>123<c/>456</b>def</a>'), - {'a': {'#text': 'abcdef', 'b': { - '#text': '123456', 'c': None}}}) - def test_skip_whitespace(self): - xml = """ - <root> +def test_custom_cdata(): + assert parse('<a>data</a>', force_cdata=True, cdata_key='_CDATA_') == {'a': {'_CDATA_': 'data'}} - <emptya> </emptya> - <emptyb attr="attrvalue"> +def test_list(): + assert parse('<a><b>1</b><b>2</b><b>3</b></a>') == {'a': {'b': ['1', '2', '3']}} - </emptyb> - <value>hello</value> - </root> - """ - self.assertEqual( - parse(xml), - {'root': {'emptya': None, - 'emptyb': {'@attr': 'attrvalue'}, - 'value': 'hello'}}) +def test_attrib(): + assert parse('<a href="xyz"/>') == {'a': {'@href': 'xyz'}} - def test_keep_whitespace(self): - xml = "<root> </root>" - self.assertEqual(parse(xml), dict(root=None)) - self.assertEqual(parse(xml, strip_whitespace=False), - dict(root=' ')) - def test_streaming(self): - def cb(path, item): - cb.count += 1 - self.assertEqual(path, [('a', {'x': 'y'}), ('b', None)]) - self.assertEqual(item, str(cb.count)) - return True - cb.count = 0 - parse('<a x="y"><b>1</b><b>2</b><b>3</b></a>', - item_depth=2, item_callback=cb) - self.assertEqual(cb.count, 3) +def test_skip_attrib(): + assert parse('<a href="xyz"/>', xml_attribs=False) == {'a': None} - def 
test_streaming_interrupt(self): - def cb(path, item): - return False - self.assertRaises(ParsingInterrupted, - parse, '<a>x</a>', - item_depth=1, item_callback=cb) - def test_streaming_generator(self): - def cb(path, item): - cb.count += 1 - self.assertEqual(path, [('a', {'x': 'y'}), ('b', None)]) - self.assertEqual(item, str(cb.count)) - return True - cb.count = 0 - parse((n for n in '<a x="y"><b>1</b><b>2</b><b>3</b></a>'), - item_depth=2, item_callback=cb) - self.assertEqual(cb.count, 3) +def test_custom_attrib(): + assert parse('<a href="xyz"/>', attr_prefix='!') == {'a': {'!href': 'xyz'}} + + +def test_attrib_and_cdata(): + assert parse('<a href="xyz">123</a>') == {'a': {'@href': 'xyz', '#text': '123'}} + + +def test_semi_structured(): + assert parse('<a>abc<b/>def</a>') == {'a': {'b': None, '#text': 'abcdef'}} + assert parse('<a>abc<b/>def</a>', cdata_separator='\n') == {'a': {'b': None, '#text': 'abc\ndef'}} + + +def test_nested_semi_structured(): + assert parse('<a>abc<b>123<c/>456</b>def</a>') == {'a': {'#text': 'abcdef', 'b': {'#text': '123456', 'c': None}}} + + +def test_skip_whitespace(): + xml = """ + <root> + + + <emptya> </emptya> + <emptyb attr="attrvalue"> + + + </emptyb> + <value>hello</value> + </root> + """ + assert parse(xml) == {'root': {'emptya': None, 'emptyb': {'@attr': 'attrvalue'}, 'value': 'hello'}} + + +def test_keep_whitespace(): + xml = "<root> </root>" + assert parse(xml) == dict(root=None) + assert parse(xml, strip_whitespace=False) == dict(root=' ') + + +def test_streaming(): + def cb(path, item): + cb.count += 1 + assert path == [('a', {'x': 'y'}), ('b', None)] + assert item == str(cb.count) + return True + cb.count = 0 + parse('<a x="y"><b>1</b><b>2</b><b>3</b></a>', item_depth=2, item_callback=cb) + assert cb.count == 3 + - def test_streaming_returns_none(self): - # When streaming (item_depth > 0), parse should return None - def cb(path, item): - return True +def test_streaming_interrupt(): + def cb(path, item): + return False + 
with pytest.raises(ParsingInterrupted): + parse('<a>x</a>', item_depth=1, item_callback=cb) - result = parse("<a><b>1</b><b>2</b></a>", item_depth=2, item_callback=cb) - self.assertIsNone(result) - def test_postprocessor(self): - def postprocessor(path, key, value): - try: - return key + ':int', int(value) - except (ValueError, TypeError): - return key, value - self.assertEqual({'a': {'b:int': [1, 2], 'b': 'x'}}, - parse('<a><b>1</b><b>2</b><b>x</b></a>', - postprocessor=postprocessor)) +def test_streaming_generator(): + def cb(path, item): + cb.count += 1 + assert path == [('a', {'x': 'y'}), ('b', None)] + assert item == str(cb.count) + return True + cb.count = 0 + parse((n for n in '<a x="y"><b>1</b><b>2</b><b>3</b></a>'), item_depth=2, item_callback=cb) + assert cb.count == 3 - def test_postprocessor_attribute(self): - def postprocessor(path, key, value): - try: - return key + ':int', int(value) - except (ValueError, TypeError): - return key, value - self.assertEqual({'a': {'@b:int': 1}}, - parse('<a b="1"/>', - postprocessor=postprocessor)) - def test_postprocessor_skip(self): - def postprocessor(path, key, value): - if key == 'b': - value = int(value) - if value == 3: - return None +def test_streaming_returns_none(): + # When streaming (item_depth > 0), parse should return None + def cb(path, item): + return True + + result = parse("<a><b>1</b><b>2</b></a>", item_depth=2, item_callback=cb) + assert result is None + + +def test_postprocessor(): + def postprocessor(path, key, value): + try: + return key + ':int', int(value) + except (ValueError, TypeError): return key, value - self.assertEqual({'a': {'b': [1, 2]}}, - parse('<a><b>1</b><b>2</b><b>3</b></a>', - postprocessor=postprocessor)) + assert {'a': {'b:int': [1, 2], 'b': 'x'}} == parse('<a><b>1</b><b>2</b><b>x</b></a>', postprocessor=postprocessor) - def test_unicode(self): - value = chr(39321) - self.assertEqual({'a': value}, - parse(f'<a>{value}</a>')) - def test_encoded_string(self): - value = chr(39321) 
- xml = f'<a>{value}</a>' - self.assertEqual(parse(xml), - parse(xml.encode('utf-8'))) +def test_postprocessor_attribute(): + def postprocessor(path, key, value): + try: + return key + ':int', int(value) + except (ValueError, TypeError): + return key, value + assert {'a': {'@b:int': 1}} == parse('<a b="1"/>', postprocessor=postprocessor) - def test_namespace_support(self): - xml = """ - <root xmlns="http://defaultns.com/" - xmlns:a="http://a.com/" - xmlns:b="http://b.com/" - version="1.00"> - <x a:attr="val">1</x> - <a:y>2</a:y> - <b:z>3</b:z> - </root> - """ - d = { - 'http://defaultns.com/:root': { - '@version': '1.00', - '@xmlns': { - '': 'http://defaultns.com/', - 'a': 'http://a.com/', - 'b': 'http://b.com/', - }, - 'http://defaultns.com/:x': { - '@http://a.com/:attr': 'val', - '#text': '1', - }, - 'http://a.com/:y': '2', - 'http://b.com/:z': '3', - } - } - res = parse(xml, process_namespaces=True) - self.assertEqual(res, d) - def test_namespace_collapse(self): - xml = """ - <root xmlns="http://defaultns.com/" - xmlns:a="http://a.com/" - xmlns:b="http://b.com/" - version="1.00"> - <x a:attr="val">1</x> - <a:y>2</a:y> - <b:z>3</b:z> - </root> - """ - namespaces = { - 'http://defaultns.com/': '', - 'http://a.com/': 'ns_a', - } - d = { - 'root': { - '@version': '1.00', - '@xmlns': { - '': 'http://defaultns.com/', - 'a': 'http://a.com/', - 'b': 'http://b.com/', - }, - 'x': { - '@ns_a:attr': 'val', - '#text': '1', - }, - 'ns_a:y': '2', - 'http://b.com/:z': '3', +def test_postprocessor_skip(): + def postprocessor(path, key, value): + if key == 'b': + value = int(value) + if value == 3: + return None + return key, value + assert {'a': {'b': [1, 2]}} == parse('<a><b>1</b><b>2</b><b>3</b></a>', postprocessor=postprocessor) + + +def test_unicode(): + value = chr(39321) + assert {'a': value} == parse(f'<a>{value}</a>') + + +def test_encoded_string(): + value = chr(39321) + xml = f'<a>{value}</a>' + assert parse(xml) == parse(xml.encode('utf-8')) + + +def 
test_namespace_support(): + xml = """ + <root xmlns="http://defaultns.com/" + xmlns:a="http://a.com/" + xmlns:b="http://b.com/" + version="1.00"> + <x a:attr="val">1</x> + <a:y>2</a:y> + <b:z>3</b:z> + </root> + """ + d = { + 'http://defaultns.com/:root': { + '@version': '1.00', + '@xmlns': { + '': 'http://defaultns.com/', + 'a': 'http://a.com/', + 'b': 'http://b.com/', + }, + 'http://defaultns.com/:x': { + '@http://a.com/:attr': 'val', + '#text': '1', }, + 'http://a.com/:y': '2', + 'http://b.com/:z': '3', } - res = parse(xml, process_namespaces=True, namespaces=namespaces) - self.assertEqual(res, d) + } + res = parse(xml, process_namespaces=True) + assert res == d - def test_namespace_collapse_all(self): - xml = """ - <root xmlns="http://defaultns.com/" - xmlns:a="http://a.com/" - xmlns:b="http://b.com/" - version="1.00"> - <x a:attr="val">1</x> - <a:y>2</a:y> - <b:z>3</b:z> - </root> - """ - namespaces = collections.defaultdict(lambda: None) - d = { - 'root': { - '@version': '1.00', - '@xmlns': { - '': 'http://defaultns.com/', - 'a': 'http://a.com/', - 'b': 'http://b.com/', - }, - 'x': { - '@attr': 'val', - '#text': '1', - }, - 'y': '2', - 'z': '3', + +def test_namespace_collapse(): + xml = """ + <root xmlns="http://defaultns.com/" + xmlns:a="http://a.com/" + xmlns:b="http://b.com/" + version="1.00"> + <x a:attr="val">1</x> + <a:y>2</a:y> + <b:z>3</b:z> + </root> + """ + namespaces = { + 'http://defaultns.com/': '', + 'http://a.com/': 'ns_a', + } + d = { + 'root': { + '@version': '1.00', + '@xmlns': { + '': 'http://defaultns.com/', + 'a': 'http://a.com/', + 'b': 'http://b.com/', }, - } - res = parse(xml, process_namespaces=True, namespaces=namespaces) - self.assertEqual(res, d) + 'x': { + '@ns_a:attr': 'val', + '#text': '1', + }, + 'ns_a:y': '2', + 'http://b.com/:z': '3', + }, + } + res = parse(xml, process_namespaces=True, namespaces=namespaces) + assert res == d - def test_namespace_ignore(self): - xml = """ - <root xmlns="http://defaultns.com/" - 
xmlns:a="http://a.com/" - xmlns:b="http://b.com/" - version="1.00"> - <x>1</x> - <a:y>2</a:y> - <b:z>3</b:z> - </root> - """ - d = { - 'root': { - '@xmlns': 'http://defaultns.com/', - '@xmlns:a': 'http://a.com/', - '@xmlns:b': 'http://b.com/', - '@version': '1.00', - 'x': '1', - 'a:y': '2', - 'b:z': '3', + +def test_namespace_collapse_all(): + xml = """ + <root xmlns="http://defaultns.com/" + xmlns:a="http://a.com/" + xmlns:b="http://b.com/" + version="1.00"> + <x a:attr="val">1</x> + <a:y>2</a:y> + <b:z>3</b:z> + </root> + """ + namespaces = collections.defaultdict(lambda: None) + d = { + 'root': { + '@version': '1.00', + '@xmlns': { + '': 'http://defaultns.com/', + 'a': 'http://a.com/', + 'b': 'http://b.com/', + }, + 'x': { + '@attr': 'val', + '#text': '1', }, + 'y': '2', + 'z': '3', + }, + } + res = parse(xml, process_namespaces=True, namespaces=namespaces) + assert res == d + + +def test_namespace_ignore(): + xml = """ + <root xmlns="http://defaultns.com/" + xmlns:a="http://a.com/" + xmlns:b="http://b.com/" + version="1.00"> + <x>1</x> + <a:y>2</a:y> + <b:z>3</b:z> + </root> + """ + d = { + 'root': { + '@xmlns': 'http://defaultns.com/', + '@xmlns:a': 'http://a.com/', + '@xmlns:b': 'http://b.com/', + '@version': '1.00', + 'x': '1', + 'a:y': '2', + 'b:z': '3', + }, + } + assert parse(xml) == d + + +def test_force_list_basic(): + xml = """ + <servers> + <server> + <name>server1</name> + <os>os1</os> + </server> + </servers> + """ + expectedResult = { + 'servers': { + 'server': [ + { + 'name': 'server1', + 'os': 'os1', + }, + ], } - self.assertEqual(parse(xml), d) + } + assert parse(xml, force_list=('server',)) == expectedResult + - def test_force_list_basic(self): - xml = """ +def test_force_list_callable(): + xml = """ + <config> <servers> <server> <name>server1</name> <os>os1</os> </server> </servers> - """ - expectedResult = { + <skip> + <server></server> + </skip> + </config> + """ + + def force_list(path, key, value): + """Only return True for servers/server, 
but not for skip/server.""" + if key != 'server': + return False + return path and path[-1][0] == 'servers' + + expectedResult = { + 'config': { 'servers': { 'server': [ { @@ -380,243 +401,229 @@ class XMLToDictTestCase(unittest.TestCase): 'os': 'os1', }, ], - } - } - self.assertEqual(parse(xml, force_list=('server',)), expectedResult) + }, + 'skip': { + 'server': None, + }, + }, + } + assert parse(xml, force_list=force_list, dict_constructor=dict) == expectedResult - def test_force_list_callable(self): - xml = """ - <config> - <servers> - <server> - <name>server1</name> - <os>os1</os> - </server> - </servers> - <skip> - <server></server> - </skip> - </config> - """ - def force_list(path, key, value): - """Only return True for servers/server, but not for skip/server.""" - if key != 'server': - return False - return path and path[-1][0] == 'servers' +def test_disable_entities_true_rejects_xmlbomb(): + xml = """ + <!DOCTYPE xmlbomb [ + <!ENTITY a "1234567890" > + <!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;"> + <!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;"> + ]> + <bomb>&c;</bomb> + """ + with pytest.raises(ValueError, match="entities are disabled"): + parse(xml, disable_entities=True) - expectedResult = { - 'config': { - 'servers': { - 'server': [ - { - 'name': 'server1', - 'os': 'os1', - }, - ], - }, - 'skip': { - 'server': None, - }, - }, - } - self.assertEqual(parse(xml, force_list=force_list, dict_constructor=dict), expectedResult) - def test_disable_entities_true_ignores_xmlbomb(self): - xml = """ - <!DOCTYPE xmlbomb [ - <!ENTITY a "1234567890" > - <!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;"> - <!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;"> - ]> - <bomb>&c;</bomb> - """ - expectedResult = {'bomb': None} - try: - parse_attempt = parse(xml, disable_entities=True) - except expat.ExpatError: - self.assertTrue(True) - else: - self.assertEqual(parse_attempt, expectedResult) +def test_disable_entities_false_returns_xmlbomb(): + xml = """ + <!DOCTYPE xmlbomb [ + <!ENTITY a "1234567890" > + <!ENTITY 
b "&a;&a;&a;&a;&a;&a;&a;&a;"> + <!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;"> + ]> + <bomb>&c;</bomb> + """ + bomb = "1234567890" * 64 + expectedResult = {'bomb': bomb} + assert parse(xml, disable_entities=False) == expectedResult - def test_disable_entities_false_returns_xmlbomb(self): - xml = """ - <!DOCTYPE xmlbomb [ - <!ENTITY a "1234567890" > - <!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;"> - <!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;"> - ]> - <bomb>&c;</bomb> - """ - bomb = "1234567890" * 64 - expectedResult = {'bomb': bomb} - self.assertEqual(parse(xml, disable_entities=False), expectedResult) - def test_disable_entities_true_ignores_external_dtd(self): - xml = """ - <!DOCTYPE external [ - <!ENTITY ee SYSTEM "http://www.python.org/"> - ]> - <root>&ee;</root> - """ - expectedResult = {'root': None} - try: - parse_attempt = parse(xml, disable_entities=True) - except expat.ExpatError: - self.assertTrue(True) - else: - self.assertEqual(parse_attempt, expectedResult) +def test_external_entity(): + xml = """ + <!DOCTYPE external [ + <!ENTITY ee SYSTEM "http://www.python.org/"> + ]> + <root>&ee;</root> + """ + with pytest.raises(ValueError, match="entities are disabled"): + parse(xml) + assert parse(xml, disable_entities=False) == {"root": None} + - def test_disable_entities_true_attempts_external_dtd(self): - xml = """ - <!DOCTYPE external [ - <!ENTITY ee SYSTEM "http://www.python.org/"> - ]> - <root>&ee;</root> - """ +def test_external_entity_with_custom_expat(): + xml = """ + <!DOCTYPE external [ + <!ENTITY ee SYSTEM "http://www.python.org/"> + ]> + <root>&ee;</root> + """ - def raising_external_ref_handler(*args, **kwargs): + class CustomExpat: + def __init__(self, external_entity_result): + self.external_entity_result = external_entity_result + + def ParserCreate(self, *args, **kwargs): parser = ParserCreate(*args, **kwargs) - parser.ExternalEntityRefHandler = lambda *x: 0 - try: - feature = "http://apache.org/xml/features/disallow-doctype-decl" - parser._reader.setFeature(feature, True) -
except AttributeError: - pass + + def _handler(*args, **kwargs): + return self.external_entity_result + + parser.ExternalEntityRefHandler = _handler return parser - expat.ParserCreate = raising_external_ref_handler - # Using this try/catch because a TypeError is thrown before - # the ExpatError. - try: - parse(xml, disable_entities=False, expat=expat) - except expat.ExpatError: - self.assertTrue(True) - else: - self.assertTrue(False) - expat.ParserCreate = ParserCreate - def test_comments(self): - xml = """ - <a> - <b> - <!-- b comment --> - <c> - <!-- c comment --> - 1 - </c> - <d>2</d> - </b> - </a> - """ - expectedResult = { - 'a': { - 'b': { - '#comment': 'b comment', - 'c': { + ExpatError = expat.ExpatError - '#comment': 'c comment', - '#text': '1', - }, - 'd': '2', + with pytest.raises(expat.ExpatError): + parse(xml, disable_entities=False, expat=CustomExpat(0)) + assert parse(xml, disable_entities=False, expat=CustomExpat(1)) == {"root": None} + with pytest.raises(ValueError): + assert parse(xml, disable_entities=True, expat=CustomExpat(1)) + with pytest.raises(ValueError): + assert parse(xml, disable_entities=True, expat=CustomExpat(0)) + + +def test_disable_entities_true_allows_doctype_without_entities(): + xml = """<?xml version='1.0' encoding='UTF-8'?> + <!DOCTYPE data SYSTEM "diagram.dtd"> + <foo>bar</foo> + """ + assert parse(xml, disable_entities=True) == {"foo": "bar"} + assert parse(xml, disable_entities=False) == {"foo": "bar"} + + +def test_disable_entities_allows_comments_by_default(): + xml = """ + <a> + <!-- ignored --> + <b>1</b> + </a> + """ + assert parse(xml) == {'a': {'b': '1'}} + + +def test_comments(): + xml = """ + <a> + <b> + <!-- b comment --> + <c> + <!-- c comment --> + 1 + </c> + <d>2</d> + </b> + </a> + """ + expectedResult = { + 'a': { + 'b': { + '#comment': 'b comment', + 'c': { + + '#comment': 'c comment', + '#text': '1', }, - } + 'd': '2', + }, } - self.assertEqual(parse(xml, process_comments=True), expectedResult) + } + 
assert parse(xml, process_comments=True) == expectedResult + - def test_streaming_with_comments_and_attrs(self): - xml = """ - <a> - <b attr1="value"> - <!-- note --> - <c>cdata</c> - </b> - </a> - """ +def test_streaming_with_comments_and_attrs(): + xml = """ + <a> + <b attr1="value"> + <!-- note --> + <c>cdata</c> + </b> + </a> + """ - def handler(path, item): - expected = { - "@attr1": "value", - "#comment": "note", - "c": "cdata", - } - self.assertEqual(expected, item) - return True + def handler(path, item): + expected = { + "@attr1": "value", + "#comment": "note", + "c": "cdata", + } + assert expected == item + return True - parse(xml, item_depth=2, item_callback=handler, process_comments=True) + parse(xml, item_depth=2, item_callback=handler, process_comments=True) - def test_streaming_memory_usage(self): - # Guard against re-introducing accumulation of streamed items into parent - try: - import tracemalloc - except ImportError: - self.skipTest("tracemalloc not available") - NUM_ITEMS = 20000 +def test_streaming_memory_usage(): + # Guard against re-introducing accumulation of streamed items into parent + try: + import tracemalloc + except ImportError: + pytest.skip("tracemalloc not available") - def xml_gen(): - yield "<a>" - # generate many children with attribute and text - for i in range(NUM_ITEMS): - yield f'<b attr="v">{i % 10}</b>' - yield "</a>" + NUM_ITEMS = 20000 - count = 0 + def xml_gen(): + yield "<a>" + # generate many children with attribute and text + for i in range(NUM_ITEMS): + yield f'<b attr="v">{i % 10}</b>' + yield "</a>" - def cb(path, item): - nonlocal count - count += 1 - return True + count = 0 - tracemalloc.start() - parse(xml_gen(), item_depth=2, item_callback=cb) - current, peak = tracemalloc.get_traced_memory() - tracemalloc.stop() + def cb(path, item): + nonlocal count + count += 1 + return True - self.assertEqual(count, NUM_ITEMS) - # Peak memory should remain reasonably bounded; choose a conservative threshold - # This value 
should stay well below pathological accumulation levels - MAX_BYTES = 32 * 1024 # 32 KiB - self.assertLess(peak, MAX_BYTES, f"peak memory too high: {peak} bytes") + tracemalloc.start() + parse(xml_gen(), item_depth=2, item_callback=cb) + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() - def test_streaming_attrs(self): - xml = """ - <a> - <b attr1="value"> - <c>cdata</c> - </b> - </a> - """ - def handler(path, item): - expected = { - '@attr1': 'value', - 'c': 'cdata' - } - self.assertEqual(expected, item) - return True + assert count == NUM_ITEMS + # Peak memory should remain reasonably bounded; choose a conservative threshold + # This value should stay well below pathological accumulation levels + MAX_BYTES = 32 * 1024 # 32 KiB + assert peak < MAX_BYTES, f"peak memory too high: {peak} bytes" - parse(xml, item_depth=2, item_callback=handler) - def test_namespace_on_root_without_other_attrs(self): - xml = """ - <MyXML xmlns="http://www.xml.org/schemas/Test"> - <Tag1>Text1</Tag1> - <Tag2 attr2="en">Text2</Tag2> - <Tag3>Text3</Tag3> - <Tag4 attr4="en">Text4</Tag4> - </MyXML> - """ - namespaces = { - "http://www.xml.org/schemas/Test": None, - } +def test_streaming_attrs(): + xml = """ + <a> + <b attr1="value"> + <c>cdata</c> + </b> + </a> + """ + def handler(path, item): expected = { - "MyXML": { - "@xmlns": {"": "http://www.xml.org/schemas/Test"}, - "Tag1": "Text1", - "Tag2": {"@attr2": "en", "#text": "Text2"}, - "Tag3": "Text3", - "Tag4": {"@attr4": "en", "#text": "Text4"}, - } + '@attr1': 'value', + 'c': 'cdata' + } + assert expected == item + return True + + parse(xml, item_depth=2, item_callback=handler) + + +def test_namespace_on_root_without_other_attrs(): + xml = """ + <MyXML xmlns="http://www.xml.org/schemas/Test"> + <Tag1>Text1</Tag1> + <Tag2 attr2="en">Text2</Tag2> + <Tag3>Text3</Tag3> + <Tag4 attr4="en">Text4</Tag4> + </MyXML> + """ + namespaces = { + "http://www.xml.org/schemas/Test": None, + } + expected = { + "MyXML": { + "@xmlns": 
{"": "http://www.xml.org/schemas/Test"}, + "Tag1": "Text1", + "Tag2": {"@attr2": "en", "#text": "Text2"}, + "Tag3": "Text3", + "Tag4": {"@attr4": "en", "#text": "Text4"}, } - self.assertEqual( - parse(xml, process_namespaces=True, namespaces=namespaces), expected - ) + } + assert parse(xml, process_namespaces=True, namespaces=namespaces) == expected diff --git a/contrib/python/xmltodict/py3/xmltodict.py b/contrib/python/xmltodict/py3/xmltodict.py index 2735ad71c45..4e97074bae9 100644 --- a/contrib/python/xmltodict/py3/xmltodict.py +++ b/contrib/python/xmltodict/py3/xmltodict.py @@ -7,11 +7,6 @@ from xml.sax.xmlreader import AttributesImpl from io import StringIO from inspect import isgenerator -__author__ = 'Martin Blech' -__version__ = "1.0.0" # x-release-please-version -__license__ = 'MIT' - - class ParsingInterrupted(Exception): pass @@ -306,8 +301,8 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, a list should be forced is more complex. - If `process_comment` is `True` then comment will be added with comment_key - (default=`'#comment'`) to then tag which contains comment + If `process_comments` is `True`, comments will be added using `comment_key` + (default=`'#comment'`) to the tag that contains the comment. For example, given this input: <a> @@ -321,7 +316,7 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, </b> </a> - If called with process_comment=True, it will produce + If called with `process_comments=True`, it will produce this dictionary: 'a': { 'b': { @@ -334,6 +329,10 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, 'd': '2', }, } + Comment text is subject to the `strip_whitespace` flag: when it is left + at the default `True`, comments will have leading and trailing + whitespace removed. Disable `strip_whitespace` to keep comment + indentation or padding intact. 
""" handler = _DictSAXHandler(namespace_separator=namespace_separator, **kwargs) @@ -346,11 +345,7 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, encoding, namespace_separator ) - try: - parser.ordered_attributes = True - except AttributeError: - # Jython's expat does not support ordered_attributes - pass + parser.ordered_attributes = True parser.StartNamespaceDeclHandler = handler.startNamespaceDecl parser.StartElementHandler = handler.startElement parser.EndElementHandler = handler.endElement @@ -359,16 +354,10 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False, parser.CommentHandler = handler.comments parser.buffer_text = True if disable_entities: - try: - # Attempt to disable DTD in Jython's expat parser (Xerces-J). - feature = "http://apache.org/xml/features/disallow-doctype-decl" - parser._reader.setFeature(feature, True) - except AttributeError: - # For CPython / expat parser. - # Anything not handled ends up here and entities aren't expanded. - parser.DefaultHandler = lambda x: None - # Expects an integer return; zero means failure -> expat.ExpatError. - parser.ExternalEntityRefHandler = lambda *x: 1 + def _forbid_entities(*_args, **_kwargs): + raise ValueError("entities are disabled") + + parser.EntityDeclHandler = _forbid_entities if hasattr(xml_input, 'read'): parser.ParseFile(xml_input) elif isgenerator(xml_input): @@ -390,30 +379,6 @@ def _convert_value_to_string(value): if isinstance(value, bool): return "true" if value else "false" return str(value) - - -def _has_angle_brackets(value): - """Return True if value (a str) contains '<' or '>'. - - Non-string values return False. Uses fast substring checks implemented in C. - """ - return isinstance(value, str) and ("<" in value or ">" in value) - - -def _has_invalid_name_chars(value): - """Return True if value (a str) contains any disallowed name characters. - - Disallowed: '<', '>', '/', or any whitespace character. 
- Non-string values return False. - """ - if not isinstance(value, str): - return False - if "<" in value or ">" in value or "/" in value: - return True - # Check for any whitespace (spaces, tabs, newlines, etc.) - return any(ch.isspace() for ch in value) - - def _validate_name(value, kind): """Validate an element/attribute name for XML safety. @@ -437,6 +402,21 @@ def _validate_name(value, kind): raise ValueError(f"Invalid {kind} name: whitespace not allowed") +def _validate_comment(value): + if isinstance(value, bytes): + try: + value = value.decode("utf-8") + except UnicodeDecodeError as exc: + raise ValueError("Comment text must be valid UTF-8") from exc + if not isinstance(value, str): + raise ValueError("Comment text must be a string") + if "--" in value: + raise ValueError("Comment text cannot contain '--'") + if value.endswith("-"): + raise ValueError("Comment text cannot end with '-'") + return value + + def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'): if not isinstance(name, str): return name @@ -475,7 +455,7 @@ def _emit(key, value, content_handler, if comment_text is None: continue comment_text = _convert_value_to_string(comment_text) - if comment_text == "": + if not comment_text: continue if pretty: content_handler.ignorableWhitespace(depth * indent) @@ -555,6 +535,7 @@ def _emit(key, value, content_handler, class _XMLGenerator(XMLGenerator): def comment(self, text): + text = _validate_comment(text) self._write(f"<!--{escape(text)}-->") @@ -570,6 +551,10 @@ def unparse(input_dict, output=None, encoding='utf-8', full_document=True, as XML node attributes, whereas keys equal to `cdata_key` (default=`'#text'`) are treated as character data. + Empty lists are omitted entirely: ``{"a": []}`` produces no ``<a>`` element. + Provide a placeholder entry (for example ``{"a": [""]}``) when an explicit + empty container element must be emitted. + The `pretty` parameter (default=`False`) enables pretty-printing. 
In this mode, lines are terminated with `'\n'` and indented with `'\t'`, but this can be customized with the `newl` and `indent` parameters. diff --git a/contrib/python/xmltodict/py3/ya.make b/contrib/python/xmltodict/py3/ya.make index b9534f46bc6..b27d369dcf0 100644 --- a/contrib/python/xmltodict/py3/ya.make +++ b/contrib/python/xmltodict/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(1.0.0) +VERSION(1.0.2) LICENSE(MIT) diff --git a/contrib/python/yarl/.dist-info/METADATA b/contrib/python/yarl/.dist-info/METADATA index 548567bd124..9870bc4c565 100644 --- a/contrib/python/yarl/.dist-info/METADATA +++ b/contrib/python/yarl/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: yarl -Version: 1.9.4 +Version: 1.12.1 Summary: Yet another URL library Home-page: https://github.com/aio-libs/yarl Author: Andrew Svetlov @@ -24,21 +24,20 @@ Classifier: License :: OSI Approved :: Apache Software License Classifier: Programming Language :: Cython Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 Classifier: Topic :: Internet :: WWW/HTTP Classifier: Topic :: Software Development :: Libraries :: Python Modules -Requires-Python: >=3.7 +Requires-Python: >=3.8 Description-Content-Type: text/x-rst License-File: LICENSE License-File: NOTICE Requires-Dist: idna >=2.0 Requires-Dist: multidict >=4.0 -Requires-Dist: typing-extensions >=3.7.4 ; python_version < "3.8" yarl ==== @@ -227,9 +226,6 @@ Please file an issue on the `bug tracker <https://github.com/aio-libs/yarl/issues>`_ if you have found a bug or have some suggestion in order to improve the library. 
-The library uses `Azure Pipelines <https://dev.azure.com/aio-libs/yarl>`_ for -Continuous Integration. - Discussion list --------------- @@ -250,6 +246,10 @@ It's *Apache 2* licensed and freely available. .. _multidict: https://github.com/aio-libs/multidict +========= +Changelog +========= + .. You should *NOT* be adding new change log entries to this file, this file is managed by towncrier. You *may* edit previous change logs to @@ -262,6 +262,561 @@ It's *Apache 2* licensed and freely available. .. towncrier release notes start +1.12.1 +====== + +*(2024-09-23)* + + +No significant changes. + + +---- + + +1.12.0 +====== + +*(2024-09-23)* + + +Features +-------- + +- Added ``~yarl.URL.path_safe`` to be able to fetch the path without ``%2F`` and ``%25`` decoded -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1150 <https://github.com/aio-libs/yarl/issues/1150>`__. + + +Removals and backward incompatible breaking changes +--------------------------------------------------- + +- Restore decoding ``%2F`` (``/``) in ``URL.path`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + This change restored the behavior before `#1057 <https://github.com/aio-libs/yarl/issues/1057>`__. + + *Related issues and pull requests on GitHub:* + `#1151 <https://github.com/aio-libs/yarl/issues/1151>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of processing paths -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1143 <https://github.com/aio-libs/yarl/issues/1143>`__. + + +---- + + +1.11.1 +====== + +*(2024-09-09)* + + +Bug fixes +--------- + +- Allowed scheme replacement for relative URLs if the scheme does not require a host -- by `@bdraco <https://github.com/sponsors/bdraco>`__. 
+ + *Related issues and pull requests on GitHub:* + `#280 <https://github.com/aio-libs/yarl/issues/280>`__, `#1138 <https://github.com/aio-libs/yarl/issues/1138>`__. + +- Allowed empty host for URL schemes other than the special schemes listed in the WHATWG URL spec -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1136 <https://github.com/aio-libs/yarl/issues/1136>`__. + + +Features +-------- + +- Loosened restriction on integers as query string values to allow classes that implement ``__int__`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1139 <https://github.com/aio-libs/yarl/issues/1139>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of normalizing paths -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1137 <https://github.com/aio-libs/yarl/issues/1137>`__. + + +---- + + +1.11.0 +====== + +*(2024-09-08)* + + +Features +-------- + +- Added ``URL.extend_query()()`` method, which can be used to extend parameters without replacing same named keys -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + This method was primarily added to replace the inefficient hand rolled method currently used in ``aiohttp``. + + *Related issues and pull requests on GitHub:* + `#1128 <https://github.com/aio-libs/yarl/issues/1128>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of the Cython ``cached_property`` implementation -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1122 <https://github.com/aio-libs/yarl/issues/1122>`__. + +- Simplified computing ports by removing unnecessary code -- by `@bdraco <https://github.com/sponsors/bdraco>`__. 
+ + *Related issues and pull requests on GitHub:* + `#1123 <https://github.com/aio-libs/yarl/issues/1123>`__. + +- Improved performance of encoding non IPv6 hosts -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1125 <https://github.com/aio-libs/yarl/issues/1125>`__. + +- Improved performance of ``URL.build()()`` when the path, query string, or fragment is an empty string -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1126 <https://github.com/aio-libs/yarl/issues/1126>`__. + +- Improved performance of the ``URL.update_query()()`` method -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1130 <https://github.com/aio-libs/yarl/issues/1130>`__. + +- Improved performance of processing query string changes when arguments are ``str`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1131 <https://github.com/aio-libs/yarl/issues/1131>`__. + + +---- + + +1.10.0 +====== + +*(2024-09-06)* + + +Bug fixes +--------- + +- Fixed joining a path when the existing path was empty -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + A regression in ``URL.join()()`` was introduced in `#1082 <https://github.com/aio-libs/yarl/issues/1082>`__. + + *Related issues and pull requests on GitHub:* + `#1118 <https://github.com/aio-libs/yarl/issues/1118>`__. + + +Features +-------- + +- Added ``URL.without_query_params()()`` method, to drop some parameters from query string -- by `@hongquan <https://github.com/sponsors/hongquan>`__. + + *Related issues and pull requests on GitHub:* + `#774 <https://github.com/aio-libs/yarl/issues/774>`__, `#898 <https://github.com/aio-libs/yarl/issues/898>`__, `#1010 <https://github.com/aio-libs/yarl/issues/1010>`__. 
+ +- The previously protected types ``_SimpleQuery``, ``_QueryVariable``, and ``_Query`` are now available for use externally as ``SimpleQuery``, ``QueryVariable``, and ``Query`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1050 <https://github.com/aio-libs/yarl/issues/1050>`__, `#1113 <https://github.com/aio-libs/yarl/issues/1113>`__. + + +Contributor-facing changes +-------------------------- + +- Replaced all ``~typing.Optional`` with ``~typing.Union`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1095 <https://github.com/aio-libs/yarl/issues/1095>`__. + + +Miscellaneous internal changes +------------------------------ + +- Significantly improved performance of parsing the network location -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1112 <https://github.com/aio-libs/yarl/issues/1112>`__. + +- Added internal types to the cache to prevent future refactoring errors -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1117 <https://github.com/aio-libs/yarl/issues/1117>`__. + + +---- + + +1.9.11 +====== + +*(2024-09-04)* + + +Bug fixes +--------- + +- Fixed a ``TypeError`` with ``MultiDictProxy`` and Python 3.8 -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1084 <https://github.com/aio-libs/yarl/issues/1084>`__, `#1105 <https://github.com/aio-libs/yarl/issues/1105>`__, `#1107 <https://github.com/aio-libs/yarl/issues/1107>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of encoding hosts -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + Previously, the library would unconditionally try to parse a host as an IP Address. 
The library now avoids trying to parse a host as an IP Address if the string is not in one of the formats described in ``3986#section-3.2.2``. + + *Related issues and pull requests on GitHub:* + `#1104 <https://github.com/aio-libs/yarl/issues/1104>`__. + + +---- + + +1.9.10 +====== + +*(2024-09-04)* + + +Bug fixes +--------- + +- ``URL.join()()`` has been changed to match + ``3986`` and align with + ``/ operation()`` and ``URL.joinpath()()`` + when joining URLs with empty segments. + Previously ``urllib.parse.urljoin`` was used, + which has known issues with empty segments + (`python/cpython#84774 <https://github.com/python/cpython/issues/84774>`_). + + Due to the semantics of ``URL.join()()``, joining an + URL with scheme requires making it relative, prefixing with ``./``. + + .. code-block:: pycon + + >>> URL("https://web.archive.org/web/").join(URL("./https://github.com/aio-libs/yarl")) + URL('https://web.archive.org/web/https://github.com/aio-libs/yarl') + + + Empty segments are honored in the base as well as the joined part. + + .. code-block:: pycon + + >>> URL("https://web.archive.org/web/https://").join(URL("github.com/aio-libs/yarl")) + URL('https://web.archive.org/web/https://github.com/aio-libs/yarl') + + + + -- by `@commonism <https://github.com/sponsors/commonism>`__ + + This change initially appeared in 1.9.5 but was reverted in 1.9.6 to resolve a problem with query string handling. + + *Related issues and pull requests on GitHub:* + `#1039 <https://github.com/aio-libs/yarl/issues/1039>`__, `#1082 <https://github.com/aio-libs/yarl/issues/1082>`__. + + +Features +-------- + +- Added ``~yarl.URL.absolute`` which is now preferred over ``URL.is_absolute()`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1100 <https://github.com/aio-libs/yarl/issues/1100>`__. 
+ + +---- + + +1.9.9 +===== + +*(2024-09-04)* + + +Bug fixes +--------- + +- Added missing type on ``~yarl.URL.port`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1097 <https://github.com/aio-libs/yarl/issues/1097>`__. + + +---- + + +1.9.8 +===== + +*(2024-09-03)* + + +Features +-------- + +- Covered the ``~yarl.URL`` object with types -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1084 <https://github.com/aio-libs/yarl/issues/1084>`__. + +- Cache parsing of IP Addresses when encoding hosts -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1086 <https://github.com/aio-libs/yarl/issues/1086>`__. + + +Contributor-facing changes +-------------------------- + +- Covered the ``~yarl.URL`` object with types -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1084 <https://github.com/aio-libs/yarl/issues/1084>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of handling ports -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + *Related issues and pull requests on GitHub:* + `#1081 <https://github.com/aio-libs/yarl/issues/1081>`__. + + +---- + + +1.9.7 +===== + +*(2024-09-01)* + + +Removals and backward incompatible breaking changes +--------------------------------------------------- + +- Removed support ``3986#section-3.2.3`` port normalization when the scheme is not one of ``http``, ``https``, ``wss``, or ``ws`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + Support for port normalization was recently added in `#1033 <https://github.com/aio-libs/yarl/issues/1033>`__ and contained code that would do blocking I/O if the scheme was not one of the four listed above. The code has been removed because this library is intended to be safe for usage with ``asyncio``. 
+ + *Related issues and pull requests on GitHub:* + `#1076 <https://github.com/aio-libs/yarl/issues/1076>`__. + + +Miscellaneous internal changes +------------------------------ + +- Improved performance of property caching -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + The ``reify`` implementation from ``aiohttp`` was adapted to replace the internal ``cached_property`` implementation. + + *Related issues and pull requests on GitHub:* + `#1070 <https://github.com/aio-libs/yarl/issues/1070>`__. + + +---- + + +1.9.6 +===== + +*(2024-08-30)* + + +Bug fixes +--------- + +- Reverted ``3986`` compatible ``URL.join()()`` honoring empty segments which was introduced in `#1039 <https://github.com/aio-libs/yarl/issues/1039>`__. + + This change introduced a regression handling query string parameters with joined URLs. The change was reverted to maintain compatibility with the previous behavior. + + *Related issues and pull requests on GitHub:* + `#1067 <https://github.com/aio-libs/yarl/issues/1067>`__. + + +---- + + +1.9.5 +===== + +*(2024-08-30)* + + +Bug fixes +--------- + +- Joining URLs with empty segments has been changed + to match ``3986``. + + Previously empty segments would be removed from path, + breaking use-cases such as + + .. code-block:: python + + URL("https://web.archive.org/web/") / "https://github.com/" + + Now ``/ operation()`` and ``URL.joinpath()()`` + keep empty segments, but do not introduce new empty segments. + e.g. + + .. code-block:: python + + URL("https://example.org/") / "" + + does not introduce an empty segment. + + -- by `@commonism <https://github.com/sponsors/commonism>`__ and `@youtux <https://github.com/sponsors/youtux>`__ + + *Related issues and pull requests on GitHub:* + `#1026 <https://github.com/aio-libs/yarl/issues/1026>`__. + +- The default protocol ports of well-known URI schemes are now taken into account + during the normalization of the URL string representation in accordance with + ``3986#section-3.2.3``. 
+ + Specified ports are removed from the ``str`` representation of a ``~yarl.URL`` + if the port matches the scheme's default port -- by `@commonism <https://github.com/sponsors/commonism>`__. + + *Related issues and pull requests on GitHub:* + `#1033 <https://github.com/aio-libs/yarl/issues/1033>`__. + +- ``URL.join()()`` has been changed to match + ``3986`` and align with + ``/ operation()`` and ``URL.joinpath()()`` + when joining URLs with empty segments. + Previously ``urllib.parse.urljoin`` was used, + which has known issues with empty segments + (`python/cpython#84774 <https://github.com/python/cpython/issues/84774>`_). + + Due to the semantics of ``URL.join()()``, joining an + URL with scheme requires making it relative, prefixing with ``./``. + + .. code-block:: pycon + + >>> URL("https://web.archive.org/web/").join(URL("./https://github.com/aio-libs/yarl")) + URL('https://web.archive.org/web/https://github.com/aio-libs/yarl') + + + Empty segments are honored in the base as well as the joined part. + + .. code-block:: pycon + + >>> URL("https://web.archive.org/web/https://").join(URL("github.com/aio-libs/yarl")) + URL('https://web.archive.org/web/https://github.com/aio-libs/yarl') + + + + -- by `@commonism <https://github.com/sponsors/commonism>`__ + + *Related issues and pull requests on GitHub:* + `#1039 <https://github.com/aio-libs/yarl/issues/1039>`__. + + +Removals and backward incompatible breaking changes +--------------------------------------------------- + +- Stopped decoding ``%2F`` (``/``) in ``URL.path``, as this could lead to code incorrectly treating it as a path separator + -- by `@Dreamsorcerer <https://github.com/sponsors/Dreamsorcerer>`__. + + *Related issues and pull requests on GitHub:* + `#1057 <https://github.com/aio-libs/yarl/issues/1057>`__. + +- Dropped support for Python 3.7 -- by `@Dreamsorcerer <https://github.com/sponsors/Dreamsorcerer>`__. 
+ + *Related issues and pull requests on GitHub:* + `#1016 <https://github.com/aio-libs/yarl/issues/1016>`__. + + +Improved documentation +---------------------- + +- On the ``Contributing docs`` page, + a link to the ``Towncrier philosophy`` has been fixed. + + *Related issues and pull requests on GitHub:* + `#981 <https://github.com/aio-libs/yarl/issues/981>`__. + +- The pre-existing ``/ magic method()`` + has been documented in the API reference -- by `@commonism <https://github.com/sponsors/commonism>`__. + + *Related issues and pull requests on GitHub:* + `#1026 <https://github.com/aio-libs/yarl/issues/1026>`__. + + +Packaging updates and notes for downstreams +------------------------------------------- + +- A flaw in the logic for copying the project directory into a + temporary folder that led to infinite recursion when ``TMPDIR`` + was set to a project subdirectory path. This was happening in Fedora + and its downstream due to the use of `pyproject-rpm-macros + <https://src.fedoraproject.org/rpms/pyproject-rpm-macros>`__. It was + only reproducible with ``pip wheel`` and was not affecting the + ``pyproject-build`` users. + + -- by `@hroncok <https://github.com/sponsors/hroncok>`__ and `@webknjaz <https://github.com/sponsors/webknjaz>`__ + + *Related issues and pull requests on GitHub:* + `#992 <https://github.com/aio-libs/yarl/issues/992>`__, `#1014 <https://github.com/aio-libs/yarl/issues/1014>`__. + +- Support Python 3.13 and publish non-free-threaded wheels + + *Related issues and pull requests on GitHub:* + `#1054 <https://github.com/aio-libs/yarl/issues/1054>`__. + + +Contributor-facing changes +-------------------------- + +- The CI/CD setup has been updated to test ``arm64`` wheels + under macOS 14, except for Python 3.7 that is unsupported + in that environment -- by `@webknjaz <https://github.com/sponsors/webknjaz>`__. + + *Related issues and pull requests on GitHub:* + `#1015 <https://github.com/aio-libs/yarl/issues/1015>`__. 
+ +- Removed unused type ignores and casts -- by `@hauntsaninja <https://github.com/sponsors/hauntsaninja>`__. + + *Related issues and pull requests on GitHub:* + `#1031 <https://github.com/aio-libs/yarl/issues/1031>`__. + + +Miscellaneous internal changes +------------------------------ + +- ``port``, ``scheme``, and ``raw_host`` are now ``cached_property`` -- by `@bdraco <https://github.com/sponsors/bdraco>`__. + + ``aiohttp`` accesses these properties quite often, which cause ``urllib`` to build the ``_hostinfo`` property every time. ``port``, ``scheme``, and ``raw_host`` are now cached properties, which will improve performance. + + *Related issues and pull requests on GitHub:* + `#1044 <https://github.com/aio-libs/yarl/issues/1044>`__, `#1058 <https://github.com/aio-libs/yarl/issues/1058>`__. + + +---- + + 1.9.4 (2023-12-06) ================== @@ -305,15 +860,17 @@ Contributor-facing changes It will also be reported to Codecov from any non-release CI jobs. To measure coverage in a development environment, *yarl* can be - installed in editable mode, which requires an environment variable - ``YARL_CYTHON_TRACING=1`` to be set: + installed in editable mode: .. code-block:: console - $ YARL_CYTHON_TRACING=1 python -Im pip install -e . + $ python -Im pip install -e . Editable install produces C-files required for the Cython coverage - plugin to map the measurements back to the PYX-files. (`#961 <https://github.com/aio-libs/yarl/issues/961>`__) + plugin to map the measurements back to the PYX-files. + + `#961 <https://github.com/aio-libs/yarl/issues/961>`__ + - It is now possible to request line tracing in Cython builds using the ``with-cython-tracing`` `PEP 517 <https://peps.python.org/pep-517>`__ config setting -- `@webknjaz <https://github.com/sponsors/webknjaz>`__. @@ -329,7 +886,19 @@ Contributor-facing changes $ python -Im pip install . --config-settings=with-cython-tracing=true For editable installs, this setting is on by default. 
Otherwise, it's - off unless requested explicitly. (`#962 <https://github.com/aio-libs/yarl/issues/962>`__) + off unless requested explicitly. + + The following produces C-files required for the Cython coverage + plugin to map the measurements back to the PYX-files: + + .. code-block:: console + + $ python -Im pip install -e . + + Alternatively, the ``YARL_CYTHON_TRACING=1`` environment variable + can be set to do the same as the `PEP 517 <https://peps.python.org/pep-517>`__ config setting. + + `#962 <https://github.com/aio-libs/yarl/issues/962>`__ 1.9.3 (2023-11-20) @@ -339,7 +908,7 @@ Bug fixes --------- - Stopped dropping trailing slashes in ``yarl.URL.joinpath()`` -- by `@gmacon <https://github.com/sponsors/gmacon>`__. (`#862 <https://github.com/aio-libs/yarl/issues/862>`__, `#866 <https://github.com/aio-libs/yarl/issues/866>`__) -- Started accepting string subclasses in ``__truediv__()`` operations (``URL / segment``) -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#871 <https://github.com/aio-libs/yarl/issues/871>`__, `#884 <https://github.com/aio-libs/yarl/issues/884>`__) +- Started accepting string subclasses in ``yarl.URL.__truediv__()`` operations (``URL / segment``) -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#871 <https://github.com/aio-libs/yarl/issues/871>`__, `#884 <https://github.com/aio-libs/yarl/issues/884>`__) - Fixed the human representation of URLs with square brackets in usernames and passwords -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#876 <https://github.com/aio-libs/yarl/issues/876>`__, `#882 <https://github.com/aio-libs/yarl/issues/882>`__) - Updated type hints to include ``URL.missing_port()``, ``URL.__bytes__()`` and the ``encoding`` argument to ``yarl.URL.joinpath()`` @@ -408,7 +977,7 @@ Contributor-facing changes Bugfixes -------- -- Fix regression with ``__truediv__`` and absolute URLs with empty paths causing the raw path to lack the leading ``/``. 
+- Fix regression with ``yarl.URL.__truediv__()`` and absolute URLs with empty paths causing the raw path to lack the leading ``/``. (`#854 <https://github.com/aio-libs/yarl/issues/854>`_) @@ -430,7 +999,7 @@ Features -------- - Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements. (`#704 <https://github.com/aio-libs/yarl/issues/704>`_) -- Made ``URL.__truediv__()`` return ``NotImplemented`` if called with an +- Made ``URL.__truediv__()()`` return ``NotImplemented`` if called with an unsupported type — by `@michaeljpeters <https://github.com/sponsors/michaeljpeters>`__. (`#832 <https://github.com/aio-libs/yarl/issues/832>`_) diff --git a/contrib/python/yarl/README.rst b/contrib/python/yarl/README.rst index 844ffff6925..fe9c856551d 100644 --- a/contrib/python/yarl/README.rst +++ b/contrib/python/yarl/README.rst @@ -185,9 +185,6 @@ Please file an issue on the `bug tracker <https://github.com/aio-libs/yarl/issues>`_ if you have found a bug or have some suggestion in order to improve the library. -The library uses `Azure Pipelines <https://dev.azure.com/aio-libs/yarl>`_ for -Continuous Integration. 
- Discussion list --------------- diff --git a/contrib/python/yarl/patches/02-dont-normalize-disable-tests.patch b/contrib/python/yarl/patches/02-dont-normalize-disable-tests.patch new file mode 100644 index 00000000000..79c9ccc2d3e --- /dev/null +++ b/contrib/python/yarl/patches/02-dont-normalize-disable-tests.patch @@ -0,0 +1,58 @@ +--- contrib/python/yarl/tests/test_url.py (index) ++++ contrib/python/yarl/tests/test_url.py (working tree) +@@ -1439,6 +1439,7 @@ def test_is_default_port_for_absolute_url_without_port(): + assert url.is_default_port() + + + def test_is_default_port_for_absolute_url_with_default_port(): + url = URL("http://example.com:80") + assert url.is_default_port() +--- contrib/python/yarl/tests/test_url_build.py (index) ++++ contrib/python/yarl/tests/test_url_build.py (working tree) +@@ -15,16 +15,19 @@ def test_build_simple(): + assert str(u) == "http://127.0.0.1" + + + def test_url_build_ipv6(): + u = URL.build(scheme="http", host="::1") + assert str(u) == "http://::1" + + + def test_url_build_ipv6_brackets(): + u = URL.build(scheme="http", host="[::1]") + assert str(u) == "http://::1" + + + def test_url_ipv4_in_ipv6(): + u = URL.build(scheme="http", host="2001:db8:122:344::192.0.2.33") + assert str(u) == "http://2001:db8:122:344::c000:221" +--- contrib/python/yarl/tests/test_url_update_netloc.py (index) ++++ contrib/python/yarl/tests/test_url_update_netloc.py (working tree) +@@ -196,6 +196,7 @@ def test_with_port(): + assert str(url.with_port(8888)) == "http://example.com:8888" + + + def test_with_default_port_normalization() -> None: + url = URL("http://example.com") + assert str(url.with_scheme("https")) == "https://example.com" +@@ -203,6 +204,7 @@ def test_with_default_port_normalization() -> None: + assert str(url.with_port(443).with_scheme("https")) == "https://example.com" + + + def test_with_custom_port_normalization() -> None: + url = URL("http://example.com") + u88 = url.with_port(88) +@@ -211,6 +213,7 @@ def 
test_with_custom_port_normalization() -> None: + assert str(u88.with_scheme("https")) == "https://example.com:88" + + + def test_with_explicit_port_normalization() -> None: + url = URL("http://example.com") + u80 = url.with_port(80) diff --git a/contrib/python/yarl/patches/02-dont-normalize.patch b/contrib/python/yarl/patches/02-dont-normalize.patch new file mode 100644 index 00000000000..ba35613fbcc --- /dev/null +++ b/contrib/python/yarl/patches/02-dont-normalize.patch @@ -0,0 +1,11 @@ +--- contrib/python/yarl/yarl/_url.py (index) ++++ contrib/python/yarl/yarl/_url.py (working tree) +@@ -373,7 +373,7 @@ class URL: + val = self._val + if not val.path and self.absolute and (val.query or val.fragment): + val = val._replace(path="/") +- if (port := self._port_not_default) is None: ++ if False and (port := self._port_not_default) is None: + # port normalization - using None for default ports to remove from rendering + # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3 + val = val._replace( diff --git a/contrib/python/yarl/tests/test_cache.py b/contrib/python/yarl/tests/test_cache.py index 22141dd0854..46d5b01f159 100644 --- a/contrib/python/yarl/tests/test_cache.py +++ b/contrib/python/yarl/tests/test_cache.py @@ -13,7 +13,7 @@ def test_cache_clear() -> None: def test_cache_info() -> None: info = yarl.cache_info() - assert info.keys() == {"idna_encode", "idna_decode"} + assert info.keys() == {"idna_encode", "idna_decode", "ip_address"} def test_cache_configure_default() -> None: @@ -21,8 +21,12 @@ def test_cache_configure_default() -> None: def test_cache_configure_None() -> None: - yarl.cache_configure(idna_encode_size=None, idna_decode_size=None) + yarl.cache_configure( + idna_encode_size=None, idna_decode_size=None, ip_address_size=None + ) def test_cache_configure_explicit() -> None: - yarl.cache_configure(idna_encode_size=128, idna_decode_size=128) + yarl.cache_configure( + idna_encode_size=128, idna_decode_size=128, ip_address_size=128 + ) diff 
--git a/contrib/python/yarl/tests/test_helpers.py b/contrib/python/yarl/tests/test_helpers.py new file mode 100644 index 00000000000..cdfff129e6d --- /dev/null +++ b/contrib/python/yarl/tests/test_helpers.py @@ -0,0 +1,91 @@ +import platform + +import pytest + +from yarl import _helpers, _helpers_py + +IS_PYPY = platform.python_implementation() == "PyPy" + + +class CachedPropertyMixin: + cached_property = NotImplemented + + def test_cached_property(self) -> None: + class A: + def __init__(self): + self._cache = {} + + @self.cached_property # type: ignore[misc] + def prop(self): + return 1 + + a = A() + assert a.prop == 1 + + def test_cached_property_class(self) -> None: + class A: + def __init__(self): + """Init.""" + # self._cache not set because its never accessed in this test + + @self.cached_property # type: ignore[misc] + def prop(self): + """Docstring.""" + + assert isinstance(A.prop, self.cached_property) + assert A.prop.__doc__ == "Docstring." + + def test_cached_property_assignment(self) -> None: + class A: + def __init__(self): + self._cache = {} + + @self.cached_property # type: ignore[misc] + def prop(self): + """Mock property.""" + + a = A() + + with pytest.raises(AttributeError): + a.prop = 123 + + def test_cached_property_without_cache(self) -> None: + class A: + def __init__(self): + pass + + @self.cached_property # type: ignore[misc] + def prop(self): + """Mock property.""" + + a = A() + + with pytest.raises(AttributeError): + a.prop = 123 + + def test_cached_property_check_without_cache(self) -> None: + class A: + def __init__(self): + pass + + @self.cached_property # type: ignore[misc] + def prop(self): + """Mock property.""" + + a = A() + with pytest.raises(AttributeError): + assert a.prop == 1 + + +class TestPyCachedProperty(CachedPropertyMixin): + cached_property = _helpers_py.cached_property # type: ignore[assignment] + + +if ( + not _helpers.NO_EXTENSIONS + and not IS_PYPY + and hasattr(_helpers, "cached_property_c") +): + + class 
TestCCachedProperty(CachedPropertyMixin): + cached_property = _helpers.cached_property_c # type: ignore[assignment, attr-defined, unused-ignore] # noqa: E501 diff --git a/contrib/python/yarl/tests/test_normalize_path.py b/contrib/python/yarl/tests/test_normalize_path.py index defc4d8dd73..20d89818c8e 100644 --- a/contrib/python/yarl/tests/test_normalize_path.py +++ b/contrib/python/yarl/tests/test_normalize_path.py @@ -8,6 +8,7 @@ PATHS = [ ("/", "/"), ("//", "//"), ("///", "///"), + ("path", "path"), # Single-dot ("path/to", "path/to"), ("././path/to", "path/to"), @@ -15,6 +16,7 @@ PATHS = [ ("path/././to", "path/to"), ("path/to/.", "path/to/"), ("path/to/./.", "path/to/"), + ("/path/to/.", "/path/to/"), # Double-dots ("../path/to", "path/to"), ("path/../to", "to"), diff --git a/contrib/python/yarl/tests/test_update_query.py b/contrib/python/yarl/tests/test_update_query.py index 176259d750b..e652f457e26 100644 --- a/contrib/python/yarl/tests/test_update_query.py +++ b/contrib/python/yarl/tests/test_update_query.py @@ -249,6 +249,19 @@ def test_with_int_enum(): assert str(url2) == "http://example.com/path?a=1" +def test_with_class_that_implements__int__(): + """Allow classes that implement __int__ to be used in query strings.""" + + class myint: + + def __int__(self): + return 84 + + url = URL("http://example.com/path") + url2 = url.with_query(a=myint()) + assert str(url2) == "http://example.com/path?a=84" + + def test_with_float_enum(): class FloatEnum(float, enum.Enum): A = 1.1 @@ -353,6 +366,16 @@ def test_update_query_multiple_keys(): assert str(u2) == "http://example.com/path?a=3&a=4" +def test_update_query_with_non_ascii(): + url = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6") + assert url.update_query({"𝕦": "𝕦"}) == url + + +def test_update_query_with_non_ascii_as_str(): + url = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6") + assert url.update_query("𝕦=𝕦") == url + + # mod operator @@ -364,3 +387,81 @@ def 
test_update_query_with_mod_operator(): assert str(url % {"a": "1"} % {"b": "2"}) == "http://example.com/?a=1&b=2" assert str(url % {"a": "1"} % {"a": "3", "b": "2"}) == "http://example.com/?a=3&b=2" assert str(url / "foo" % {"a": "1"}) == "http://example.com/foo?a=1" + + +def test_extend_query(): + url = URL("http://example.com/") + assert str(url.extend_query({"a": "1"})) == "http://example.com/?a=1" + assert str(URL("test").extend_query(a=1)) == "test?a=1" + + url = URL("http://example.com/?foo=bar") + expected_url = URL("http://example.com/?foo=bar&baz=foo") + + assert url.extend_query({"baz": "foo"}) == expected_url + assert url.extend_query(baz="foo") == expected_url + assert url.extend_query("baz=foo") == expected_url + + +def test_extend_query_with_args_and_kwargs(): + url = URL("http://example.com/") + + with pytest.raises(ValueError): + url.extend_query("a", foo="bar") + + +def test_extend_query_with_multiple_args(): + url = URL("http://example.com/") + + with pytest.raises(ValueError): + url.extend_query("a", "b") + + +def test_extend_query_with_none_arg(): + url = URL("http://example.com/?foo=bar&baz=foo") + assert url.extend_query(None) == url + + +def test_extend_query_with_empty_dict(): + url = URL("http://example.com/?foo=bar&baz=foo") + assert url.extend_query({}) == url + + +def test_extend_query_existing_keys(): + url = URL("http://example.com/?a=2") + assert str(url.extend_query({"a": "1"})) == "http://example.com/?a=2&a=1" + assert str(URL("test").extend_query(a=1)) == "test?a=1" + + url = URL("http://example.com/?foo=bar&baz=original") + expected_url = URL("http://example.com/?foo=bar&baz=original&baz=foo") + + assert url.extend_query({"baz": "foo"}) == expected_url + assert url.extend_query(baz="foo") == expected_url + assert url.extend_query("baz=foo") == expected_url + + +def test_extend_query_with_args_and_kwargs_with_existing(): + url = URL("http://example.com/?a=original") + + with pytest.raises(ValueError): + url.extend_query("a", 
foo="bar") + + +def test_extend_query_with_non_ascii(): + url = URL("http://example.com/?foo=bar&baz=foo") + expected = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6") + assert url.extend_query({"𝕦": "𝕦"}) == expected + + +def test_extend_query_with_non_ascii_as_str(): + url = URL("http://example.com/?foo=bar&baz=foo&") + expected = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6") + assert url.extend_query("𝕦=𝕦") == expected + + +def test_extend_query_with_non_ascii_same_key(): + url = URL("http://example.com/?foo=bar&baz=foo&%F0%9D%95%A6=%F0%9D%95%A6") + expected = URL( + "http://example.com/?foo=bar&baz=foo" + "&%F0%9D%95%A6=%F0%9D%95%A6&%F0%9D%95%A6=%F0%9D%95%A6" + ) + assert url.extend_query({"𝕦": "𝕦"}) == expected diff --git a/contrib/python/yarl/tests/test_url.py b/contrib/python/yarl/tests/test_url.py index 59d543754d3..1dd98197fe1 100644 --- a/contrib/python/yarl/tests/test_url.py +++ b/contrib/python/yarl/tests/test_url.py @@ -1,5 +1,5 @@ from enum import Enum -from urllib.parse import SplitResult +from urllib.parse import SplitResult, quote, unquote import pytest @@ -9,7 +9,7 @@ from yarl import URL def test_inheritance(): with pytest.raises(TypeError) as ctx: - class MyURL(URL): # type: ignore[misc] + class MyURL(URL): pass assert ( @@ -114,11 +114,13 @@ def test_scheme(): def test_raw_user(): url = URL("http://[email protected]") assert "user" == url.raw_user + assert url.raw_user == url._val.username def test_raw_user_non_ascii(): url = URL("http://бажан@example.com") assert "%D0%B1%D0%B0%D0%B6%D0%B0%D0%BD" == url.raw_user + assert url.raw_user == url._val.username def test_no_user(): @@ -134,11 +136,13 @@ def test_user_non_ascii(): def test_raw_password(): url = URL("http://user:[email protected]") assert "password" == url.raw_password + assert url.raw_password == url._val.password def test_raw_password_non_ascii(): url = URL("http://user:пароль@example.com") assert "%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" == 
url.raw_password + assert url.raw_password == url._val.password def test_password_non_ascii(): @@ -152,6 +156,14 @@ def test_password_without_user(): assert "password" == url.password +def test_empty_password_without_user(): + url = URL("http://:@example.com") + assert url.user is None + assert url.password == "" + assert url.raw_password == "" + assert url.raw_password == url._val.password + + def test_user_empty_password(): url = URL("http://user:@example.com") assert "user" == url.user @@ -161,11 +173,13 @@ def test_user_empty_password(): def test_raw_host(): url = URL("http://example.com") assert "example.com" == url.raw_host + assert url.raw_host == url._val.hostname def test_raw_host_non_ascii(): url = URL("http://оун-упа.укр") assert "xn----8sb1bdhvc.xn--j1amh" == url.raw_host + assert url.raw_host == url._val.hostname def test_host_non_ascii(): @@ -186,16 +200,19 @@ def test_host_with_underscore(): def test_raw_host_when_port_is_specified(): url = URL("http://example.com:8888") assert "example.com" == url.raw_host + assert url.raw_host == url._val.hostname def test_raw_host_from_str_with_ipv4(): url = URL("http://127.0.0.1:80") assert url.raw_host == "127.0.0.1" + assert url.raw_host == url._val.hostname def test_raw_host_from_str_with_ipv6(): url = URL("http://[::1]:80") assert url.raw_host == "::1" + assert url.raw_host == url._val.hostname def test_authority_full() -> None: @@ -219,15 +236,23 @@ def test_authority_full_nonasci() -> None: assert url.authority == "степан:пароль@слава.укр:8080" +def test_authority_unknown_scheme() -> None: + v = "scheme://user:[email protected]:43/path/to?a=1&b=2" + url = URL(v) + assert str(url) == v + + def test_lowercase(): url = URL("http://gitHUB.com") assert url.raw_host == "github.com" assert url.host == url.raw_host + assert url.raw_host == url._val.hostname def test_lowercase_nonascii(): url = URL("http://Слава.Укр") assert url.raw_host == "xn--80aaf8a3a.xn--j1amh" + assert url.raw_host == url._val.hostname assert 
url.host == "слава.укр" @@ -235,6 +260,7 @@ def test_compressed_ipv6(): url = URL("http://[1DEC:0:0:0::1]") assert url.raw_host == "1dec::1" assert url.host == url.raw_host + assert url.raw_host == url._val.hostname def test_ipv4_zone(): @@ -242,16 +268,19 @@ def test_ipv4_zone(): url = URL("http://1.2.3.4%тест%42:123") assert url.raw_host == "1.2.3.4%тест%42" assert url.host == url.raw_host + assert url.raw_host == url._val.hostname def test_port_for_explicit_port(): url = URL("http://example.com:8888") assert 8888 == url.port + assert url.explicit_port == url._val.port def test_port_for_implicit_port(): url = URL("http://example.com") assert 80 == url.port + assert url.explicit_port == url._val.port def test_port_for_relative_url(): @@ -267,21 +296,25 @@ def test_port_for_unknown_scheme(): def test_explicit_port_for_explicit_port(): url = URL("http://example.com:8888") assert 8888 == url.explicit_port + assert url.explicit_port == url._val.port def test_explicit_port_for_implicit_port(): url = URL("http://example.com") assert url.explicit_port is None + assert url.explicit_port == url._val.port def test_explicit_port_for_relative_url(): url = URL("/path/to") assert url.explicit_port is None + assert url.explicit_port == url._val.port def test_explicit_port_for_unknown_scheme(): url = URL("unknown://example.com") assert url.explicit_port is None + assert url.explicit_port == url._val.port def test_raw_path_string_empty(): @@ -312,6 +345,49 @@ def test_path_with_spaces(): assert "/a b" == url.path +def test_path_with_2F(): + """Path should decode %2F.""" + + url = URL("http://example.com/foo/bar%2fbaz") + assert url.path == "/foo/bar/baz" + + +def test_path_safe_with_2F(): + """Path safe should not decode %2F, otherwise it may look like a path separator.""" + + url = URL("http://example.com/foo/bar%2fbaz") + assert url.path_safe == "/foo/bar%2Fbaz" + + +def test_path_safe_with_25(): + """Path safe should not decode %25, otherwise it is prone to double unquoting.""" 
+ + url = URL("http://example.com/foo/bar%252Fbaz") + assert url.path_safe == "/foo/bar%252Fbaz" + unquoted = url.path_safe.replace("%2F", "/").replace("%25", "%") + assert unquoted == "/foo/bar%2Fbaz" + + + "original_path", + [ + "m+@bar/baz", + "m%2B@bar/baz", + "m%252B@bar/baz", + "m%2F@bar/baz", + ], +) +def test_path_safe_only_round_trips(original_path: str) -> None: + """Path safe can round trip with documented decode method.""" + encoded_once = quote(original_path, safe="") + encoded_twice = quote(encoded_once, safe="") + + url = URL(f"http://example.com/{encoded_twice}") + unquoted = url.path_safe.replace("%2F", "/").replace("%25", "%") + assert unquoted == f"/{encoded_once}" + assert unquote(unquoted) == f"/{original_path}" + + def test_raw_path_for_empty_url(): url = URL() assert "" == url.raw_path @@ -800,6 +876,9 @@ def test_div_with_dots(): "/path/", ("to",), "http://example.com/path/to", id="path-with-slash" ), pytest.param( + "/path", ("",), "http://example.com/path/", id="path-add-trailing-slash" + ), + pytest.param( "/path?a=1#frag", ("to",), "http://example.com/path/to", @@ -807,6 +886,15 @@ def test_div_with_dots(): ), pytest.param("", ("path/",), "http://example.com/path/", id="trailing-slash"), pytest.param( + "", + ( + "path", + "", + ), + "http://example.com/path/", + id="trailing-slash-empty-string", + ), + pytest.param( "", ("path/", "to/"), "http://example.com/path/to/", id="duplicate-slash" ), pytest.param("", (), "http://example.com", id="empty-segments"), @@ -828,6 +916,39 @@ def test_joinpath(base, to_join, expected): @pytest.mark.parametrize( + "base,to_join,expected", + [ + pytest.param("path", "a", "path/a", id="default_default"), + pytest.param("path", "./a", "path/a", id="default_relative"), + pytest.param("path/", "a", "path/a", id="empty-segment_default"), + pytest.param("path/", "./a", "path/a", id="empty-segment_relative"), + pytest.param("path", ".//a", "path//a", id="default_empty-segment"), + pytest.param("path/", ".//a", 
"path//a", id="empty-segment_empty_segment"), + pytest.param("path//", "a", "path//a", id="empty-segments_default"), + pytest.param("path//", "./a", "path//a", id="empty-segments_relative"), + pytest.param("path//", ".//a", "path///a", id="empty-segments_empty-segment"), + pytest.param("path", "a/", "path/a/", id="default_trailing-empty-segment"), + pytest.param("path", "a//", "path/a//", id="default_trailing-empty-segments"), + pytest.param("path", "a//b", "path/a//b", id="default_embedded-empty-segment"), + ], +) +def test_joinpath_empty_segments(base, to_join, expected): + url = URL(f"http://example.com/{base}") + assert ( + f"http://example.com/{expected}" == str(url.joinpath(to_join)) + and str(url / to_join) == f"http://example.com/{expected}" + ) + + +def test_joinpath_single_empty_segments(): + """joining standalone empty segments does not create empty segments""" + a = URL("/1//2///3") + assert a.parts == ("/", "1", "", "2", "", "", "3") + b = URL("scheme://host").joinpath(*a.parts[1:]) + assert b.path == "/1/2/3" + + "url,to_join,expected", [ pytest.param(URL(), ("a",), ("a",), id="empty-url"), @@ -908,6 +1029,35 @@ def test_joinpath_path_starting_from_slash_is_forbidden(): assert url.joinpath("/to/others") +PATHS = [ + # No dots + ("", ""), + ("path", "path"), + # Single-dot + ("path/to", "path/to"), + ("././path/to", "path/to"), + ("path/./to", "path/to"), + ("path/././to", "path/to"), + ("path/to/.", "path/to/"), + ("path/to/./.", "path/to/"), + # Double-dots + ("../path/to", "path/to"), + ("path/../to", "to"), + ("path/../../to", "to"), + # Non-ASCII characters + ("μονοπάτι/../../να/ᴜɴɪ/ᴄᴏᴅᴇ", "να/ᴜɴɪ/ᴄᴏᴅᴇ"), + ("μονοπάτι/../../να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/.", "να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/"), +] + + [email protected]("original,expected", PATHS) +def test_join_path_normalized(original: str, expected: str) -> None: + """Test that joinpath normalizes paths.""" + base_url = URL("http://example.com") + new_url = base_url.joinpath(original) + assert new_url.path == f"/{expected}" + + # 
with_path @@ -1249,26 +1399,31 @@ def test_with_suffix_replace(): def test_is_absolute_for_relative_url(): url = URL("/path/to") assert not url.is_absolute() + assert not url.absolute def test_is_absolute_for_absolute_url(): url = URL("http://example.com") assert url.is_absolute() + assert url.absolute def test_is_non_absolute_for_empty_url(): url = URL() assert not url.is_absolute() + assert not url.absolute def test_is_non_absolute_for_empty_url2(): url = URL("") assert not url.is_absolute() + assert not url.absolute def test_is_absolute_path_starting_from_double_slash(): url = URL("//www.python.org") assert url.is_absolute() + assert url.absolute # is_default_port @@ -1284,9 +1439,11 @@ def test_is_default_port_for_absolute_url_without_port(): assert url.is_default_port() def test_is_default_port_for_absolute_url_with_default_port(): url = URL("http://example.com:80") assert url.is_default_port() + assert str(url) == "http://example.com" def test_is_default_port_for_absolute_url_with_nondefault_port(): @@ -1605,6 +1762,89 @@ def test_join_from_rfc_3986_abnormal(url, expected): assert base.join(url) == expected +EMPTY_SEGMENTS = [ + ( + "https://web.archive.org/web/", + "./https://github.com/aio-libs/yarl", + "https://web.archive.org/web/https://github.com/aio-libs/yarl", + ), + ( + "https://web.archive.org/web/https://github.com/", + "aio-libs/yarl", + "https://web.archive.org/web/https://github.com/aio-libs/yarl", + ), +] + + [email protected]("base,url,expected", EMPTY_SEGMENTS) +def test_join_empty_segments(base, url, expected): + base = URL(base) + url = URL(url) + expected = URL(expected) + joined = base.join(url) + assert joined == expected + + +SIMPLE_BASE = "http://a/b/c/d" +URLLIB_URLJOIN = [ + ("", "http://a/b/c/g?y/./x", "http://a/b/c/g?y/./x"), + ("", "http://a/./g", "http://a/./g"), + ("svn://pathtorepo/dir1", "dir2", "svn://pathtorepo/dir2"), + ("svn+ssh://pathtorepo/dir1", "dir2", "svn+ssh://pathtorepo/dir2"), + ("ws://a/b", "g", "ws://a/g"), + 
("wss://a/b", "g", "wss://a/g"), + # test for issue22118 duplicate slashes + (SIMPLE_BASE + "/", "foo", SIMPLE_BASE + "/foo"), + # Non-RFC-defined tests, covering variations of base and trailing + # slashes + ("http://a/b/c/d/e/", "../../f/g/", "http://a/b/c/f/g/"), + ("http://a/b/c/d/e", "../../f/g/", "http://a/b/f/g/"), + ("http://a/b/c/d/e/", "/../../f/g/", "http://a/f/g/"), + ("http://a/b/c/d/e", "/../../f/g/", "http://a/f/g/"), + ("http://a/b/c/d/e/", "../../f/g", "http://a/b/c/f/g"), + ("http://a/b/", "../../f/g/", "http://a/f/g/"), + ("a", "b", "b"), + ("http:///", "..", "http:///"), + ("a/", "b", "a/b"), + ("a/b", "c", "a/c"), + ("a/b/", "c", "a/b/c"), + ( + "https://x.org/", + "/?text=Hello+G%C3%BCnter", + "https://x.org/?text=Hello+G%C3%BCnter", + ), + ( + "https://x.org/", + "?text=Hello+G%C3%BCnter", + "https://x.org/?text=Hello+G%C3%BCnter", + ), + ("http://example.com", "http://example.com", "http://example.com"), + ("http://x.org", "https://x.org#fragment", "https://x.org#fragment"), +] + + [email protected]("base,url,expected", URLLIB_URLJOIN) +def test_join_cpython_urljoin(base, url, expected): + # tests from cpython urljoin + base = URL(base) + url = URL(url) + expected = URL(expected) + joined = base.join(url) + assert joined == expected + + +def test_join_preserves_leading_slash(): + """Test that join preserves leading slash in path.""" + base = URL.build(scheme="https", host="localhost", port=443) + new = base.join(URL("") / "_msearch") + assert str(new) == "https://localhost/_msearch" + assert new.path == "/_msearch" + + +def test_empty_authority(): + assert URL("http:///").authority == "" + + def test_split_result_non_decoded(): with pytest.raises(ValueError): URL(SplitResult("http", "example.com", "path", "qs", "frag")) @@ -1700,6 +1940,7 @@ def test_relative_is_relative(): url = URL("http://user:[email protected]:8080/path?a=b#frag") rel = url.relative() assert not rel.is_absolute() + assert not rel.absolute def 
test_relative_abs_parts_are_removed(): @@ -1731,3 +1972,72 @@ def test_requoting(): u = URL("http://127.0.0.1/?next=http%3A//example.com/") assert u.raw_query_string == "next=http://example.com/" assert str(u) == "http://127.0.0.1/?next=http://example.com/" + + +def test_join_query_string(): + """Test that query strings are correctly joined.""" + original = URL("http://127.0.0.1:62869") + path_url = URL( + "/api?start=2022-03-27T14:05:00%2B03:00&end=2022-03-27T16:05:00%2B03:00" + ) + assert path_url.query.get("start") == "2022-03-27T14:05:00+03:00" + assert path_url.query.get("end") == "2022-03-27T16:05:00+03:00" + new = original.join(path_url) + assert new.query.get("start") == "2022-03-27T14:05:00+03:00" + assert new.query.get("end") == "2022-03-27T16:05:00+03:00" + + +def test_join_query_string_with_special_chars(): + """Test url joining when the query string has non-ascii params.""" + original = URL("http://127.0.0.1") + path_url = URL("/api?text=%D1%82%D0%B5%D0%BA%D1%81%D1%82") + assert path_url.query.get("text") == "текст" + new = original.join(path_url) + assert new.query.get("text") == "текст" + + +def test_join_encoded_url(): + """Test that url encoded urls are correctly joined.""" + original = URL("http://127.0.0.1:62869") + path_url = URL("/api/%34") + assert original.path == "/" + assert path_url.path == "/api/4" + new = original.join(path_url) + assert new.path == "/api/4" + + +# cache + + +def test_parsing_populates_cache(): + """Test that parsing a URL populates the cache.""" + url = URL("http://user:[email protected]:80/path?a=b#frag") + assert url._cache["raw_user"] == "user" + assert url._cache["raw_password"] == "password" + assert url._cache["raw_host"] == "example.com" + assert url._cache["explicit_port"] == 80 + assert url._cache["raw_query_string"] == "a=b" + assert url._cache["raw_fragment"] == "frag" + assert url._cache["scheme"] == "http" + assert url.raw_user == "user" + assert url.raw_password == "password" + assert url.raw_host == 
"example.com" + assert url.explicit_port == 80 + assert url.raw_query_string == "a=b" + assert url.raw_fragment == "frag" + assert url.scheme == "http" + url._cache.clear() + assert url.raw_user == "user" + assert url.raw_password == "password" + assert url.raw_host == "example.com" + assert url.explicit_port == 80 + assert url.raw_query_string == "a=b" + assert url.raw_fragment == "frag" + assert url.scheme == "http" + assert url._cache["raw_user"] == "user" + assert url._cache["raw_password"] == "password" + assert url._cache["raw_host"] == "example.com" + assert url._cache["explicit_port"] == 80 + assert url._cache["raw_query_string"] == "a=b" + assert url._cache["raw_fragment"] == "frag" + assert url._cache["scheme"] == "http" diff --git a/contrib/python/yarl/tests/test_url_build.py b/contrib/python/yarl/tests/test_url_build.py index 5aecbc58545..a0ab77b2d4e 100644 --- a/contrib/python/yarl/tests/test_url_build.py +++ b/contrib/python/yarl/tests/test_url_build.py @@ -15,6 +15,24 @@ def test_build_simple(): assert str(u) == "http://127.0.0.1" +def test_url_build_ipv6(): + u = URL.build(scheme="http", host="::1") + assert str(u) == "http://::1" + + +def test_url_build_ipv6_brackets(): + u = URL.build(scheme="http", host="[::1]") + assert str(u) == "http://::1" + + +def test_url_ipv4_in_ipv6(): + u = URL.build(scheme="http", host="2001:db8:122:344::192.0.2.33") + assert str(u) == "http://2001:db8:122:344::c000:221" + + def test_build_with_scheme(): u = URL.build(scheme="blob", path="path") assert str(u) == "blob:path" diff --git a/contrib/python/yarl/tests/test_url_parsing.py b/contrib/python/yarl/tests/test_url_parsing.py index 11aa8e92a48..4fe95185e71 100644 --- a/contrib/python/yarl/tests/test_url_parsing.py +++ b/contrib/python/yarl/tests/test_url_parsing.py @@ -41,11 +41,7 @@ class TestScheme: def test_no_scheme1(self): u = URL("google.com:80") # See: https://bugs.python.org/issue27657 - if ( - sys.version_info[:3] == (3, 7, 6) - or sys.version_info[:3] == 
(3, 8, 1) - or sys.version_info >= (3, 9, 0) - ): + if sys.version_info[:3] == (3, 8, 1) or sys.version_info >= (3, 9, 0): assert u.scheme == "google.com" assert u.host is None assert u.path == "80" @@ -214,19 +210,14 @@ class TestPort: assert u.query_string == "" assert u.fragment == "" - @pytest.mark.xfail( - # FIXME: remove "no cover" pragmas upon xfail marker deletion - reason="https://github.com/aio-libs/yarl/issues/821", - raises=ValueError, - ) def test_no_host(self): - u = URL("//:80") - assert u.scheme == "" # pragma: no cover - assert u.host == "" # pragma: no cover - assert u.port == 80 # pragma: no cover - assert u.path == "/" # pragma: no cover - assert u.query_string == "" # pragma: no cover - assert u.fragment == "" # pragma: no cover + u = URL("//:77") + assert u.scheme == "" + assert u.host == "" + assert u.port == 77 + assert u.path == "/" + assert u.query_string == "" + assert u.fragment == "" def test_double_port(self): with pytest.raises(ValueError): @@ -461,9 +452,19 @@ class TestFragment: class TestStripEmptyParts: - def test_all_empty(self): + def test_all_empty_http(self): with pytest.raises(ValueError): - URL("//@:?#") + URL("http://@:?#") + + def test_all_empty(self): + u = URL("//@:?#") + assert u.scheme == "" + assert u.user is None + assert u.password is None + assert u.host == "" + assert u.path == "" + assert u.query_string == "" + assert u.fragment == "" def test_path_only(self): u = URL("///path") @@ -584,3 +585,22 @@ class TestStripEmptyParts: assert u.path == "" assert u.query_string == "" assert u.fragment == "" + + + ("scheme"), + [ + ("http"), + ("https"), + ("ws"), + ("wss"), + ("ftp"), + ], +) +def test_schemes_that_require_host(scheme: str) -> None: + """Verify that schemes that require a host raise with empty host.""" + expect = ( + "Invalid URL: host is required for " f"absolute urls with the {scheme} scheme" + ) + with pytest.raises(ValueError, match=expect): + URL(f"{scheme}://:1") diff --git 
a/contrib/python/yarl/tests/test_url_query.py b/contrib/python/yarl/tests/test_url_query.py index bcd2433cbcc..cf4959e4faf 100644 --- a/contrib/python/yarl/tests/test_url_query.py +++ b/contrib/python/yarl/tests/test_url_query.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List, Sequence, Tuple from urllib.parse import parse_qs, urlencode import pytest @@ -88,7 +88,7 @@ def test_query_dont_unqoute_twice(): _SEMICOLON_XFAIL = pytest.mark.xfail( condition="separator" not in parse_qs.__code__.co_varnames, reason=( - "Python versions < 3.7.10, < 3.8.8 and < 3.9.2 lack a fix for " + "Python versions < 3.8.8 and < 3.9.2 lack a fix for " 'CVE-2021-23336 dropping ";" as a valid query parameter separator, ' "making this test fail." ), @@ -134,10 +134,10 @@ def test_query_separators_from_parsing( URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL, ) def test_query_separators_from_update_query( - original_url, - expected_query_len, - expected_value_a, -): + original_url: URL, + expected_query_len: int, + expected_value_a: str, +) -> None: new_url = original_url.update_query({"c": expected_value_a}) assert new_url.query["a"] == expected_value_a assert new_url.query["c"] == expected_value_a @@ -148,10 +148,10 @@ def test_query_separators_from_update_query( URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES, ) def test_query_separators_from_with_query( - original_url, - expected_query_len, - expected_value_a, -): + original_url: URL, + expected_query_len: int, + expected_value_a: int, +) -> None: new_url = original_url.with_query({"c": expected_value_a}) assert new_url.query["c"] == expected_value_a @@ -161,13 +161,51 @@ def test_query_separators_from_with_query( URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES, ) def test_query_from_empty_update_query( - original_url, - expected_query_len, - expected_value_a, -): + original_url: URL, + expected_query_len: int, + expected_value_a: str, +) -> None: new_url = original_url.update_query({}) assert new_url.query["a"] == 
original_url.query["a"] if "b" in original_url.query: assert new_url.query["b"] == original_url.query["b"] + + + ("original_query_string", "keys_to_drop", "expected_query_string"), + [ + ("a=10&b=M%C3%B9a+xu%C3%A2n&u%E1%BB%91ng=cafe", ["a"], "b=Mùa xuân&uống=cafe"), + ("a=10&b=M%C3%B9a+xu%C3%A2n", ["b"], "a=10"), + ("a=10&b=M%C3%B9a+xu%C3%A2n&c=30", ["b"], "a=10&c=30"), + ( + "a=10&b=M%C3%B9a+xu%C3%A2n&u%E1%BB%91ng=cafe", + ["uống"], + "a=10&b=Mùa xuân", + ), + ("a=10&b=M%C3%B9a+xu%C3%A2n", ["a", "b"], ""), + ], +) +def test_without_query_params( + original_query_string: str, keys_to_drop: Sequence[str], expected_query_string: str +) -> None: + url = URL(f"http://example.com?{original_query_string}") + new_url = url.without_query_params(*keys_to_drop) + assert new_url.query_string == expected_query_string + assert new_url is not url + + + ("original_query_string", "keys_to_drop"), + [ + ("a=10&b=M%C3%B9a+xu%C3%A2n&c=30", ["invalid_key"]), + ("a=10&b=M%C3%B9a+xu%C3%A2n", []), + ], +) +def test_skip_dropping_query_params( + original_query_string: str, keys_to_drop: Sequence[str] +) -> None: + url = URL(f"http://example.com?{original_query_string}") + new_url = url.without_query_params(*keys_to_drop) + assert new_url is url diff --git a/contrib/python/yarl/tests/test_url_update_netloc.py b/contrib/python/yarl/tests/test_url_update_netloc.py index 47d13bcd601..cfe6ca7d457 100644 --- a/contrib/python/yarl/tests/test_url_update_netloc.py +++ b/contrib/python/yarl/tests/test_url_update_netloc.py @@ -16,8 +16,13 @@ def test_with_scheme_uppercased(): def test_with_scheme_for_relative_url(): - with pytest.raises(ValueError): - URL("path/to").with_scheme("http") + """Test scheme can be set for relative URL.""" + msg = "scheme replacement is not allowed for " "relative URLs for the http scheme" + with pytest.raises(ValueError, match=msg): + assert URL("path/to").with_scheme("http") + + expected = URL("file:///absolute/path") + assert expected.with_scheme("file") == expected 
def test_with_scheme_invalid_type(): @@ -191,6 +196,32 @@ def test_with_port(): assert str(url.with_port(8888)) == "http://example.com:8888" +def test_with_default_port_normalization() -> None: + url = URL("http://example.com") + assert str(url.with_scheme("https")) == "https://example.com" + assert str(url.with_scheme("https").with_port(443)) == "https://example.com" + assert str(url.with_port(443).with_scheme("https")) == "https://example.com" + + +def test_with_custom_port_normalization() -> None: + url = URL("http://example.com") + u88 = url.with_port(88) + assert str(u88) == "http://example.com:88" + assert str(u88.with_port(80)) == "http://example.com" + assert str(u88.with_scheme("https")) == "https://example.com:88" + + +def test_with_explicit_port_normalization() -> None: + url = URL("http://example.com") + u80 = url.with_port(80) + assert str(u80) == "http://example.com" + assert str(u80.with_port(81)) == "http://example.com:81" + assert str(u80.with_scheme("https")) == "https://example.com:80" + + def test_with_port_with_no_port(): url = URL("http://example.com") assert str(url.with_port(None)) == "http://example.com" @@ -198,7 +229,7 @@ def test_with_port_with_no_port(): def test_with_port_ipv6(): url = URL("http://[::1]:8080/") - assert str(url.with_port(80)) == "http://[::1]:80/" + assert str(url.with_port(81)) == "http://[::1]:81/" def test_with_port_keeps_query_and_fragment(): diff --git a/contrib/python/yarl/ya.make b/contrib/python/yarl/ya.make index e1a242b8117..88efe860889 100644 --- a/contrib/python/yarl/ya.make +++ b/contrib/python/yarl/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(1.9.4) +VERSION(1.12.1) LICENSE(Apache-2.0) @@ -18,13 +18,17 @@ NO_LINT() PY_SRCS( TOP_LEVEL yarl/__init__.py - yarl/__init__.pyi + yarl/_helpers.py + yarl/_helpers_c.pyi + yarl/_helpers_py.py yarl/_quoting.py yarl/_quoting_c.pyi yarl/_quoting_py.py yarl/_url.py CYTHON_C yarl/_quoting_c.pyx + CYTHON_CPP + yarl/_helpers_c.pyx ) RESOURCE_FILES( diff --git 
a/contrib/python/yarl/yarl/__init__.py b/contrib/python/yarl/yarl/__init__.py index 127721ad096..50d24c0e778 100644 --- a/contrib/python/yarl/yarl/__init__.py +++ b/contrib/python/yarl/yarl/__init__.py @@ -1,5 +1,21 @@ -from ._url import URL, cache_clear, cache_configure, cache_info +from ._url import ( + URL, + Query, + QueryVariable, + SimpleQuery, + cache_clear, + cache_configure, + cache_info, +) -__version__ = "1.9.4" +__version__ = "1.12.1" -__all__ = ("URL", "cache_clear", "cache_configure", "cache_info") +__all__ = ( + "URL", + "SimpleQuery", + "QueryVariable", + "Query", + "cache_clear", + "cache_configure", + "cache_info", +) diff --git a/contrib/python/yarl/yarl/__init__.pyi b/contrib/python/yarl/yarl/__init__.pyi deleted file mode 100644 index 5fd4bd0d122..00000000000 --- a/contrib/python/yarl/yarl/__init__.pyi +++ /dev/null @@ -1,121 +0,0 @@ -import sys -from functools import _CacheInfo -from typing import Any, Mapping, Optional, Sequence, Tuple, Type, Union, overload - -import multidict - -if sys.version_info >= (3, 8): - from typing import Final, TypedDict, final -else: - from typing_extensions import Final, TypedDict, final - -_SimpleQuery = Union[str, int, float] -_QueryVariable = Union[_SimpleQuery, Sequence[_SimpleQuery]] -_Query = Union[ - None, str, Mapping[str, _QueryVariable], Sequence[Tuple[str, _QueryVariable]] -] - -@final -class URL: - scheme: Final[str] - raw_user: Final[str] - user: Final[Optional[str]] - raw_password: Final[Optional[str]] - password: Final[Optional[str]] - raw_host: Final[Optional[str]] - host: Final[Optional[str]] - port: Final[Optional[int]] - explicit_port: Final[Optional[int]] - raw_authority: Final[str] - authority: Final[str] - raw_path: Final[str] - path: Final[str] - raw_query_string: Final[str] - query_string: Final[str] - path_qs: Final[str] - raw_path_qs: Final[str] - raw_fragment: Final[str] - fragment: Final[str] - query: Final[multidict.MultiDict[str]] - raw_name: Final[str] - name: Final[str] - 
raw_suffix: Final[str] - suffix: Final[str] - raw_suffixes: Final[Tuple[str, ...]] - suffixes: Final[Tuple[str, ...]] - raw_parts: Final[Tuple[str, ...]] - parts: Final[Tuple[str, ...]] - parent: Final[URL] - def __init__( - self, val: Union[str, "URL"] = ..., *, encoded: bool = ... - ) -> None: ... - @classmethod - def build( - cls, - *, - scheme: str = ..., - authority: str = ..., - user: Optional[str] = ..., - password: Optional[str] = ..., - host: str = ..., - port: Optional[int] = ..., - path: str = ..., - query: Optional[_Query] = ..., - query_string: str = ..., - fragment: str = ..., - encoded: bool = ... - ) -> URL: ... - def __str__(self) -> str: ... - def __repr__(self) -> str: ... - def __bytes__(self) -> bytes: ... - def __eq__(self, other: Any) -> bool: ... - def __le__(self, other: Any) -> bool: ... - def __lt__(self, other: Any) -> bool: ... - def __ge__(self, other: Any) -> bool: ... - def __gt__(self, other: Any) -> bool: ... - def __hash__(self) -> int: ... - def __truediv__(self, name: str) -> URL: ... - def __mod__(self, query: _Query) -> URL: ... - def is_absolute(self) -> bool: ... - def is_default_port(self) -> bool: ... - def origin(self) -> URL: ... - def relative(self) -> URL: ... - def with_scheme(self, scheme: str) -> URL: ... - def with_user(self, user: Optional[str]) -> URL: ... - def with_password(self, password: Optional[str]) -> URL: ... - def with_host(self, host: str) -> URL: ... - def with_port(self, port: Optional[int]) -> URL: ... - def with_path(self, path: str, *, encoded: bool = ...) -> URL: ... - @overload - def with_query(self, query: _Query) -> URL: ... - @overload - def with_query(self, **kwargs: _QueryVariable) -> URL: ... - @overload - def update_query(self, query: _Query) -> URL: ... - @overload - def update_query(self, **kwargs: _QueryVariable) -> URL: ... - def with_fragment(self, fragment: Optional[str]) -> URL: ... - def with_name(self, name: str) -> URL: ... - def with_suffix(self, suffix: str) -> URL: ... 
- def join(self, url: URL) -> URL: ... - def joinpath(self, *url: str, encoded: bool = ...) -> URL: ... - def human_repr(self) -> str: ... - # private API - @classmethod - def _normalize_path(cls, path: str) -> str: ... - -@final -class cached_property: - def __init__(self, wrapped: Any) -> None: ... - def __get__(self, inst: URL, owner: Type[URL]) -> Any: ... - def __set__(self, inst: URL, value: Any) -> None: ... - -class CacheInfo(TypedDict): - idna_encode: _CacheInfo - idna_decode: _CacheInfo - -def cache_clear() -> None: ... -def cache_info() -> CacheInfo: ... -def cache_configure( - *, idna_encode_size: Optional[int] = ..., idna_decode_size: Optional[int] = ... -) -> None: ... diff --git a/contrib/python/yarl/yarl/_helpers.py b/contrib/python/yarl/yarl/_helpers.py new file mode 100644 index 00000000000..ac01158c7f5 --- /dev/null +++ b/contrib/python/yarl/yarl/_helpers.py @@ -0,0 +1,31 @@ +import os +import sys +from typing import TYPE_CHECKING + +__all__ = ("cached_property",) + + +NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS")) # type: bool +if sys.implementation.name != "cpython": + NO_EXTENSIONS = True + + +# isort: off +if TYPE_CHECKING: + from ._helpers_py import cached_property as cached_property_py + + cached_property = cached_property_py +elif not NO_EXTENSIONS: # pragma: no branch + try: + from ._helpers_c import cached_property as cached_property_c # type: ignore[attr-defined, unused-ignore] # noqa: E501 + + cached_property = cached_property_c + except ImportError: # pragma: no cover + from ._helpers_py import cached_property as cached_property_py + + cached_property = cached_property_py # type: ignore[assignment, misc] +else: + from ._helpers_py import cached_property as cached_property_py + + cached_property = cached_property_py # type: ignore[assignment, misc] +# isort: on diff --git a/contrib/python/yarl/yarl/_helpers_c.pyi b/contrib/python/yarl/yarl/_helpers_c.pyi new file mode 100644 index 00000000000..69034921219 --- /dev/null +++ 
b/contrib/python/yarl/yarl/_helpers_c.pyi @@ -0,0 +1,6 @@ +from typing import Any + +class cached_property: + def __init__(self, wrapped: Any) -> None: ... + def __get__(self, inst: Any, owner: Any) -> Any: ... + def __set__(self, inst: Any, value: Any) -> None: ... diff --git a/contrib/python/yarl/yarl/_helpers_c.pyx b/contrib/python/yarl/yarl/_helpers_c.pyx new file mode 100644 index 00000000000..e6eec375435 --- /dev/null +++ b/contrib/python/yarl/yarl/_helpers_c.pyx @@ -0,0 +1,36 @@ +# cython: language_level=3 + +cdef _sentinel = object() + +cdef class cached_property: + """Use as a class method decorator. It operates almost exactly like + the Python `@property` decorator, but it puts the result of the + method it decorates into the instance dict after the first call, + effectively replacing the function it decorates with an instance + variable. It is, in Python parlance, a data descriptor. + + """ + + cdef object wrapped + cdef object name + + def __init__(self, wrapped): + self.wrapped = wrapped + self.name = wrapped.__name__ + + @property + def __doc__(self): + return self.wrapped.__doc__ + + def __get__(self, inst, owner): + if inst is None: + return self + cdef dict cache = inst._cache + val = cache.get(self.name, _sentinel) + if val is _sentinel: + val = self.wrapped(inst) + cache[self.name] = val + return val + + def __set__(self, inst, value): + raise AttributeError("cached property is read-only") diff --git a/contrib/python/yarl/yarl/_helpers_py.py b/contrib/python/yarl/yarl/_helpers_py.py new file mode 100644 index 00000000000..5a18afb5c39 --- /dev/null +++ b/contrib/python/yarl/yarl/_helpers_py.py @@ -0,0 +1,41 @@ +"""Various helper functions.""" + +from typing import Any, Callable, Dict, Generic, Optional, Protocol, Type, TypeVar + +_T = TypeVar("_T") + + +class _TSelf(Protocol, Generic[_T]): + _cache: Dict[str, _T] + + +class cached_property(Generic[_T]): + """Use as a class method decorator. 
+ + It operates almost exactly like + the Python `@property` decorator, but it puts the result of the + method it decorates into the instance dict after the first call, + effectively replacing the function it decorates with an instance + variable. It is, in Python parlance, a data descriptor. + """ + + def __init__(self, wrapped: Callable[..., _T]) -> None: + self.wrapped = wrapped + self.__doc__ = wrapped.__doc__ + self.name = wrapped.__name__ + + def __get__(self, inst: _TSelf[_T], owner: Optional[Type[Any]] = None) -> _T: + try: + try: + return inst._cache[self.name] + except KeyError: + val = self.wrapped(inst) + inst._cache[self.name] = val + return val + except AttributeError: + if inst is None: + return self + raise + + def __set__(self, inst: _TSelf[_T], value: _T) -> None: + raise AttributeError("cached property is read-only") diff --git a/contrib/python/yarl/yarl/_quoting.py b/contrib/python/yarl/yarl/_quoting.py index 8d1c705ff25..95e86095d1d 100644 --- a/contrib/python/yarl/yarl/_quoting.py +++ b/contrib/python/yarl/yarl/_quoting.py @@ -11,7 +11,7 @@ if sys.implementation.name != "cpython": if not NO_EXTENSIONS: # pragma: no branch try: - from ._quoting_c import _Quoter, _Unquoter # type: ignore[assignment] + from ._quoting_c import _Quoter, _Unquoter except ImportError: # pragma: no cover from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] else: diff --git a/contrib/python/yarl/yarl/_quoting_c.pyi b/contrib/python/yarl/yarl/_quoting_c.pyi index 1c8fc24ec7e..1dc1a5b3c2d 100644 --- a/contrib/python/yarl/yarl/_quoting_c.pyi +++ b/contrib/python/yarl/yarl/_quoting_c.pyi @@ -9,8 +9,10 @@ class _Quoter: qs: bool = ..., requote: bool = ... ) -> None: ... - def __call__(self, val: Optional[str] = ...) -> Optional[str]: ... + def __call__(self, val: str = ...) -> str: ... class _Unquoter: - def __init__(self, *, unsafe: str = ..., qs: bool = ...) -> None: ... - def __call__(self, val: Optional[str] = ...) -> Optional[str]: ... 
+ def __init__( + self, *, ignore: str = ..., unsafe: str = ..., qs: bool = ... + ) -> None: ... + def __call__(self, val: str = ...) -> str: ... diff --git a/contrib/python/yarl/yarl/_quoting_c.pyx b/contrib/python/yarl/yarl/_quoting_c.pyx index 96f69c14e2b..6ac44fdf3a9 100644 --- a/contrib/python/yarl/yarl/_quoting_c.pyx +++ b/contrib/python/yarl/yarl/_quoting_c.pyx @@ -269,12 +269,14 @@ cdef class _Quoter: cdef class _Unquoter: + cdef str _ignore cdef str _unsafe cdef bint _qs cdef _Quoter _quoter cdef _Quoter _qs_quoter - def __init__(self, *, unsafe='', qs=False): + def __init__(self, *, ignore="", unsafe="", qs=False): + self._ignore = ignore self._unsafe = unsafe self._qs = qs self._quoter = _Quoter() @@ -336,7 +338,7 @@ cdef class _Unquoter: buflen = 0 if self._qs and unquoted in '+=&;': ret.append(self._qs_quoter(unquoted)) - elif unquoted in self._unsafe: + elif unquoted in self._unsafe or unquoted in self._ignore: ret.append(self._quoter(unquoted)) else: ret.append(unquoted) diff --git a/contrib/python/yarl/yarl/_quoting_py.py b/contrib/python/yarl/yarl/_quoting_py.py index 585a1da8040..e5b1d3a3bd1 100644 --- a/contrib/python/yarl/yarl/_quoting_py.py +++ b/contrib/python/yarl/yarl/_quoting_py.py @@ -1,7 +1,7 @@ import codecs import re from string import ascii_letters, ascii_lowercase, digits -from typing import Optional, cast +from typing import cast BASCII_LOWERCASE = ascii_lowercase.encode("ascii") BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)} @@ -33,14 +33,14 @@ class _Quoter: self._qs = qs self._requote = requote - def __call__(self, val: Optional[str]) -> Optional[str]: + def __call__(self, val: str) -> str: if val is None: return None if not isinstance(val, str): raise TypeError("Argument should be str") if not val: return "" - bval = cast(str, val).encode("utf8", errors="ignore") + bval = val.encode("utf8", errors="ignore") ret = bytearray() pct = bytearray() safe = self._safe @@ -116,13 +116,14 @@ class _Quoter: class 
_Unquoter: - def __init__(self, *, unsafe: str = "", qs: bool = False) -> None: + def __init__(self, *, ignore: str = "", unsafe: str = "", qs: bool = False) -> None: + self._ignore = ignore self._unsafe = unsafe self._qs = qs self._quoter = _Quoter() self._qs_quoter = _Quoter(qs=True) - def __call__(self, val: Optional[str]) -> Optional[str]: + def __call__(self, val: str) -> str: if val is None: return None if not isinstance(val, str): @@ -158,7 +159,7 @@ class _Unquoter: if to_add is None: # pragma: no cover raise RuntimeError("Cannot quote None") ret.append(to_add) - elif unquoted in self._unsafe: + elif unquoted in self._unsafe or unquoted in self._ignore: to_add = self._quoter(unquoted) if to_add is None: # pragma: no cover raise RuntimeError("Cannot quote None") diff --git a/contrib/python/yarl/yarl/_url.py b/contrib/python/yarl/yarl/_url.py index 9cca27ef86c..8a48628fcd9 100644 --- a/contrib/python/yarl/yarl/_url.py +++ b/contrib/python/yarl/yarl/_url.py @@ -1,61 +1,126 @@ -import functools import math +import sys import warnings from collections.abc import Mapping, Sequence from contextlib import suppress +from functools import _CacheInfo, lru_cache from ipaddress import ip_address -from urllib.parse import SplitResult, parse_qsl, quote, urljoin, urlsplit, urlunsplit +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + Iterator, + List, + SupportsInt, + Tuple, + TypedDict, + TypeVar, + Union, + overload, +) +from urllib.parse import ( + SplitResult, + parse_qsl, + quote, + urlsplit, + urlunsplit, + uses_netloc, + uses_relative, +) import idna -from multidict import MultiDict, MultiDictProxy +from multidict import MultiDict, MultiDictProxy, istr +from ._helpers import cached_property from ._quoting import _Quoter, _Unquoter -DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443} +DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443, "ftp": 21} +USES_AUTHORITY = frozenset(uses_netloc) +USES_RELATIVE = 
frozenset(uses_relative) + +# Special schemes https://url.spec.whatwg.org/#special-scheme +# are not allowed to have an empty host https://url.spec.whatwg.org/#url-representation +SCHEME_REQUIRES_HOST = frozenset(("http", "https", "ws", "wss", "ftp")) sentinel = object() +SimpleQuery = Union[str, int, float] +QueryVariable = Union[SimpleQuery, "Sequence[SimpleQuery]"] +Query = Union[ + None, str, "Mapping[str, QueryVariable]", "Sequence[Tuple[str, QueryVariable]]" +] +_T = TypeVar("_T") -def rewrite_module(obj: object) -> object: - obj.__module__ = "yarl" - return obj +if sys.version_info >= (3, 11): + from typing import Self +else: + Self = Any -class cached_property: - """Use as a class method decorator. It operates almost exactly like - the Python `@property` decorator, but it puts the result of the - method it decorates into the instance dict after the first call, - effectively replacing the function it decorates with an instance - variable. It is, in Python parlance, a data descriptor. 
+class CacheInfo(TypedDict): + """Host encoding cache.""" - """ + idna_encode: _CacheInfo + idna_decode: _CacheInfo + ip_address: _CacheInfo - def __init__(self, wrapped): - self.wrapped = wrapped - try: - self.__doc__ = wrapped.__doc__ - except AttributeError: # pragma: no cover - self.__doc__ = "" - self.name = wrapped.__name__ - def __get__(self, inst, owner, _sentinel=sentinel): - if inst is None: - return self - val = inst._cache.get(self.name, _sentinel) - if val is not _sentinel: - return val - val = self.wrapped(inst) - inst._cache[self.name] = val - return val +class _SplitResultDict(TypedDict, total=False): - def __set__(self, inst, value): - raise AttributeError("cached property is read-only") + scheme: str + netloc: str + path: str + query: str + fragment: str -def _normalize_path_segments(segments): +class _InternalURLCache(TypedDict, total=False): + + absolute: bool + scheme: str + raw_authority: str + _default_port: Union[int, None] + _port_not_default: Union[int, None] + authority: str + raw_user: Union[str, None] + user: Union[str, None] + raw_password: Union[str, None] + password: Union[str, None] + raw_host: Union[str, None] + host: Union[str, None] + port: Union[int, None] + explicit_port: Union[int, None] + raw_path: str + path: str + _parsed_query: List[Tuple[str, str]] + query: "MultiDictProxy[str]" + raw_query_string: str + query_string: str + path_qs: str + raw_path_qs: str + raw_fragment: str + fragment: str + raw_parts: Tuple[str, ...] + parts: Tuple[str, ...] + parent: "URL" + raw_name: str + name: str + raw_suffix: str + suffix: str + raw_suffixes: Tuple[str, ...] + suffixes: Tuple[str, ...] + + +def rewrite_module(obj: _T) -> _T: + obj.__module__ = "yarl" + return obj + + +def _normalize_path_segments(segments: "Sequence[str]") -> List[str]: """Drop '.' and '..' 
from a sequence of str segments""" - resolved_path = [] + resolved_path: List[str] = [] for seg in segments: if seg == "..": @@ -161,9 +226,18 @@ class URL: _UNQUOTER = _Unquoter() _PATH_UNQUOTER = _Unquoter(unsafe="+") + _PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") _QS_UNQUOTER = _Unquoter(qs=True) - def __new__(cls, val="", *, encoded=False, strict=None): + _val: SplitResult + + def __new__( + cls, + val: Union[str, SplitResult, "URL"] = "", + *, + encoded: bool = False, + strict: Union[bool, None] = None, + ) -> Self: if strict is not None: # pragma: no cover warnings.warn("strict parameter is ignored") if type(val) is cls: @@ -178,55 +252,75 @@ class URL: else: raise TypeError("Constructor parameter should be str") + cache: _InternalURLCache = {} if not encoded: - if not val[1]: # netloc - netloc = "" + host: Union[str, None] + scheme, netloc, path, query, fragment = val + if not netloc: # netloc host = "" else: - host = val.hostname + username, password, host, port = cls._split_netloc(val[1]) if host is None: - raise ValueError("Invalid URL: host is required for absolute urls") - - try: - port = val.port - except ValueError as e: - raise ValueError( - "Invalid URL: port can't be converted to integer" - ) from e - + if scheme in SCHEME_REQUIRES_HOST: + msg = ( + "Invalid URL: host is required for " + f"absolute urls with the {scheme} scheme" + ) + raise ValueError(msg) + else: + host = "" + host = cls._encode_host(host) + raw_user = None if username is None else cls._REQUOTER(username) + raw_password = None if password is None else cls._REQUOTER(password) netloc = cls._make_netloc( - val.username, val.password, host, port, encode=True, requote=True + raw_user, raw_password, host, port, encode_host=False ) - path = cls._PATH_REQUOTER(val[2]) - if netloc: - path = cls._normalize_path(path) + if "[" in host: + # Our host encoder adds back brackets for IPv6 addresses + # so we need to remove them here to get the raw host + _, _, bracketed = 
host.partition("[") + raw_host, _, _ = bracketed.partition("]") + else: + raw_host = host + cache["raw_host"] = raw_host + cache["raw_user"] = raw_user + cache["raw_password"] = raw_password + cache["explicit_port"] = port + + if path: + path = cls._PATH_REQUOTER(path) + if netloc: + path = cls._normalize_path(path) cls._validate_authority_uri_abs_path(host=host, path=path) - query = cls._QUERY_REQUOTER(val[3]) - fragment = cls._FRAGMENT_REQUOTER(val[4]) - val = SplitResult(val[0], netloc, path, query, fragment) + query = cls._QUERY_REQUOTER(query) if query else query + fragment = cls._FRAGMENT_REQUOTER(fragment) if fragment else fragment + cache["scheme"] = scheme + cache["raw_query_string"] = query + cache["raw_fragment"] = fragment + val = SplitResult(scheme, netloc, path, query, fragment) self = object.__new__(cls) self._val = val - self._cache = {} + self._cache = cache return self @classmethod def build( cls, *, - scheme="", - authority="", - user=None, - password=None, - host="", - port=None, - path="", - query=None, - query_string="", - fragment="", - encoded=False, - ): + scheme: str = "", + authority: str = "", + user: Union[str, None] = None, + password: Union[str, None] = None, + host: str = "", + port: Union[int, None] = None, + path: str = "", + query: Union[Query, None] = None, + query_string: str = "", + fragment: str = "", + encoded: bool = False, + ) -> "URL": """Creates and returns a new URL""" if authority and (user or password or host or port): @@ -257,23 +351,27 @@ class URL: netloc = authority else: tmp = SplitResult("", authority, "", "", "") + port = None if tmp.port == DEFAULT_PORTS.get(scheme) else tmp.port netloc = cls._make_netloc( - tmp.username, tmp.password, tmp.hostname, tmp.port, encode=True + tmp.username, tmp.password, tmp.hostname, port, encode=True ) elif not user and not password and not host and not port: netloc = "" else: + port = None if port == DEFAULT_PORTS.get(scheme) else port netloc = cls._make_netloc( user, password, 
host, port, encode=not encoded, encode_host=not encoded ) if not encoded: - path = cls._PATH_QUOTER(path) - if netloc: + path = cls._PATH_QUOTER(path) if path else path + if path and netloc: path = cls._normalize_path(path) cls._validate_authority_uri_abs_path(host=host, path=path) - query_string = cls._QUERY_QUOTER(query_string) - fragment = cls._FRAGMENT_QUOTER(fragment) + query_string = ( + cls._QUERY_QUOTER(query_string) if query_string else query_string + ) + fragment = cls._FRAGMENT_QUOTER(fragment) if fragment else fragment url = cls( SplitResult(scheme, netloc, path, query_string, fragment), encoded=True @@ -281,73 +379,84 @@ class URL: if query: return url.with_query(query) - else: - return url + return url def __init_subclass__(cls): raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") - def __str__(self): + def __str__(self) -> str: val = self._val - if not val.path and self.is_absolute() and (val.query or val.fragment): + if not val.path and self.absolute and (val.query or val.fragment): val = val._replace(path="/") + if False and (port := self._port_not_default) is None: + # port normalization - using None for default ports to remove from rendering + # https://datatracker.ietf.org/doc/html/rfc3986.html#section-6.2.3 + val = val._replace( + netloc=self._make_netloc( + self.raw_user, + self.raw_password, + self.raw_host, + port, + encode_host=False, + ) + ) return urlunsplit(val) - def __repr__(self): + def __repr__(self) -> str: return f"{self.__class__.__name__}('{str(self)}')" - def __bytes__(self): + def __bytes__(self) -> bytes: return str(self).encode("ascii") - def __eq__(self, other): - if not type(other) is URL: + def __eq__(self, other: object) -> bool: + if type(other) is not URL: return NotImplemented val1 = self._val - if not val1.path and self.is_absolute(): + if not val1.path and self.absolute: val1 = val1._replace(path="/") val2 = other._val - if not val2.path and other.is_absolute(): + if not val2.path and other.absolute: 
val2 = val2._replace(path="/") return val1 == val2 - def __hash__(self): + def __hash__(self) -> int: ret = self._cache.get("hash") if ret is None: val = self._val - if not val.path and self.is_absolute(): + if not val.path and self.absolute: val = val._replace(path="/") ret = self._cache["hash"] = hash(val) return ret - def __le__(self, other): - if not type(other) is URL: + def __le__(self, other: object) -> bool: + if type(other) is not URL: return NotImplemented return self._val <= other._val - def __lt__(self, other): - if not type(other) is URL: + def __lt__(self, other: object) -> bool: + if type(other) is not URL: return NotImplemented return self._val < other._val - def __ge__(self, other): - if not type(other) is URL: + def __ge__(self, other: object) -> bool: + if type(other) is not URL: return NotImplemented return self._val >= other._val - def __gt__(self, other): - if not type(other) is URL: + def __gt__(self, other: object) -> bool: + if type(other) is not URL: return NotImplemented return self._val > other._val - def __truediv__(self, name): + def __truediv__(self, name: str) -> "URL": if not isinstance(name, str): return NotImplemented return self._make_child((str(name),)) - def __mod__(self, query): + def __mod__(self, query: Query) -> "URL": return self.update_query(query) def __bool__(self) -> bool: @@ -355,7 +464,7 @@ class URL: self._val.netloc or self._val.path or self._val.query or self._val.fragment ) - def __getstate__(self): + def __getstate__(self) -> Tuple[SplitResult]: return (self._val,) def __setstate__(self, state): @@ -366,38 +475,52 @@ class URL: self._val, *unused = state self._cache = {} - def is_absolute(self): + def _cache_netloc(self) -> None: + """Cache the netloc parts of the URL.""" + cache = self._cache + ( + cache["raw_user"], + cache["raw_password"], + cache["raw_host"], + cache["explicit_port"], + ) = self._split_netloc(self._val.netloc) + + def is_absolute(self) -> bool: """A check for absolute URLs. 
Return True for absolute ones (having scheme or starting with //), False otherwise. + Is is preferred to call the .absolute property instead + as it is cached. """ - return self.raw_host is not None + return self.absolute - def is_default_port(self): + def is_default_port(self) -> bool: """A check for default port. Return True if port is default for specified scheme, e.g. 'http://python.org' or 'http://python.org:80', False otherwise. + Return False for relative URLs. + """ - if self.port is None: - return False - default = DEFAULT_PORTS.get(self.scheme) - if default is None: - return False - return self.port == default + default = self._default_port + explicit = self.explicit_port + if explicit is None: + # A relative URL does not have an implicit port / default port + return default is not None + return explicit == default - def origin(self): + def origin(self) -> "URL": """Return an URL with scheme, host and port parts only. user, password, path, query and fragment are removed. """ # TODO: add a keyword-only option for keeping user/pass maybe? - if not self.is_absolute(): + if not self.absolute: raise ValueError("URL should be absolute") if not self._val.scheme: raise ValueError("URL should have scheme") @@ -406,19 +529,33 @@ class URL: val = v._replace(netloc=netloc, path="", query="", fragment="") return URL(val, encoded=True) - def relative(self): + def relative(self) -> "URL": """Return a relative part of the URL. scheme, user, password, host and port are removed. """ - if not self.is_absolute(): + if not self.absolute: raise ValueError("URL should be absolute") val = self._val._replace(scheme="", netloc="") return URL(val, encoded=True) - @property - def scheme(self): + @cached_property + def absolute(self) -> bool: + """A check for absolute URLs. + + Return True for absolute ones (having scheme or starting + with //), False otherwise. 
+ + """ + # `netloc`` is an empty string for relative URLs + # Checking `netloc` is faster than checking `hostname` + # because `hostname` is a property that does some extra work + # to parse the host from the `netloc` + return self._val.netloc != "" + + @cached_property + def scheme(self) -> str: """Scheme for absolute URLs. Empty string for relative URLs or URLs starting with // @@ -426,8 +563,8 @@ class URL: """ return self._val.scheme - @property - def raw_authority(self): + @cached_property + def raw_authority(self) -> str: """Encoded authority part of URL. Empty string for relative URLs. @@ -436,7 +573,20 @@ class URL: return self._val.netloc @cached_property - def authority(self): + def _default_port(self) -> Union[int, None]: + """Default port for the scheme or None if not known.""" + return DEFAULT_PORTS.get(self.scheme) + + @cached_property + def _port_not_default(self) -> Union[int, None]: + """The port part of URL normalized to None if its the default port.""" + port = self.port + if self._default_port == port: + return None + return port + + @cached_property + def authority(self) -> str: """Decoded authority part of URL. Empty string for relative URLs. @@ -446,48 +596,53 @@ class URL: self.user, self.password, self.host, self.port, encode_host=False ) - @property - def raw_user(self): + @cached_property + def raw_user(self) -> Union[str, None]: """Encoded user part of URL. None if user is missing. """ # not .username - ret = self._val.username - if not ret: - return None - return ret + self._cache_netloc() + return self._cache["raw_user"] @cached_property - def user(self): + def user(self) -> Union[str, None]: """Decoded user part of URL. None if user is missing. """ - return self._UNQUOTER(self.raw_user) + raw_user = self.raw_user + if raw_user is None: + return None + return self._UNQUOTER(raw_user) - @property - def raw_password(self): + @cached_property + def raw_password(self) -> Union[str, None]: """Encoded password part of URL. 
None if password is missing. """ - return self._val.password + self._cache_netloc() + return self._cache["raw_password"] @cached_property - def password(self): + def password(self) -> Union[str, None]: """Decoded password part of URL. None if password is missing. """ - return self._UNQUOTER(self.raw_password) + raw_password = self.raw_password + if raw_password is None: + return None + return self._UNQUOTER(raw_password) - @property - def raw_host(self): + @cached_property + def raw_host(self) -> Union[str, None]: """Encoded host part of URL. None for relative URLs. @@ -495,10 +650,11 @@ class URL: """ # Use host instead of hostname for sake of shortness # May add .hostname prop later - return self._val.hostname + self._cache_netloc() + return self._cache["raw_host"] @cached_property - def host(self): + def host(self) -> Union[str, None]: """Decoded host part of URL. None for relative URLs. @@ -514,39 +670,40 @@ class URL: return raw return _idna_decode(raw) - @property - def port(self): + @cached_property + def port(self) -> Union[int, None]: """Port part of URL, with scheme-based fallback. None for relative URLs or URLs without explicit port and scheme without default port substitution. """ - return self._val.port or DEFAULT_PORTS.get(self._val.scheme) + return self.explicit_port or self._default_port - @property - def explicit_port(self): + @cached_property + def explicit_port(self) -> Union[int, None]: """Port part of URL, without scheme-based fallback. None for relative URLs or URLs without explicit port. """ - return self._val.port + self._cache_netloc() + return self._cache["explicit_port"] - @property - def raw_path(self): + @cached_property + def raw_path(self) -> str: """Encoded path of URL. / for absolute URLs without path part. """ ret = self._val.path - if not ret and self.is_absolute(): + if not ret and self.absolute: ret = "/" return ret @cached_property - def path(self): + def path(self) -> str: """Decoded path of URL. 
/ for absolute URLs without path part. @@ -555,18 +712,33 @@ class URL: return self._PATH_UNQUOTER(self.raw_path) @cached_property - def query(self): + def path_safe(self) -> str: + """Decoded path of URL. + + / for absolute URLs without path part. + + / (%2F) and % (%25) are not decoded + + """ + return self._PATH_SAFE_UNQUOTER(self.raw_path) + + @cached_property + def _parsed_query(self) -> List[Tuple[str, str]]: + """Parse query part of URL.""" + return parse_qsl(self.raw_query_string, keep_blank_values=True) + + @cached_property + def query(self) -> "MultiDictProxy[str]": """A MultiDictProxy representing parsed query parameters in decoded representation. Empty value if URL has no query part. """ - ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True)) - return MultiDictProxy(ret) + return MultiDictProxy(MultiDict(self._parsed_query)) - @property - def raw_query_string(self): + @cached_property + def raw_query_string(self) -> str: """Encoded query part of URL. Empty string if query is missing. @@ -575,7 +747,7 @@ class URL: return self._val.query @cached_property - def query_string(self): + def query_string(self) -> str: """Decoded query part of URL. Empty string if query is missing. @@ -584,21 +756,21 @@ class URL: return self._QS_UNQUOTER(self.raw_query_string) @cached_property - def path_qs(self): + def path_qs(self) -> str: """Decoded path of URL with query.""" if not self.query_string: return self.path return f"{self.path}?{self.query_string}" @cached_property - def raw_path_qs(self): + def raw_path_qs(self) -> str: """Encoded path of URL with query.""" if not self.raw_query_string: return self.raw_path return f"{self.raw_path}?{self.raw_query_string}" - @property - def raw_fragment(self): + @cached_property + def raw_fragment(self) -> str: """Encoded fragment part of URL. Empty string if fragment is missing. 
@@ -607,7 +779,7 @@ class URL: return self._val.fragment @cached_property - def fragment(self): + def fragment(self) -> str: """Decoded fragment part of URL. Empty string if fragment is missing. @@ -616,27 +788,27 @@ class URL: return self._UNQUOTER(self.raw_fragment) @cached_property - def raw_parts(self): + def raw_parts(self) -> Tuple[str, ...]: """A tuple containing encoded *path* parts. ('/',) for absolute URLs if *path* is missing. """ path = self._val.path - if self.is_absolute(): + if self.absolute: if not path: parts = ["/"] else: parts = ["/"] + path[1:].split("/") else: - if path.startswith("/"): + if path and path[0] == "/": parts = ["/"] + path[1:].split("/") else: parts = path.split("/") return tuple(parts) @cached_property - def parts(self): + def parts(self) -> Tuple[str, ...]: """A tuple containing decoded *path* parts. ('/',) for absolute URLs if *path* is missing. @@ -645,7 +817,7 @@ class URL: return tuple(self._UNQUOTER(part) for part in self.raw_parts) @cached_property - def parent(self): + def parent(self) -> "URL": """A new URL with last part of path removed and cleaned up query and fragment. 
@@ -660,10 +832,10 @@ class URL: return URL(val, encoded=True) @cached_property - def raw_name(self): + def raw_name(self) -> str: """The last part of raw_parts.""" parts = self.raw_parts - if self.is_absolute(): + if self.absolute: parts = parts[1:] if not parts: return "" @@ -673,12 +845,12 @@ class URL: return parts[-1] @cached_property - def name(self): + def name(self) -> str: """The last part of parts.""" return self._UNQUOTER(self.raw_name) @cached_property - def raw_suffix(self): + def raw_suffix(self) -> str: name = self.raw_name i = name.rfind(".") if 0 < i < len(name) - 1: @@ -687,11 +859,11 @@ class URL: return "" @cached_property - def suffix(self): + def suffix(self) -> str: return self._UNQUOTER(self.raw_suffix) @cached_property - def raw_suffixes(self): + def raw_suffixes(self) -> Tuple[str, ...]: name = self.raw_name if name.endswith("."): return () @@ -699,43 +871,45 @@ class URL: return tuple("." + suffix for suffix in name.split(".")[1:]) @cached_property - def suffixes(self): + def suffixes(self) -> Tuple[str, ...]: return tuple(self._UNQUOTER(suffix) for suffix in self.raw_suffixes) @staticmethod - def _validate_authority_uri_abs_path(host, path): + def _validate_authority_uri_abs_path(host: str, path: str) -> None: """Ensure that path in URL with authority starts with a leading slash. Raise ValueError if not. 
""" - if len(host) > 0 and len(path) > 0 and not path.startswith("/"): + if host and path and not path[0] == "/": raise ValueError( "Path in a URL with authority should start with a slash ('/') if set" ) - def _make_child(self, segments, encoded=False): - """add segments to self._val.path, accounting for absolute vs relative paths""" - # keep the trailing slash if the last segment ends with / - parsed = [""] if segments and segments[-1][-1:] == "/" else [] - for seg in reversed(segments): - if not seg: - continue - if seg[0] == "/": + def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": + """ + add paths to self._val.path, accounting for absolute vs relative paths, + keep existing, but do not create new, empty segments + """ + parsed = [] + for idx, path in enumerate(reversed(paths)): + # empty segment of last is not removed + last = idx == 0 + if path and path[0] == "/": raise ValueError( - f"Appending path {seg!r} starting from slash is forbidden" - ) - seg = seg if encoded else self._PATH_QUOTER(seg) - if "/" in seg: - parsed += ( - sub for sub in reversed(seg.split("/")) if sub and sub != "." 
+ f"Appending path {path!r} starting from slash is forbidden" ) - elif seg != ".": - parsed.append(seg) + path = path if encoded else self._PATH_QUOTER(path) + segments = list(reversed(path.split("/"))) + # remove trailing empty segment for all but the last path + segment_slice_start = int(not last and segments[0] == "") + parsed += segments[segment_slice_start:] parsed.reverse() - old_path = self._val.path - if old_path: - parsed = [*old_path.rstrip("/").split("/"), *parsed] - if self.is_absolute(): + + if self._val.path and (old_path_segments := self._val.path.split("/")): + old_path_cutoff = -1 if old_path_segments[-1] == "" else None + parsed = [*old_path_segments[:old_path_cutoff], *parsed] + + if self.absolute: parsed = _normalize_path_segments(parsed) if parsed and parsed[0] != "": # inject a leading slash when adding a path to an absolute URL @@ -747,11 +921,14 @@ class URL: ) @classmethod - def _normalize_path(cls, path): + def _normalize_path(cls, path: str) -> str: # Drop '.' and '..' from str path + if "." not in path: + # No need to normalize if there are no '.' or '..' segments + return path prefix = "" - if path.startswith("/"): + if path and path[0] == "/": # preserve the "/" root element of absolute paths, copying it to the # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. 
prefix = "/" @@ -761,38 +938,73 @@ class URL: return prefix + "/".join(_normalize_path_segments(segments)) @classmethod - def _encode_host(cls, host, human=False): - try: - ip, sep, zone = host.partition("%") - ip = ip_address(ip) - except ValueError: - host = host.lower() - # IDNA encoding is slow, - # skip it for ASCII-only strings - # Don't move the check into _idna_encode() helper - # to reduce the cache size - if human or host.isascii(): - return host - host = _idna_encode(host) + def _encode_host(cls, host: str, human: bool = False) -> str: + if "%" in host: + raw_ip, sep, zone = host.partition("%") else: - host = ip.compressed - if sep: - host += "%" + zone - if ip.version == 6: - host = "[" + host + "]" - return host + raw_ip = host + sep = zone = "" + + if raw_ip and raw_ip[-1].isdigit() or ":" in raw_ip: + # Might be an IP address, check it + # + # IP Addresses can look like: + # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 + # - 127.0.0.1 (last character is a digit) + # - 2001:db8::ff00:42:8329 (contains a colon) + # - 2001:db8::ff00:42:8329%eth0 (contains a colon) + # - [2001:db8::ff00:42:8329] (contains a colon) + # Rare IP Address formats are not supported per: + # https://datatracker.ietf.org/doc/html/rfc3986#section-7.4 + # + # We try to avoid parsing IP addresses as much as possible + # since its orders of magnitude slower than almost any other operation + # this library does. 
+ # + # IP parsing is slow, so its wrapped in an LRU + try: + ip_compressed_version = _ip_compressed_version(raw_ip) + except ValueError: + pass + else: + # These checks should not happen in the + # LRU to keep the cache size small + host, version = ip_compressed_version + if sep: + host += "%" + zone + if version == 6: + return f"[{host}]" + return host + + host = host.lower() + # IDNA encoding is slow, + # skip it for ASCII-only strings + # Don't move the check into _idna_encode() helper + # to reduce the cache size + if human or host.isascii(): + return host + return _idna_encode(host) @classmethod def _make_netloc( - cls, user, password, host, port, encode=False, encode_host=True, requote=False - ): + cls, + user: Union[str, None], + password: Union[str, None], + host: Union[str, None], + port: Union[int, None], + encode: bool = False, + encode_host: bool = True, + requote: bool = False, + ) -> str: + if host is None: + return "" quoter = cls._REQUOTER if requote else cls._QUOTER if encode_host: ret = cls._encode_host(host) else: ret = host if port is not None: - ret = ret + ":" + str(port) + ret = f"{ret}:{port}" if password is not None: if not user: user = "" @@ -808,16 +1020,56 @@ class URL: ret = user + "@" + ret return ret - def with_scheme(self, scheme): + @classmethod + @lru_cache # match the same size as urlsplit + def _split_netloc( + cls, + netloc: str, + ) -> Tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]: + """Split netloc into username, password, host and port.""" + if "@" not in netloc: + username: Union[str, None] = None + password: Union[str, None] = None + hostinfo = netloc + else: + userinfo, _, hostinfo = netloc.rpartition("@") + username, have_password, password = userinfo.partition(":") + if not have_password: + password = None + + if "[" in hostinfo: + _, _, bracketed = hostinfo.partition("[") + hostname, _, port_str = bracketed.partition("]") + _, _, port_str = port_str.partition(":") + else: + hostname, _, 
port_str = hostinfo.partition(":") + + if not port_str: + port: Union[int, None] = None + else: + try: + port = int(port_str) + except ValueError: + raise ValueError("Invalid URL: port can't be converted to integer") + if not (0 <= port <= 65535): + raise ValueError("Port out of range 0-65535") + + return username or None, password, hostname or None, port + + def with_scheme(self, scheme: str) -> "URL": """Return a new URL with scheme replaced.""" # N.B. doesn't cleanup query/fragment if not isinstance(scheme, str): raise TypeError("Invalid scheme type") - if not self.is_absolute(): - raise ValueError("scheme replacement is not allowed for relative URLs") + if not self.absolute and scheme in SCHEME_REQUIRES_HOST: + msg = ( + "scheme replacement is not allowed for " + f"relative URLs for the {scheme} scheme" + ) + raise ValueError(msg) return URL(self._val._replace(scheme=scheme.lower()), encoded=True) - def with_user(self, user): + def with_user(self, user: Union[str, None]) -> "URL": """Return a new URL with user replaced. Autoencode user if needed. @@ -834,7 +1086,7 @@ class URL: password = val.password else: raise TypeError("Invalid user type") - if not self.is_absolute(): + if not self.absolute: raise ValueError("user replacement is not allowed for relative URLs") return URL( self._val._replace( @@ -843,7 +1095,7 @@ class URL: encoded=True, ) - def with_password(self, password): + def with_password(self, password: Union[str, None]) -> "URL": """Return a new URL with password replaced. Autoencode password if needed. @@ -858,7 +1110,7 @@ class URL: password = self._QUOTER(password) else: raise TypeError("Invalid password type") - if not self.is_absolute(): + if not self.absolute: raise ValueError("password replacement is not allowed for relative URLs") val = self._val return URL( @@ -868,7 +1120,7 @@ class URL: encoded=True, ) - def with_host(self, host): + def with_host(self, host: str) -> "URL": """Return a new URL with host replaced. Autoencode host if needed. 
@@ -880,7 +1132,7 @@ class URL: # N.B. doesn't cleanup query/fragment if not isinstance(host, str): raise TypeError("Invalid host type") - if not self.is_absolute(): + if not self.absolute: raise ValueError("host replacement is not allowed for relative URLs") if not host: raise ValueError("host removing is not allowed") @@ -892,7 +1144,7 @@ class URL: encoded=True, ) - def with_port(self, port): + def with_port(self, port: Union[int, None]) -> "URL": """Return a new URL with port replaced. Clear port to default if None is passed. @@ -904,7 +1156,7 @@ class URL: raise TypeError(f"port should be int or None, got {type(port)}") if port < 0 or port > 65535: raise ValueError(f"port must be between 0 and 65535, got {port}") - if not self.is_absolute(): + if not self.absolute: raise ValueError("port replacement is not allowed for relative URLs") val = self._val return URL( @@ -914,18 +1166,20 @@ class URL: encoded=True, ) - def with_path(self, path, *, encoded=False): + def with_path(self, path: str, *, encoded: bool = False) -> "URL": """Return a new URL with path replaced.""" if not encoded: path = self._PATH_QUOTER(path) - if self.is_absolute(): + if self.absolute: path = self._normalize_path(path) if len(path) > 0 and path[0] != "/": path = "/" + path return URL(self._val._replace(path=path, query="", fragment=""), encoded=True) @classmethod - def _query_seq_pairs(cls, quoter, pairs): + def _query_seq_pairs( + cls, quoter: Callable[[str], str], pairs: Iterable[Tuple[str, QueryVariable]] + ) -> Iterator[str]: for key, val in pairs: if isinstance(val, (list, tuple)): for v in val: @@ -934,17 +1188,21 @@ class URL: yield quoter(key) + "=" + quoter(cls._query_var(val)) @staticmethod - def _query_var(v): + def _query_var(v: QueryVariable) -> str: cls = type(v) - if issubclass(cls, str): + if cls is str or issubclass(cls, str): + if TYPE_CHECKING: + assert isinstance(v, str) return v if issubclass(cls, float): + if TYPE_CHECKING: + assert isinstance(v, float) if 
math.isinf(v): raise ValueError("float('inf') is not supported") if math.isnan(v): raise ValueError("float('nan') is not supported") return str(float(v)) - if issubclass(cls, int) and cls is not bool: + if cls is not bool and isinstance(cls, SupportsInt): return str(int(v)) raise TypeError( "Invalid variable type: value " @@ -952,7 +1210,17 @@ class URL: "of type {}".format(v, cls) ) - def _get_str_query(self, *args, **kwargs): + def _get_str_query_from_iterable( + self, items: Iterable[Tuple[Union[str, istr], str]] + ) -> str: + """Return a query string from an iterable.""" + quoter = self._QUERY_PART_QUOTER + # A listcomp is used since listcomps are inlined on CPython 3.12+ and + # they are a bit faster than a generator expression. + return "&".join([f"{quoter(k)}={quoter(self._query_var(v))}" for k, v in items]) + + def _get_str_query(self, *args: Any, **kwargs: Any) -> Union[str, None]: + query: Union[str, Mapping[str, QueryVariable], None] if kwargs: if len(args) > 0: raise ValueError( @@ -965,34 +1233,35 @@ class URL: raise ValueError("Either kwargs or single query parameter must be present") if query is None: - query = None - elif isinstance(query, Mapping): + return None + if isinstance(query, Mapping): quoter = self._QUERY_PART_QUOTER - query = "&".join(self._query_seq_pairs(quoter, query.items())) - elif isinstance(query, str): - query = self._QUERY_QUOTER(query) - elif isinstance(query, (bytes, bytearray, memoryview)): + return "&".join(self._query_seq_pairs(quoter, query.items())) + if isinstance(query, str): + return self._QUERY_QUOTER(query) + if isinstance(query, (bytes, bytearray, memoryview)): raise TypeError( "Invalid query type: bytes, bytearray and memoryview are forbidden" ) - elif isinstance(query, Sequence): - quoter = self._QUERY_PART_QUOTER + if isinstance(query, Sequence): # We don't expect sequence values if we're given a list of pairs # already; only mappings like builtin `dict` which can't have the # same key pointing to multiple values 
are allowed to use # `_query_seq_pairs`. - query = "&".join( - quoter(k) + "=" + quoter(self._query_var(v)) for k, v in query - ) - else: - raise TypeError( - "Invalid query type: only str, mapping or " - "sequence of (key, value) pairs is allowed" - ) + return self._get_str_query_from_iterable(query) + + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) - return query + @overload + def with_query(self, query: Query) -> "URL": ... - def with_query(self, *args, **kwargs): + @overload + def with_query(self, **kwargs: QueryVariable) -> "URL": ... + + def with_query(self, *args: Any, **kwargs: Any) -> "URL": """Return a new URL with query part replaced. Accepts any Mapping (e.g. dict, multidict.MultiDict instances) @@ -1008,24 +1277,77 @@ class URL: # N.B. doesn't cleanup query/fragment new_query = self._get_str_query(*args, **kwargs) or "" - return URL( - self._val._replace(path=self._val.path, query=new_query), encoded=True - ) + return URL(self._val._replace(query=new_query), encoded=True) + + @overload + def extend_query(self, query: Query) -> "URL": ... + + @overload + def extend_query(self, **kwargs: QueryVariable) -> "URL": ... + + def extend_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part combined with the existing. + + This method will not remove existing query parameters. 
- def update_query(self, *args, **kwargs): - """Return a new URL with query part updated.""" + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.extend_query(a=3, c=4) + URL('http://example.com/?a=1&b=2&a=3&c=4') + """ + new_query_string = self._get_str_query(*args, **kwargs) + if not new_query_string: + return self + if current_query := self.raw_query_string: + # both strings are already encoded so we can use a simple + # string join + if current_query[-1] == "&": + combined_query = f"{current_query}{new_query_string}" + else: + combined_query = f"{current_query}&{new_query_string}" + else: + combined_query = new_query_string + return URL(self._val._replace(query=combined_query), encoded=True) + + @overload + def update_query(self, query: Query) -> "URL": ... + + @overload + def update_query(self, **kwargs: QueryVariable) -> "URL": ... + + def update_query(self, *args: Any, **kwargs: Any) -> "URL": + """Return a new URL with query part updated. + + This method will overwrite existing query parameters. 
+ + Example: + >>> url = URL('http://example.com/?a=1&b=2') + >>> url.update_query(a=3, c=4) + URL('http://example.com/?a=3&b=2&c=4') + """ s = self._get_str_query(*args, **kwargs) - query = None - if s is not None: - new_query = MultiDict(parse_qsl(s, keep_blank_values=True)) - query = MultiDict(self.query) - query.update(new_query) + if s is None: + return URL(self._val._replace(query=""), encoded=True) - return URL( - self._val._replace(query=self._get_str_query(query) or ""), encoded=True + query = MultiDict(self._parsed_query) + query.update(parse_qsl(s, keep_blank_values=True)) + new_str = self._get_str_query_from_iterable(query.items()) + return URL(self._val._replace(query=new_str), encoded=True) + + def without_query_params(self, *query_params: str) -> "URL": + """Remove some keys from query part and return new URL.""" + params_to_remove = set(query_params) & self.query.keys() + if not params_to_remove: + return self + return self.with_query( + tuple( + (name, value) + for name, value in self.query.items() + if name not in params_to_remove + ) ) - def with_fragment(self, fragment): + def with_fragment(self, fragment: Union[str, None]) -> "URL": """Return a new URL with fragment replaced. Autoencode fragment if needed. @@ -1044,7 +1366,7 @@ class URL: return self return URL(self._val._replace(fragment=raw_fragment), encoded=True) - def with_name(self, name): + def with_name(self, name: str) -> "URL": """Return a new URL with name (last part of path) replaced. Query and fragment parts are cleaned up. @@ -1061,7 +1383,7 @@ class URL: if name in (".", ".."): raise ValueError(". and .. values are forbidden") parts = list(self.raw_parts) - if self.is_absolute(): + if self.absolute: if len(parts) == 1: parts.append(name) else: @@ -1076,7 +1398,7 @@ class URL: encoded=True, ) - def with_suffix(self, suffix): + def with_suffix(self, suffix: str) -> "URL": """Return a new URL with suffix (file extension of name) replaced. Query and fragment parts are cleaned up. 
@@ -1085,7 +1407,7 @@ class URL: """ if not isinstance(suffix, str): raise TypeError("Invalid suffix type") - if suffix and not suffix.startswith(".") or suffix == ".": + if suffix and not suffix[0] == "." or suffix == ".": raise ValueError(f"Invalid suffix {suffix!r}") name = self.raw_name if not name: @@ -1097,7 +1419,7 @@ class URL: name = name[: -len(old_suffix)] + suffix return self.with_name(name) - def join(self, url): + def join(self, url: "URL") -> "URL": """Join URLs Construct a full (“absolute”) URL by combining a “base URL” @@ -1109,46 +1431,76 @@ class URL: relative URL. """ - # See docs for urllib.parse.urljoin - if not isinstance(url, URL): + if type(url) is not URL: raise TypeError("url should be URL") - return URL(urljoin(str(self), str(url)), encoded=True) + val = self._val + other_val = url._val + scheme = other_val.scheme or val.scheme + + if scheme != val.scheme or scheme not in USES_RELATIVE: + return url + + # scheme is in uses_authority as uses_authority is a superset of uses_relative + if other_val.netloc and scheme in USES_AUTHORITY: + return URL(other_val._replace(scheme=scheme), encoded=True) + + parts: _SplitResultDict = {"scheme": scheme} + if other_val.path or other_val.fragment: + parts["fragment"] = other_val.fragment + if other_val.path or other_val.query: + parts["query"] = other_val.query + + if not other_val.path: + return URL(val._replace(**parts), encoded=True) - def joinpath(self, *other, encoded=False): + if other_val.path[0] == "/": + path = other_val.path + elif not val.path: + path = f"/{other_val.path}" + elif val.path[-1] == "/": + path = f"{val.path}{other_val.path}" + else: + # … + # and relativizing ".." 
+ # parts[0] is / for absolute urls, this join will add a double slash there + path = "/".join([*self.parts[:-1], ""]) + path += other_val.path + # which has to be removed + if val.path[0] == "/": + path = path[1:] + + parts["path"] = self._normalize_path(path) + return URL(val._replace(**parts), encoded=True) + + def joinpath(self, *other: str, encoded: bool = False) -> "URL": """Return a new URL with the elements in other appended to the path.""" return self._make_child(other, encoded=encoded) - def human_repr(self): + def human_repr(self) -> str: """Return decoded human readable string for URL representation.""" user = _human_quote(self.user, "#/:?@[]") password = _human_quote(self.password, "#/:?@[]") host = self.host if host: - host = self._encode_host(self.host, human=True) + host = self._encode_host(host, human=True) path = _human_quote(self.path, "#?") + if TYPE_CHECKING: + assert path is not None query_string = "&".join( "{}={}".format(_human_quote(k, "#&+;="), _human_quote(v, "#&+;=")) for k, v in self.query.items() ) fragment = _human_quote(self.fragment, "") - return urlunsplit( - SplitResult( - self.scheme, - self._make_netloc( - user, - password, - host, - self._val.port, - encode_host=False, - ), - path, - query_string, - fragment, - ) + if TYPE_CHECKING: + assert fragment is not None + netloc = self._make_netloc( + user, password, host, self.explicit_port, encode_host=False ) + val = SplitResult(self.scheme, netloc, path, query_string, fragment) + return urlunsplit(val) -def _human_quote(s, unsafe): +def _human_quote(s: Union[str, None], unsafe: str) -> Union[str, None]: if not s: return s for c in "%" + unsafe: @@ -1162,39 +1514,59 @@ def _human_quote(s, unsafe): _MAXCACHE = 256 [email protected]_cache(_MAXCACHE) -def _idna_decode(raw): +@lru_cache(_MAXCACHE) +def _idna_decode(raw: str) -> str: try: return idna.decode(raw.encode("ascii")) except UnicodeError: # e.g. 
'::1' return raw.encode("ascii").decode("idna") [email protected]_cache(_MAXCACHE) -def _idna_encode(host): +@lru_cache(_MAXCACHE) +def _idna_encode(host: str) -> str: try: return idna.encode(host, uts46=True).decode("ascii") except UnicodeError: return host.encode("idna").decode("ascii") +@lru_cache(_MAXCACHE) +def _ip_compressed_version(raw_ip: str) -> Tuple[str, int]: + """Return compressed version of IP address and its version.""" + ip = ip_address(raw_ip) + return ip.compressed, ip.version + + @rewrite_module -def cache_clear(): +def cache_clear() -> None: + """Clear all LRU caches.""" _idna_decode.cache_clear() _idna_encode.cache_clear() + _ip_compressed_version.cache_clear() @rewrite_module -def cache_info(): +def cache_info() -> CacheInfo: + """Report cache statistics.""" return { "idna_encode": _idna_encode.cache_info(), "idna_decode": _idna_decode.cache_info(), + "ip_address": _ip_compressed_version.cache_info(), } @rewrite_module -def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE): - global _idna_decode, _idna_encode +def cache_configure( + *, + idna_encode_size: Union[int, None] = _MAXCACHE, + idna_decode_size: Union[int, None] = _MAXCACHE, + ip_address_size: Union[int, None] = _MAXCACHE, +) -> None: + """Configure LRU cache sizes.""" + global _idna_decode, _idna_encode, _ip_compressed_version - _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__) - _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__) + _idna_encode = lru_cache(idna_encode_size)(_idna_encode.__wrapped__) + _idna_decode = lru_cache(idna_decode_size)(_idna_decode.__wrapped__) + _ip_compressed_version = lru_cache(ip_address_size)( + _ip_compressed_version.__wrapped__ + ) |
