diff options
Diffstat (limited to 'contrib/tools/python3/Lib/_pyrepl/utils.py')
| -rw-r--r-- | contrib/tools/python3/Lib/_pyrepl/utils.py | 83 |
1 files changed, 83 insertions, 0 deletions
# contrib/tools/python3/Lib/_pyrepl/utils.py -- added as a new file by this diff.
"""Helpers for measuring and decomposing strings for display."""

import re
import unicodedata
import functools

from .types import CharBuffer, CharWidths
from .trace import trace

# ESC "[" followed by any run of characters in the range " ".."@" and one
# final character in the range "A".."~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# A \x01 ... \x02 pair together with the shortest newline-free text between.
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table that deletes the \x01 and \x02 markers themselves.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})


@functools.cache
def str_width(c: str) -> int:
    """Return the display width (0, 1 or 2) of the single character `c`.

    ASCII characters are always 1.  Combining characters, and format ("Cf")
    characters other than U+00AD, are 0.  Anything else is 1 when its East
    Asian width is "N", "Na", "H" or "A", and 2 otherwise.

    The result depends only on `c`, so it is memoized; `wlen` and `disp_str`
    call this once per character.

    Raises TypeError (from `ord`) if `c` is not exactly one character.
    """
    if ord(c) < 128:
        return 1
    # gh-139246 for zero-width joiner and combining characters
    if unicodedata.combining(c):
        return 0
    category = unicodedata.category(c)
    # U+00AD is category "Cf" too, but is deliberately not treated as
    # zero-width here; it falls through to the East Asian width lookup.
    if category == "Cf" and c != "\u00ad":
        return 0
    w = unicodedata.east_asian_width(c)
    if w in ("N", "Na", "H", "A"):
        return 1
    return 2


def wlen(s: str) -> int:
    """Return the display width of the string `s`.

    The width is the sum of `str_width` over every character, minus the
    characters that belong to ANSI escape sequences (as matched by
    `ANSI_ESCAPE_SEQUENCE`), plus one extra column for every "\\x1a" so that
    each CTRL-Z counts as two columns in total.
    """
    # Fast path for a single character; "\x1a" needs the adjustment below.
    if len(s) == 1 and s != "\x1a":
        return str_width(s)
    length = sum(str_width(i) for i in s)
    # remove lengths of any escape sequences
    # (every character of a match is ASCII, so each one added exactly 1 above)
    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
    ctrl_z_cnt = s.count("\x1a")
    return length - sum(len(i) for i in sequence) + ctrl_z_cnt


def unbracket(s: str, including_content: bool = False) -> str:
    r"""Return `s` with \001 and \002 characters removed.

    If `including_content` is True, content between \001 and \002 is also
    stripped.
    """
    if including_content:
        return ZERO_WIDTH_BRACKET.sub("", s)
    return s.translate(ZERO_WIDTH_TRANS)


def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
    r"""Decompose the input buffer into a printable variant.

    Returns a tuple of two lists:
    - the first list is the input buffer, character by character; a
      non-ASCII character whose Unicode category starts with "C" is replaced
      by its ``\uXXXX`` escape text;
    - the second list is the visible width of each entry in the first list.

    An empty buffer yields two empty lists.

    Examples:
    >>> utils.disp_str("a = 9")
    (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
    """
    chars: CharBuffer = []
    char_widths: CharWidths = []

    if not buffer:
        return chars, char_widths

    for c in buffer:
        if c == "\x1a":  # CTRL-Z on Windows
            chars.append(c)
            char_widths.append(2)
        elif ord(c) < 128:
            chars.append(c)
            char_widths.append(1)
        elif unicodedata.category(c).startswith("C"):
            # Shown as literal escape text, so its width is the text length.
            c = r"\u%04x" % ord(c)
            chars.append(c)
            char_widths.append(len(c))
        else:
            chars.append(c)
            char_widths.append(str_width(c))
    trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths)
    return chars, char_widths
