summaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Lib/_pyrepl/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tools/python3/Lib/_pyrepl/utils.py')
-rw-r--r--contrib/tools/python3/Lib/_pyrepl/utils.py83
1 files changed, 83 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/_pyrepl/utils.py b/contrib/tools/python3/Lib/_pyrepl/utils.py
new file mode 100644
index 00000000000..a30fbdee3a4
--- /dev/null
+++ b/contrib/tools/python3/Lib/_pyrepl/utils.py
@@ -0,0 +1,83 @@
+import re
+import unicodedata
+import functools
+
+from .types import CharBuffer, CharWidths
+from .trace import trace
+
+ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
+ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
+
+
+def str_width(c: str) -> int:
+ if ord(c) < 128:
+ return 1
+ # gh-139246 for zero-width joiner and combining characters
+ if unicodedata.combining(c):
+ return 0
+ category = unicodedata.category(c)
+ if category == "Cf" and c != "\u00ad":
+ return 0
+ w = unicodedata.east_asian_width(c)
+ if w in ("N", "Na", "H", "A"):
+ return 1
+ return 2
+
+
+def wlen(s: str) -> int:
+ if len(s) == 1 and s != "\x1a":
+ return str_width(s)
+ length = sum(str_width(i) for i in s)
+ # remove lengths of any escape sequences
+ sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
+ ctrl_z_cnt = s.count("\x1a")
+ return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
+
+def unbracket(s: str, including_content: bool = False) -> str:
+ r"""Return `s` with \001 and \002 characters removed.
+
+ If `including_content` is True, content between \001 and \002 is also
+ stripped.
+ """
+ if including_content:
+ return ZERO_WIDTH_BRACKET.sub("", s)
+ return s.translate(ZERO_WIDTH_TRANS)
+
+
+def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
+ r"""Decompose the input buffer into a printable variant.
+
+ Returns a tuple of two lists:
+ - the first list is the input buffer, character by character;
+ - the second list is the visible width of each character in the input
+ buffer.
+
+ Examples:
+ >>> utils.disp_str("a = 9")
+ (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
+ """
+ chars: CharBuffer = []
+ char_widths: CharWidths = []
+
+ if not buffer:
+ return chars, char_widths
+
+ for c in buffer:
+ if c == "\x1a": # CTRL-Z on Windows
+ chars.append(c)
+ char_widths.append(2)
+ elif ord(c) < 128:
+ chars.append(c)
+ char_widths.append(1)
+ elif unicodedata.category(c).startswith("C"):
+ c = r"\u%04x" % ord(c)
+ chars.append(c)
+ char_widths.append(len(c))
+ else:
+ chars.append(c)
+ char_widths.append(str_width(c))
+ trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths)
+ return chars, char_widths