aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/strings/strings.py
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2024-01-19 17:48:10 +0100
committerGitHub <noreply@github.com>2024-01-19 17:48:10 +0100
commit5722bbf18aa2f471fc5491834c6c877b524e8795 (patch)
tree0bfe53383cc4dc38261e3e0086af199f39777801 /library/python/strings/strings.py
parent610b3da211be5d7cfd27077f22b876aedaa2dc29 (diff)
downloadydb-5722bbf18aa2f471fc5491834c6c877b524e8795.tar.gz
Library update 9 (#1163)
* Right libs import scripts * Library update 9 * Add contrib/libs/cxxsupp/libcxx/include/memory_resource
Diffstat (limited to 'library/python/strings/strings.py')
-rw-r--r--library/python/strings/strings.py117
1 files changed, 117 insertions, 0 deletions
diff --git a/library/python/strings/strings.py b/library/python/strings/strings.py
index f5fa2d32c6..d068f30b76 100644
--- a/library/python/strings/strings.py
+++ b/library/python/strings/strings.py
@@ -176,3 +176,120 @@ def fix_utf8(data):
# remove destroyed symbol code
udata = six.ensure_text(data, 'utf-8', 'ignore')
return six.ensure_str(udata, 'utf-8', errors='ignore')
+
+
# Every character accepted as a hex digit inside a %xx escape (both letter cases).
_hexdig = "0123456789ABCDEFabcdef"
# Lookup table: two-character hex pair (as bytes) -> the single byte it encodes.
# Mixed-case pairs such as b"aF" are present because both cases are enumerated.
_hextobyte = dict(
    (pair.encode(), bytes.fromhex(pair) if six.PY3 else pair.decode("hex"))
    for pair in (high + low for high in _hexdig for low in _hexdig)
)
+
+
def parse_qs_binary(qs, keep_blank_values=False, strict_parsing=False, max_num_fields=None, separator=b'&'):
    """Parse a bytes query into a dictionary, in the manner of `parse_qs` from `urlparse` / `urllib.parse`.

    All parsing is delegated to `parse_qsl_binary`; this function only groups
    the resulting (name, value) pairs by name.

    Arguments:

    qs: percent-encoded query (bytes) to be parsed

    keep_blank_values: when true, blank values are kept and reported as
        empty byte strings. When false (the default), fields with a blank
        value are dropped as if they were not present.

    strict_parsing: when true, a malformed field raises ValueError.
        When false (the default), malformed fields are silently skipped.

    max_num_fields: int. If set, ValueError is raised when the query
        contains more than this many fields.

    separator: bytes. The symbol separating the query arguments.
        Defaults to &.

    Returns a dictionary mapping each name to the list of its values, in
    the order they appear in the query.
    """
    grouped = {}
    for key, item in parse_qsl_binary(
        qs, keep_blank_values, strict_parsing, max_num_fields=max_num_fields, separator=separator
    ):
        # First occurrence creates the list; later ones extend it.
        grouped.setdefault(key, []).append(item)
    return grouped
+
+
def parse_qsl_binary(qs, keep_blank_values=False, strict_parsing=False, max_num_fields=None, separator=b'&'):
    """Parse a bytes query into a list of pairs, in the manner of `parse_qsl` from `urlparse` / `urllib.parse`.

    Arguments:

    qs: percent-encoded query bytes to be parsed

    keep_blank_values: when true, blank values are kept and reported as
        empty byte strings (a field without '=' then yields an empty
        value). When false (the default), such fields are dropped.

    strict_parsing: when true, a field without '=' (including an empty
        field) raises ValueError. When false (the default), such fields
        are silently skipped or, with keep_blank_values, given a blank value.

    max_num_fields: int. If set, ValueError is raised when the query
        contains more than this many fields.

    separator: bytes. The symbol separating the query arguments.
        Defaults to &.

    Returns a list of (name, value) tuples; in both parts '+' is turned
    into a space and %xx escapes are decoded with `unquote_binary`.
    """
    if max_num_fields is not None:
        # Count separators before splitting so an oversized query is
        # rejected without first building the list of fields.
        field_total = (qs.count(separator) + 1) if qs else 0
        if field_total > max_num_fields:
            raise ValueError('Max number of fields exceeded')

    if not qs:
        return []

    decoded = []
    for chunk in qs.split(separator):
        if not (chunk or strict_parsing):
            # Empty field (e.g. from "a=1&&b=2"); only an error in strict mode.
            continue

        key, equals, raw = chunk.partition(b'=')
        if not equals:
            # Control name with no equal sign.
            if strict_parsing:
                raise ValueError("bad query field: %r" % (chunk,))
            if not keep_blank_values:
                continue
            raw = b''

        if raw or keep_blank_values:
            decoded.append(
                (
                    unquote_binary(key.replace(b'+', b' ')),
                    unquote_binary(raw.replace(b'+', b' ')),
                )
            )
    return decoded
+
+
def unquote_binary(string):
    """Replace %xx escapes in a byte string by the single byte they encode.

    Every '%' followed by two hexadecimal digits (upper or lower case) is
    replaced by the corresponding byte, looked up in the module-level
    `_hextobyte` table. A '%' that is not followed by two hex digits is not
    an error: it is copied to the output unchanged together with the text
    after it. '+' is not treated specially here.

    Arguments:

    string: percent-encoded bytes to be decoded

    Returns the decoded bytes.

    unquote_binary(b'abc%20def') -> b'abc def'
    unquote_binary(b'abc%FFdef') -> b'abc\\xffdef'
    unquote_binary(b'100%25')    -> b'100%'
    unquote_binary(b'%no')       -> b'%no'
    """
    bits = string.split(b"%")
    if len(bits) == 1:
        # No '%' at all: nothing to decode.
        return bits[0]

    res = [bits[0]]
    for item in bits[1:]:
        # Each item is the text that followed one '%' in the input.
        decoded = _hextobyte.get(item[:2])
        if decoded is None:
            # Not a valid escape: restore the '%' and keep the text verbatim.
            res.append(b"%")
            res.append(item)
        else:
            # Branch on the lookup result, not on the emitted byte: '%25'
            # legitimately decodes to b'%' and must still consume its digits.
            res.append(decoded)
            res.append(item[2:])
    return b"".join(res)