aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/testing/yatest_lib/tools.py
blob: 48acbcf278a50b5bd5ca77c8ae304ea278246716 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import six
import sys


def to_utf8(value):
    """
    Converts value to the native ``str`` type holding UTF-8 data.

    On Python 2 the result is a byte string encoded as UTF-8; on Python 3
    it is a text string. Byte-string input is decoded as UTF-8 and invalid
    sequences are silently dropped.
    :param value: any object; non-string objects are stringified first
    :return: ``str`` representation of value
    """
    if sys.version_info[0] < 3:
        if not isinstance(value, basestring):  # noqa
            value = unicode(value)  # noqa
        if isinstance(value, str):
            # Round-trip byte strings through unicode so that invalid
            # UTF-8 sequences are stripped from the result.
            value = value.decode("utf-8", errors="ignore")
        return value.encode('utf-8', 'ignore')

    if isinstance(value, bytes):
        # str(b'abc') would produce the repr "b'abc'" rather than the text,
        # so decode explicitly, mirroring the Python 2 branch.
        return value.decode("utf-8", errors="ignore")
    return str(value)


def trim_string(s, max_bytes):
    """
    Cuts s down to a prefix whose UTF-8 encoding occupies no more
    than max_bytes bytes. Handy for shortening filesystem paths.

    Text strings are returned as text. Byte strings are returned as
    bytes: one that already fits is returned untouched, otherwise it is
    decoded as UTF-8 (undecodable bytes are dropped), trimmed and
    encoded back.
    :param s: unicode or byte string
    :param max_bytes: number of bytes
    :return the prefix of s
    :raise TypeError: if s is neither a text nor a byte string
    """
    if not isinstance(s, (six.text_type, six.binary_type)):
        raise TypeError('a string is expected')

    if isinstance(s, six.text_type):
        return _trim_unicode_string(s, max_bytes)

    # Byte string: its length already is its size in bytes.
    if len(s) <= max_bytes:
        return s

    decoded = s.decode('utf-8', errors='ignore')
    trimmed = _trim_unicode_string(decoded, max_bytes)
    return trimmed.encode('utf-8', errors='ignore')


def _trim_unicode_string(s, max_bytes):
    if len(s) * 4 <= max_bytes:
        # UTF-8 uses at most 4 bytes per character
        return s

    result = []
    cur_byte_length = 0

    for ch in s:
        cur_byte_length += len(ch.encode('utf-8'))
        if cur_byte_length > max_bytes:
            break
        result.append(ch)

    return ''.join(result)


def to_str(s):
    """
    Returns s as the native ``str`` type where a conversion is needed.

    On Python 2 a unicode string is encoded as UTF-8. Any other value,
    and every value on Python 3, is returned unchanged.
    :param s: value to convert
    :return: encoded byte string or s itself
    """
    needs_encoding = six.PY2 and isinstance(s, six.text_type)
    if not needs_encoding:
        return s
    return s.encode('utf8')