diff options
| author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
|---|---|---|
| committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 | 
| commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
| tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/tld/gen_tld.py | |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/tld/gen_tld.py')
| -rwxr-xr-x | library/cpp/tld/gen_tld.py | 57 | 
1 files changed, 57 insertions, 0 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Generate the C ``TopLevelDomains`` array from a TLD list file.

Usage: gen_tld.py <tld-list-file>

The input file is read line by line; each line is stripped and lower-cased.
Lines that start with ``xn--`` or with an ASCII letter are kept, everything
else (blank lines, ``#`` comments, ...) is ignored.  The result is written to
stdout as a null-terminated C array with one row per starting letter.
"""

import os
import sys

_ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
_PUNYCODE_PREFIX = 'xn--'


def _bucket_for(domain):
    """Return the bucket key for *domain*, or None when it fits no bucket."""
    if domain.startswith(_PUNYCODE_PREFIX):
        return _PUNYCODE_PREFIX
    first = domain[:1]
    # The explicit emptiness check matters: '' is "in" every string.
    if first and first in _ALPHABET:
        return first
    return None


def _idna_to_display(domain):
    """Decode a punycode domain into the text shown in the C comment."""
    is_byte_string = isinstance(domain, bytes)
    raw = domain if is_byte_string else domain.encode('ascii')
    text = raw.decode('idna')
    if is_byte_string:
        # Python 2: lines read from the file are byte strings and stdout
        # takes byte strings, so hand back UTF-8 bytes as the script always did.
        return text.encode('utf-8')
    return text


def _quoted_row(domains):
    """Format *domains* as '"a", "b",' (trailing comma kept, no trailing space)."""
    return ''.join('"%s", ' % name for name in domains).rstrip()


def main():
    """Read the TLD list named by ``sys.argv[1]`` and print the C array.

    Domains are grouped by first letter, with a separate group for ``xn--``
    (punycode) names.  Groups are emitted in sorted key order, so the
    ``xn--`` group lands between ``x`` and ``y``.  The punycode group is
    preceded by a C comment listing the decoded Unicode forms.

    Exits with status 1 and a usage message when no input file is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: gen_tld.py <tld-list-file>\n')
        sys.exit(1)

    tlds = dict((letter, []) for letter in _ALPHABET)
    tlds[_PUNYCODE_PREFIX] = []

    # The context manager guarantees the input file is closed.
    with open(sys.argv[1], 'r') as tld_file:
        for line in tld_file:
            domain = line.strip().lower()
            key = _bucket_for(domain)
            if key is not None:
                tlds[key].append(domain)

    # sys.stdout.write is used throughout so the script behaves the same on
    # Python 2 and Python 3 (no print statement / print function mismatch).
    write = sys.stdout.write
    write('// actual list can be found at http://data.iana.org/TLD/tlds-alpha-by-domain.txt\n')
    write('static const char* const TopLevelDomains[] = {\n')

    for label, domains in sorted(tlds.items()):
        if label == _PUNYCODE_PREFIX:
            readable = ''.join('%s, ' % _idna_to_display(name) for name in domains)
            write('    /* %s*/\n' % readable.rstrip())
        write('    %s\n' % _quoted_row(domains))

    write('    0\n')
    write('};\n')


if __name__ == '__main__':
    main()
