aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/tld/gen_tld.py
diff options
context:
space:
mode:
author Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/tld/gen_tld.py
download ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/tld/gen_tld.py')
-rwxr-xr-x library/cpp/tld/gen_tld.py 57
1 files changed, 57 insertions, 0 deletions
diff --git a/library/cpp/tld/gen_tld.py b/library/cpp/tld/gen_tld.py
new file mode 100755
index 0000000000..882b701e1d
--- /dev/null
+++ b/library/cpp/tld/gen_tld.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys, os
+
+def main():
+ alphabet = 'abcdefghijklmnopqrstuvwxyz'
+ tlds = dict()
+
+ for s in alphabet:
+ tlds[s] = list()
+ tlds['xn--'] = list()
+
+ tld_file = open(sys.argv[1], 'r')
+ for line in tld_file.readlines():
+ domain = line.strip().lower()
+ for label in tlds:
+ if domain.startswith('xn--'):
+ tlds['xn--'].append(domain)
+ break
+ elif domain.startswith('x'):
+ tlds['x'].append(domain)
+ break
+ else:
+ if domain.startswith(label):
+ tlds[label].append(domain)
+ break
+
+ print '// actual list can be found at http://data.iana.org/TLD/tlds-alpha-by-domain.txt'
+ print 'static const char* const TopLevelDomains[] = {'
+
+ for label, value in sorted(tlds.iteritems()):
+ if label == 'xn--':
+ sys.stdout.write(' /* ')
+ str = ''
+ for n in value:
+ unicode_domain = n.decode('idna').encode('utf-8')
+ str += ('%s, ' % unicode_domain)
+ sys.stdout.write('%s*/\n' % str.rstrip())
+
+ sys.stdout.write(' ')
+ str = ''
+ for n in value:
+ str += ('"%s", ' % n)
+ sys.stdout.write('%s\n' % str.rstrip())
+ else:
+ sys.stdout.write(' ')
+ str = ''
+ for n in value:
+ str += ('"%s", ' % n)
+ sys.stdout.write('%s\n' % str.rstrip())
+
+ print ' 0'
+ print '};'
+
+# Run the generator only when this file is executed as a script.
+if __name__ == '__main__':
+    main()