aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/tld
diff options
context:
space:
mode:
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/tld
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/tld')
-rwxr-xr-x library/cpp/tld/gen_tld.py 57
-rw-r--r-- library/cpp/tld/tld.cpp 48
-rw-r--r-- library/cpp/tld/tld.h 28
-rw-r--r-- library/cpp/tld/tld_ut.cpp 59
-rw-r--r-- library/cpp/tld/tlds-alpha-by-domain.txt 1536
-rw-r--r-- library/cpp/tld/ut/ya.make 15
-rw-r--r-- library/cpp/tld/ya.make 19
7 files changed, 1762 insertions, 0 deletions
diff --git a/library/cpp/tld/gen_tld.py b/library/cpp/tld/gen_tld.py
new file mode 100755
index 0000000000..882b701e1d
--- /dev/null
+++ b/library/cpp/tld/gen_tld.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys, os
+
+def main():
+ alphabet = 'abcdefghijklmnopqrstuvwxyz'
+ tlds = dict()
+
+ for s in alphabet:
+ tlds[s] = list()
+ tlds['xn--'] = list()
+
+ tld_file = open(sys.argv[1], 'r')
+ for line in tld_file.readlines():
+ domain = line.strip().lower()
+ for label in tlds:
+ if domain.startswith('xn--'):
+ tlds['xn--'].append(domain)
+ break
+ elif domain.startswith('x'):
+ tlds['x'].append(domain)
+ break
+ else:
+ if domain.startswith(label):
+ tlds[label].append(domain)
+ break
+
+ print '// actual list can be found at http://data.iana.org/TLD/tlds-alpha-by-domain.txt'
+ print 'static const char* const TopLevelDomains[] = {'
+
+ for label, value in sorted(tlds.iteritems()):
+ if label == 'xn--':
+ sys.stdout.write(' /* ')
+ str = ''
+ for n in value:
+ unicode_domain = n.decode('idna').encode('utf-8')
+ str += ('%s, ' % unicode_domain)
+ sys.stdout.write('%s*/\n' % str.rstrip())
+
+ sys.stdout.write(' ')
+ str = ''
+ for n in value:
+ str += ('"%s", ' % n)
+ sys.stdout.write('%s\n' % str.rstrip())
+ else:
+ sys.stdout.write(' ')
+ str = ''
+ for n in value:
+ str += ('"%s", ' % n)
+ sys.stdout.write('%s\n' % str.rstrip())
+
+ print ' 0'
+ print '};'
+
+if __name__ == '__main__':
+ main()
diff --git a/library/cpp/tld/tld.cpp b/library/cpp/tld/tld.cpp
new file mode 100644
index 0000000000..e31f3f0322
--- /dev/null
+++ b/library/cpp/tld/tld.cpp
@@ -0,0 +1,48 @@
+#include "tld.h"
+
+#include <library/cpp/digest/lower_case/hash_ops.h>
+
+#include <util/generic/hash_set.h>
+#include <util/generic/singleton.h>
+
+namespace NTld {
+    namespace {
+// Generated table: defines the TopLevelDomains array returned by GetTlds().
+#include <library/cpp/tld/tld.inc>
+
+        // Set of string views that uses TCIOps for both hashing and equality.
+        using TCiHash = THashSet<TStringBuf, TCIOps, TCIOps>;
+
+        // Lookup set filled with every entry of the GetTlds() table.
+        struct TTLDHash: public TCiHash {
+            TTLDHash() {
+                // Walk the table until its terminating null entry.
+                const char* const* cursor = GetTlds();
+                while (*cursor) {
+                    insert(*cursor);
+                    ++cursor;
+                }
+            }
+        };
+
+        // Lookup set holding the hand-picked subset of TLDs listed below.
+        struct TVeryGoodTld: public TCiHash {
+            TVeryGoodTld() {
+                const char* const trusted[] = {
+                    "am", "az", "biz", "by", "com", "cz", "de", "ec", "fr", "ge", "gov",
+                    "gr", "il", "info", "kg", "kz", "mobi", "net", "nu", "org", "lt", "lv",
+                    "md", "ru", "su", "tr", "ua", "uk", "uz", "ws", "xn--p1ai", "рф"};
+
+                for (const char* name : trusted) {
+                    insert(name);
+                }
+            }
+        };
+    }
+
+    // Exposes the generated table; its last element is a null pointer.
+    const char* const* GetTlds() {
+        return TopLevelDomains;
+    }
+
+    // True when @s is present in the set built from the generated table.
+    bool IsTld(const TStringBuf& s) {
+        const TTLDHash& known = Default<TTLDHash>();
+        return known.contains(s);
+    }
+
+    // True when @s is present in the hand-picked TVeryGoodTld set.
+    bool IsVeryGoodTld(const TStringBuf& s) {
+        const TVeryGoodTld& trusted = Default<TVeryGoodTld>();
+        return trusted.contains(s);
+    }
+
+}
diff --git a/library/cpp/tld/tld.h b/library/cpp/tld/tld.h
new file mode 100644
index 0000000000..9e241de090
--- /dev/null
+++ b/library/cpp/tld/tld.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+namespace NTld {
+    // Returns the table of known TLD names (defined in tld.cpp).
+    const char* const* GetTlds();
+
+    // Returns the part of @host that follows its last '.'.
+    // The result is an empty string when @host is a single domain label (contains no '.').
+    // If you need the whole @host in that case, use GetZone() from library/cpp/string_utils/url/url.h
+    inline TStringBuf FindTld(const TStringBuf& host) {
+        const size_t lastDot = host.rfind('.');
+        if (lastDot == TStringBuf::npos) {
+            return TStringBuf();
+        }
+        return host.SubStr(lastDot + 1);
+    }
+
+    // Checks whether @tld is one of the known TLDs.
+    bool IsTld(const TStringBuf& tld);
+
+    // Checks whether the text after the last '.' of @host is a known TLD.
+    inline bool InTld(const TStringBuf& host) {
+        const TStringBuf candidate = FindTld(host);
+        return IsTld(candidate);
+    }
+
+    // Checks whether @tld belongs to a "good" subset of reliable TLDs, defined in tld.cpp
+    bool IsVeryGoodTld(const TStringBuf& tld);
+
+    // Checks whether the text after the last '.' of @host belongs to that "good" subset.
+    inline bool InVeryGoodTld(const TStringBuf& host) {
+        const TStringBuf candidate = FindTld(host);
+        return IsVeryGoodTld(candidate);
+    }
+
+}
diff --git a/library/cpp/tld/tld_ut.cpp b/library/cpp/tld/tld_ut.cpp
new file mode 100644
index 0000000000..733200f2b5
--- /dev/null
+++ b/library/cpp/tld/tld_ut.cpp
@@ -0,0 +1,59 @@
+#include "tld.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/charset/doccodes.h>
+
+using namespace NTld;
+
+// Unit tests for the NTld helpers declared in tld.h.
+Y_UNIT_TEST_SUITE(TTldTest) {
+ // FindTld() must return whatever follows the last '.', with the original letter case.
+ Y_UNIT_TEST(TestFindTld) {
+ UNIT_ASSERT(FindTld("yandex.ru") == "ru");
+ UNIT_ASSERT(FindTld("YandeX.Ru") == "Ru");
+ UNIT_ASSERT(FindTld("yandex.com.tr") == "tr");
+ UNIT_ASSERT(FindTld("com.tr") == "tr");
+ UNIT_ASSERT(FindTld("abc.def.ghi") == "ghi");
+ UNIT_ASSERT(FindTld("abc.def.aaaaaaaaaa") == "aaaaaaaaaa");
+ UNIT_ASSERT(FindTld("a.b.c.d.e.f.g") == "g");
+
+ // Degenerate inputs: leading dot, lone dot, no dot at all, empty string.
+ UNIT_ASSERT(FindTld(".diff") == "diff");
+ UNIT_ASSERT(FindTld(".") == "");
+ UNIT_ASSERT(FindTld("ru") == "");
+ UNIT_ASSERT(FindTld("") == "");
+ }
+
+ // IsTld()/InTld() against the full TLD table.
+ Y_UNIT_TEST(TestTLDs) {
+ // ASCII names are expected to match regardless of letter case.
+ UNIT_ASSERT(IsTld("ru"));
+ UNIT_ASSERT(IsTld("Ru"));
+ UNIT_ASSERT(IsTld("BMW"));
+ UNIT_ASSERT(IsTld("TiReS"));
+ UNIT_ASSERT(IsTld("xn--p1ai"));
+ UNIT_ASSERT(IsTld("YaHOO"));
+ UNIT_ASSERT(!IsTld("xn"));
+
+ // InTld() looks only at the text after the last '.'; a host without a dot
+ // or with a trailing dot yields an empty candidate and must be rejected.
+ UNIT_ASSERT(InTld("ru.ru"));
+ UNIT_ASSERT(!InTld("ru"));
+ UNIT_ASSERT(!InTld("ru."));
+ UNIT_ASSERT(!InTld("ru.xn"));
+ }
+
+ // IsVeryGoodTld()/InVeryGoodTld() against the hand-picked subset from tld.cpp.
+ Y_UNIT_TEST(TestVeryGoodTlds) {
+ // Names accepted by IsTld() above ("BMW", "TiReS", "YaHOO") are not in the subset.
+ UNIT_ASSERT(IsVeryGoodTld("ru"));
+ UNIT_ASSERT(IsVeryGoodTld("Ru"));
+ UNIT_ASSERT(!IsVeryGoodTld("BMW"));
+ UNIT_ASSERT(!IsVeryGoodTld("TiReS"));
+ UNIT_ASSERT(IsVeryGoodTld("рф"));
+ UNIT_ASSERT(!IsVeryGoodTld("РФ")); // note that uppercase non-ascii tlds cannot be found
+ UNIT_ASSERT(IsVeryGoodTld("xn--p1ai")); // "рф"
+ UNIT_ASSERT(!IsVeryGoodTld("xn--p1ag")); // "ру"
+ UNIT_ASSERT(!IsVeryGoodTld("YaHOO"));
+ UNIT_ASSERT(!IsVeryGoodTld("xn"));
+
+ // Only the text after the last '.' is inspected, so the "http://" prefix
+ // in the third case does not affect the result.
+ UNIT_ASSERT(InVeryGoodTld("ru.ru"));
+ UNIT_ASSERT(InVeryGoodTld("яндекс.рф"));
+ UNIT_ASSERT(InVeryGoodTld("http://xn--d1acpjx3f.xn--p1ai"));
+ UNIT_ASSERT(!InVeryGoodTld("ru"));
+ UNIT_ASSERT(!InVeryGoodTld("ru."));
+ UNIT_ASSERT(!InVeryGoodTld("ru.xn"));
+ }
+}
diff --git a/library/cpp/tld/tlds-alpha-by-domain.txt b/library/cpp/tld/tlds-alpha-by-domain.txt
new file mode 100644
index 0000000000..412a6e29cb
--- /dev/null
+++ b/library/cpp/tld/tlds-alpha-by-domain.txt
@@ -0,0 +1,1536 @@
+# Version 2019012200, Last Updated Tue Jan 22 07:07:01 2019 UTC
+AAA
+AARP
+ABARTH
+ABB
+ABBOTT
+ABBVIE
+ABC
+ABLE
+ABOGADO
+ABUDHABI
+AC
+ACADEMY
+ACCENTURE
+ACCOUNTANT
+ACCOUNTANTS
+ACO
+ACTIVE
+ACTOR
+AD
+ADAC
+ADS
+ADULT
+AE
+AEG
+AERO
+AETNA
+AF
+AFAMILYCOMPANY
+AFL
+AFRICA
+AG
+AGAKHAN
+AGENCY
+AI
+AIG
+AIGO
+AIRBUS
+AIRFORCE
+AIRTEL
+AKDN
+AL
+ALFAROMEO
+ALIBABA
+ALIPAY
+ALLFINANZ
+ALLSTATE
+ALLY
+ALSACE
+ALSTOM
+AM
+AMERICANEXPRESS
+AMERICANFAMILY
+AMEX
+AMFAM
+AMICA
+AMSTERDAM
+ANALYTICS
+ANDROID
+ANQUAN
+ANZ
+AO
+AOL
+APARTMENTS
+APP
+APPLE
+AQ
+AQUARELLE
+AR
+ARAB
+ARAMCO
+ARCHI
+ARMY
+ARPA
+ART
+ARTE
+AS
+ASDA
+ASIA
+ASSOCIATES
+AT
+ATHLETA
+ATTORNEY
+AU
+AUCTION
+AUDI
+AUDIBLE
+AUDIO
+AUSPOST
+AUTHOR
+AUTO
+AUTOS
+AVIANCA
+AW
+AWS
+AX
+AXA
+AZ
+AZURE
+BA
+BABY
+BAIDU
+BANAMEX
+BANANAREPUBLIC
+BAND
+BANK
+BAR
+BARCELONA
+BARCLAYCARD
+BARCLAYS
+BAREFOOT
+BARGAINS
+BASEBALL
+BASKETBALL
+BAUHAUS
+BAYERN
+BB
+BBC
+BBT
+BBVA
+BCG
+BCN
+BD
+BE
+BEATS
+BEAUTY
+BEER
+BENTLEY
+BERLIN
+BEST
+BESTBUY
+BET
+BF
+BG
+BH
+BHARTI
+BI
+BIBLE
+BID
+BIKE
+BING
+BINGO
+BIO
+BIZ
+BJ
+BLACK
+BLACKFRIDAY
+BLANCO
+BLOCKBUSTER
+BLOG
+BLOOMBERG
+BLUE
+BM
+BMS
+BMW
+BN
+BNL
+BNPPARIBAS
+BO
+BOATS
+BOEHRINGER
+BOFA
+BOM
+BOND
+BOO
+BOOK
+BOOKING
+BOSCH
+BOSTIK
+BOSTON
+BOT
+BOUTIQUE
+BOX
+BR
+BRADESCO
+BRIDGESTONE
+BROADWAY
+BROKER
+BROTHER
+BRUSSELS
+BS
+BT
+BUDAPEST
+BUGATTI
+BUILD
+BUILDERS
+BUSINESS
+BUY
+BUZZ
+BV
+BW
+BY
+BZ
+BZH
+CA
+CAB
+CAFE
+CAL
+CALL
+CALVINKLEIN
+CAM
+CAMERA
+CAMP
+CANCERRESEARCH
+CANON
+CAPETOWN
+CAPITAL
+CAPITALONE
+CAR
+CARAVAN
+CARDS
+CARE
+CAREER
+CAREERS
+CARS
+CARTIER
+CASA
+CASE
+CASEIH
+CASH
+CASINO
+CAT
+CATERING
+CATHOLIC
+CBA
+CBN
+CBRE
+CBS
+CC
+CD
+CEB
+CENTER
+CEO
+CERN
+CF
+CFA
+CFD
+CG
+CH
+CHANEL
+CHANNEL
+CHARITY
+CHASE
+CHAT
+CHEAP
+CHINTAI
+CHRISTMAS
+CHROME
+CHRYSLER
+CHURCH
+CI
+CIPRIANI
+CIRCLE
+CISCO
+CITADEL
+CITI
+CITIC
+CITY
+CITYEATS
+CK
+CL
+CLAIMS
+CLEANING
+CLICK
+CLINIC
+CLINIQUE
+CLOTHING
+CLOUD
+CLUB
+CLUBMED
+CM
+CN
+CO
+COACH
+CODES
+COFFEE
+COLLEGE
+COLOGNE
+COM
+COMCAST
+COMMBANK
+COMMUNITY
+COMPANY
+COMPARE
+COMPUTER
+COMSEC
+CONDOS
+CONSTRUCTION
+CONSULTING
+CONTACT
+CONTRACTORS
+COOKING
+COOKINGCHANNEL
+COOL
+COOP
+CORSICA
+COUNTRY
+COUPON
+COUPONS
+COURSES
+CR
+CREDIT
+CREDITCARD
+CREDITUNION
+CRICKET
+CROWN
+CRS
+CRUISE
+CRUISES
+CSC
+CU
+CUISINELLA
+CV
+CW
+CX
+CY
+CYMRU
+CYOU
+CZ
+DABUR
+DAD
+DANCE
+DATA
+DATE
+DATING
+DATSUN
+DAY
+DCLK
+DDS
+DE
+DEAL
+DEALER
+DEALS
+DEGREE
+DELIVERY
+DELL
+DELOITTE
+DELTA
+DEMOCRAT
+DENTAL
+DENTIST
+DESI
+DESIGN
+DEV
+DHL
+DIAMONDS
+DIET
+DIGITAL
+DIRECT
+DIRECTORY
+DISCOUNT
+DISCOVER
+DISH
+DIY
+DJ
+DK
+DM
+DNP
+DO
+DOCS
+DOCTOR
+DODGE
+DOG
+DOHA
+DOMAINS
+DOT
+DOWNLOAD
+DRIVE
+DTV
+DUBAI
+DUCK
+DUNLOP
+DUNS
+DUPONT
+DURBAN
+DVAG
+DVR
+DZ
+EARTH
+EAT
+EC
+ECO
+EDEKA
+EDU
+EDUCATION
+EE
+EG
+EMAIL
+EMERCK
+ENERGY
+ENGINEER
+ENGINEERING
+ENTERPRISES
+EPOST
+EPSON
+EQUIPMENT
+ER
+ERICSSON
+ERNI
+ES
+ESQ
+ESTATE
+ESURANCE
+ET
+ETISALAT
+EU
+EUROVISION
+EUS
+EVENTS
+EVERBANK
+EXCHANGE
+EXPERT
+EXPOSED
+EXPRESS
+EXTRASPACE
+FAGE
+FAIL
+FAIRWINDS
+FAITH
+FAMILY
+FAN
+FANS
+FARM
+FARMERS
+FASHION
+FAST
+FEDEX
+FEEDBACK
+FERRARI
+FERRERO
+FI
+FIAT
+FIDELITY
+FIDO
+FILM
+FINAL
+FINANCE
+FINANCIAL
+FIRE
+FIRESTONE
+FIRMDALE
+FISH
+FISHING
+FIT
+FITNESS
+FJ
+FK
+FLICKR
+FLIGHTS
+FLIR
+FLORIST
+FLOWERS
+FLY
+FM
+FO
+FOO
+FOOD
+FOODNETWORK
+FOOTBALL
+FORD
+FOREX
+FORSALE
+FORUM
+FOUNDATION
+FOX
+FR
+FREE
+FRESENIUS
+FRL
+FROGANS
+FRONTDOOR
+FRONTIER
+FTR
+FUJITSU
+FUJIXEROX
+FUN
+FUND
+FURNITURE
+FUTBOL
+FYI
+GA
+GAL
+GALLERY
+GALLO
+GALLUP
+GAME
+GAMES
+GAP
+GAY
+GARDEN
+GB
+GBIZ
+GD
+GDN
+GE
+GEA
+GENT
+GENTING
+GEORGE
+GF
+GG
+GGEE
+GH
+GI
+GIFT
+GIFTS
+GIVES
+GIVING
+GL
+GLADE
+GLASS
+GLE
+GLOBAL
+GLOBO
+GM
+GMAIL
+GMBH
+GMO
+GMX
+GN
+GODADDY
+GOLD
+GOLDPOINT
+GOLF
+GOO
+GOODYEAR
+GOOG
+GOOGLE
+GOP
+GOT
+GOV
+GP
+GQ
+GR
+GRAINGER
+GRAPHICS
+GRATIS
+GREEN
+GRIPE
+GROCERY
+GROUP
+GS
+GT
+GU
+GUARDIAN
+GUCCI
+GUGE
+GUIDE
+GUITARS
+GURU
+GW
+GY
+HAIR
+HAMBURG
+HANGOUT
+HAUS
+HBO
+HDFC
+HDFCBANK
+HEALTH
+HEALTHCARE
+HELP
+HELSINKI
+HERE
+HERMES
+HGTV
+HIPHOP
+HISAMITSU
+HITACHI
+HIV
+HK
+HKT
+HM
+HN
+HOCKEY
+HOLDINGS
+HOLIDAY
+HOMEDEPOT
+HOMEGOODS
+HOMES
+HOMESENSE
+HONDA
+HONEYWELL
+HORSE
+HOSPITAL
+HOST
+HOSTING
+HOT
+HOTELES
+HOTELS
+HOTMAIL
+HOUSE
+HOW
+HR
+HSBC
+HT
+HU
+HUGHES
+HYATT
+HYUNDAI
+IBM
+ICBC
+ICE
+ICU
+ID
+IE
+IEEE
+IFM
+IKANO
+IL
+IM
+IMAMAT
+IMDB
+IMMO
+IMMOBILIEN
+IN
+INC
+INDUSTRIES
+INFINITI
+INFO
+ING
+INK
+INSTITUTE
+INSURANCE
+INSURE
+INT
+INTEL
+INTERNATIONAL
+INTUIT
+INVESTMENTS
+IO
+IPIRANGA
+IQ
+IR
+IRISH
+IS
+ISELECT
+ISMAILI
+IST
+ISTANBUL
+IT
+ITAU
+ITV
+IVECO
+JAGUAR
+JAVA
+JCB
+JCP
+JE
+JEEP
+JETZT
+JEWELRY
+JIO
+JLL
+JM
+JMP
+JNJ
+JO
+JOBS
+JOBURG
+JOT
+JOY
+JP
+JPMORGAN
+JPRS
+JUEGOS
+JUNIPER
+KAUFEN
+KDDI
+KE
+KERRYHOTELS
+KERRYLOGISTICS
+KERRYPROPERTIES
+KFH
+KG
+KH
+KI
+KIA
+KIM
+KINDER
+KINDLE
+KITCHEN
+KIWI
+KM
+KN
+KOELN
+KOMATSU
+KOSHER
+KP
+KPMG
+KPN
+KR
+KRD
+KRED
+KUOKGROUP
+KW
+KY
+KYOTO
+KZ
+LA
+LACAIXA
+LADBROKES
+LAMBORGHINI
+LAMER
+LANCASTER
+LANCIA
+LANCOME
+LAND
+LANDROVER
+LANXESS
+LASALLE
+LAT
+LATINO
+LATROBE
+LAW
+LAWYER
+LB
+LC
+LDS
+LEASE
+LECLERC
+LEFRAK
+LEGAL
+LEGO
+LEXUS
+LGBT
+LI
+LIAISON
+LIDL
+LIFE
+LIFEINSURANCE
+LIFESTYLE
+LIGHTING
+LIKE
+LILLY
+LIMITED
+LIMO
+LINCOLN
+LINDE
+LINK
+LIPSY
+LIVE
+LIVING
+LIXIL
+LK
+LLC
+LOAN
+LOANS
+LOCKER
+LOCUS
+LOFT
+LOL
+LONDON
+LOTTE
+LOTTO
+LOVE
+LPL
+LPLFINANCIAL
+LR
+LS
+LT
+LTD
+LTDA
+LU
+LUNDBECK
+LUPIN
+LUXE
+LUXURY
+LV
+LY
+MA
+MACYS
+MADRID
+MAIF
+MAISON
+MAKEUP
+MAN
+MANAGEMENT
+MANGO
+MAP
+MARKET
+MARKETING
+MARKETS
+MARRIOTT
+MARSHALLS
+MASERATI
+MATTEL
+MBA
+MC
+MCKINSEY
+MD
+ME
+MED
+MEDIA
+MEET
+MELBOURNE
+MEME
+MEMORIAL
+MEN
+MENU
+MERCKMSD
+METLIFE
+MG
+MH
+MIAMI
+MICROSOFT
+MIL
+MINI
+MINT
+MIT
+MITSUBISHI
+MK
+ML
+MLB
+MLS
+MM
+MMA
+MN
+MO
+MOBI
+MOBILE
+MOBILY
+MODA
+MOE
+MOI
+MOM
+MONASH
+MONEY
+MONSTER
+MOPAR
+MORMON
+MORTGAGE
+MOSCOW
+MOTO
+MOTORCYCLES
+MOV
+MOVIE
+MOVISTAR
+MP
+MQ
+MR
+MS
+MSD
+MT
+MTN
+MTR
+MU
+MUSEUM
+MUTUAL
+MV
+MW
+MX
+MY
+MZ
+NA
+NAB
+NADEX
+NAGOYA
+NAME
+NATIONWIDE
+NATURA
+NAVY
+NBA
+NC
+NE
+NEC
+NET
+NETBANK
+NETFLIX
+NETWORK
+NEUSTAR
+NEW
+NEWHOLLAND
+NEWS
+NEXT
+NEXTDIRECT
+NEXUS
+NF
+NFL
+NG
+NGO
+NHK
+NI
+NICO
+NIKE
+NIKON
+NINJA
+NISSAN
+NISSAY
+NL
+NO
+NOKIA
+NORTHWESTERNMUTUAL
+NORTON
+NOW
+NOWRUZ
+NOWTV
+NP
+NR
+NRA
+NRW
+NTT
+NU
+NYC
+NZ
+OBI
+OBSERVER
+OFF
+OFFICE
+OKINAWA
+OLAYAN
+OLAYANGROUP
+OLDNAVY
+OLLO
+OM
+OMEGA
+ONE
+ONG
+ONL
+ONLINE
+ONYOURSIDE
+OOO
+OPEN
+ORACLE
+ORANGE
+ORG
+ORGANIC
+ORIGINS
+OSAKA
+OTSUKA
+OTT
+OVH
+PA
+PAGE
+PANASONIC
+PARIS
+PARS
+PARTNERS
+PARTS
+PARTY
+PASSAGENS
+PAY
+PCCW
+PE
+PET
+PF
+PFIZER
+PG
+PH
+PHARMACY
+PHD
+PHILIPS
+PHONE
+PHOTO
+PHOTOGRAPHY
+PHOTOS
+PHYSIO
+PIAGET
+PICS
+PICTET
+PICTURES
+PID
+PIN
+PING
+PINK
+PIONEER
+PIZZA
+PK
+PL
+PLACE
+PLAY
+PLAYSTATION
+PLUMBING
+PLUS
+PM
+PN
+PNC
+POHL
+POKER
+POLITIE
+PORN
+POST
+PR
+PRAMERICA
+PRAXI
+PRESS
+PRIME
+PRO
+PROD
+PRODUCTIONS
+PROF
+PROGRESSIVE
+PROMO
+PROPERTIES
+PROPERTY
+PROTECTION
+PRU
+PRUDENTIAL
+PS
+PT
+PUB
+PW
+PWC
+PY
+QA
+QPON
+QUEBEC
+QUEST
+QVC
+RACING
+RADIO
+RAID
+RE
+READ
+REALESTATE
+REALTOR
+REALTY
+RECIPES
+RED
+REDSTONE
+REDUMBRELLA
+REHAB
+REISE
+REISEN
+REIT
+RELIANCE
+REN
+RENT
+RENTALS
+REPAIR
+REPORT
+REPUBLICAN
+REST
+RESTAURANT
+REVIEW
+REVIEWS
+REXROTH
+RICH
+RICHARDLI
+RICOH
+RIGHTATHOME
+RIL
+RIO
+RIP
+RMIT
+RO
+ROCHER
+ROCKS
+RODEO
+ROGERS
+ROOM
+RS
+RSVP
+RU
+RUGBY
+RUHR
+RUN
+RW
+RWE
+RYUKYU
+SA
+SAARLAND
+SAFE
+SAFETY
+SAKURA
+SALE
+SALON
+SAMSCLUB
+SAMSUNG
+SANDVIK
+SANDVIKCOROMANT
+SANOFI
+SAP
+SARL
+SAS
+SAVE
+SAXO
+SB
+SBI
+SBS
+SC
+SCA
+SCB
+SCHAEFFLER
+SCHMIDT
+SCHOLARSHIPS
+SCHOOL
+SCHULE
+SCHWARZ
+SCIENCE
+SCJOHNSON
+SCOR
+SCOT
+SD
+SE
+SEARCH
+SEAT
+SECURE
+SECURITY
+SEEK
+SELECT
+SENER
+SERVICES
+SES
+SEVEN
+SEW
+SEX
+SEXY
+SFR
+SG
+SH
+SHANGRILA
+SHARP
+SHAW
+SHELL
+SHIA
+SHIKSHA
+SHOES
+SHOP
+SHOPPING
+SHOUJI
+SHOW
+SHOWTIME
+SHRIRAM
+SI
+SILK
+SINA
+SINGLES
+SITE
+SJ
+SK
+SKI
+SKIN
+SKY
+SKYPE
+SL
+SLING
+SM
+SMART
+SMILE
+SN
+SNCF
+SO
+SOCCER
+SOCIAL
+SOFTBANK
+SOFTWARE
+SOHU
+SOLAR
+SOLUTIONS
+SONG
+SONY
+SOY
+SPACE
+SPORT
+SPOT
+SPREADBETTING
+SR
+SRL
+SRT
+ST
+STADA
+STAPLES
+STAR
+STARHUB
+STATEBANK
+STATEFARM
+STC
+STCGROUP
+STOCKHOLM
+STORAGE
+STORE
+STREAM
+STUDIO
+STUDY
+STYLE
+SU
+SUCKS
+SUPPLIES
+SUPPLY
+SUPPORT
+SURF
+SURGERY
+SUZUKI
+SV
+SWATCH
+SWIFTCOVER
+SWISS
+SX
+SY
+SYDNEY
+SYMANTEC
+SYSTEMS
+SZ
+TAB
+TAIPEI
+TALK
+TAOBAO
+TARGET
+TATAMOTORS
+TATAR
+TATTOO
+TAX
+TAXI
+TC
+TCI
+TD
+TDK
+TEAM
+TECH
+TECHNOLOGY
+TEL
+TELEFONICA
+TEMASEK
+TENNIS
+TEVA
+TF
+TG
+TH
+THD
+THEATER
+THEATRE
+TIAA
+TICKETS
+TIENDA
+TIFFANY
+TIPS
+TIRES
+TIROL
+TJ
+TJMAXX
+TJX
+TK
+TKMAXX
+TL
+TM
+TMALL
+TN
+TO
+TODAY
+TOKYO
+TOOLS
+TOP
+TORAY
+TOSHIBA
+TOTAL
+TOURS
+TOWN
+TOYOTA
+TOYS
+TR
+TRADE
+TRADING
+TRAINING
+TRAVEL
+TRAVELCHANNEL
+TRAVELERS
+TRAVELERSINSURANCE
+TRUST
+TRV
+TT
+TUBE
+TUI
+TUNES
+TUSHU
+TV
+TVS
+TW
+TZ
+UA
+UBANK
+UBS
+UCONNECT
+UG
+UK
+UNICOM
+UNIVERSITY
+UNO
+UOL
+UPS
+US
+UY
+UZ
+VA
+VACATIONS
+VANA
+VANGUARD
+VC
+VE
+VEGAS
+VENTURES
+VERISIGN
+VERSICHERUNG
+VET
+VG
+VI
+VIAJES
+VIDEO
+VIG
+VIKING
+VILLAS
+VIN
+VIP
+VIRGIN
+VISA
+VISION
+VISTAPRINT
+VIVA
+VIVO
+VLAANDEREN
+VN
+VODKA
+VOLKSWAGEN
+VOLVO
+VOTE
+VOTING
+VOTO
+VOYAGE
+VU
+VUELOS
+WALES
+WALMART
+WALTER
+WANG
+WANGGOU
+WARMAN
+WATCH
+WATCHES
+WEATHER
+WEATHERCHANNEL
+WEBCAM
+WEBER
+WEBSITE
+WED
+WEDDING
+WEIBO
+WEIR
+WF
+WHOSWHO
+WIEN
+WIKI
+WILLIAMHILL
+WIN
+WINDOWS
+WINE
+WINNERS
+WME
+WOLTERSKLUWER
+WOODSIDE
+WORK
+WORKS
+WORLD
+WOW
+WS
+WTC
+WTF
+XBOX
+XEROX
+XFINITY
+XIHUAN
+XIN
+XN--11B4C3D
+XN--1CK2E1B
+XN--1QQW23A
+XN--2SCRJ9C
+XN--30RR7Y
+XN--3BST00M
+XN--3DS443G
+XN--3E0B707E
+XN--3HCRJ9C
+XN--3OQ18VL8PN36A
+XN--3PXU8K
+XN--42C2D9A
+XN--45BR5CYL
+XN--45BRJ9C
+XN--45Q11C
+XN--4GBRIM
+XN--54B7FTA0CC
+XN--55QW42G
+XN--55QX5D
+XN--5SU34J936BGSG
+XN--5TZM5G
+XN--6FRZ82G
+XN--6QQ986B3XL
+XN--80ADXHKS
+XN--80AO21A
+XN--80AQECDR1A
+XN--80ASEHDB
+XN--80ASWG
+XN--8Y0A063A
+XN--90A3AC
+XN--90AE
+XN--90AIS
+XN--9DBQ2A
+XN--9ET52U
+XN--9KRT00A
+XN--B4W605FERD
+XN--BCK1B9A5DRE4C
+XN--C1AVG
+XN--C2BR7G
+XN--CCK2B3B
+XN--CG4BKI
+XN--CLCHC0EA0B2G2A9GCD
+XN--CZR694B
+XN--CZRS0T
+XN--CZRU2D
+XN--D1ACJ3B
+XN--D1ALF
+XN--E1A4C
+XN--ECKVDTC9D
+XN--EFVY88H
+XN--ESTV75G
+XN--FCT429K
+XN--FHBEI
+XN--FIQ228C5HS
+XN--FIQ64B
+XN--FIQS8S
+XN--FIQZ9S
+XN--FJQ720A
+XN--FLW351E
+XN--FPCRJ9C3D
+XN--FZC2C9E2C
+XN--FZYS8D69UVGM
+XN--G2XX48C
+XN--GCKR3F0F
+XN--GECRJ9C
+XN--GK3AT1E
+XN--H2BREG3EVE
+XN--H2BRJ9C
+XN--H2BRJ9C8C
+XN--HXT814E
+XN--I1B6B1A6A2E
+XN--IMR513N
+XN--IO0A7I
+XN--J1AEF
+XN--J1AMH
+XN--J6W193G
+XN--JLQ61U9W7B
+XN--JVR189M
+XN--KCRX77D1X4A
+XN--KPRW13D
+XN--KPRY57D
+XN--KPU716F
+XN--KPUT3I
+XN--L1ACC
+XN--LGBBAT1AD8J
+XN--MGB9AWBF
+XN--MGBA3A3EJT
+XN--MGBA3A4F16A
+XN--MGBA7C0BBN0A
+XN--MGBAAKC7DVF
+XN--MGBAAM7A8H
+XN--MGBAB2BD
+XN--MGBAI9AZGQP6J
+XN--MGBAYH7GPA
+XN--MGBB9FBPOB
+XN--MGBBH1A
+XN--MGBBH1A71E
+XN--MGBC0A9AZCG
+XN--MGBCA7DZDO
+XN--MGBERP4A5D4AR
+XN--MGBGU82A
+XN--MGBI4ECEXP
+XN--MGBPL2FH
+XN--MGBT3DHD
+XN--MGBTX2B
+XN--MGBX4CD0AB
+XN--MIX891F
+XN--MK1BU44C
+XN--MXTQ1M
+XN--NGBC5AZD
+XN--NGBE9E0A
+XN--NGBRX
+XN--NODE
+XN--NQV7F
+XN--NQV7FS00EMA
+XN--NYQY26A
+XN--O3CW4H
+XN--OGBPF8FL
+XN--OTU796D
+XN--P1ACF
+XN--P1AI
+XN--PBT977C
+XN--PGBS0DH
+XN--PSSY2U
+XN--Q9JYB4C
+XN--QCKA1PMC
+XN--QXAM
+XN--RHQV96G
+XN--ROVU88B
+XN--RVC1E0AM3E
+XN--S9BRJ9C
+XN--SES554G
+XN--T60B56A
+XN--TCKWE
+XN--TIQ49XQYJ
+XN--UNUP4Y
+XN--VERMGENSBERATER-CTB
+XN--VERMGENSBERATUNG-PWB
+XN--VHQUV
+XN--VUQ861B
+XN--W4R85EL8FHU5DNRA
+XN--W4RS40L
+XN--WGBH1C
+XN--WGBL6A
+XN--XHQ521B
+XN--XKC2AL3HYE2A
+XN--XKC2DL3A5EE0H
+XN--Y9A3AQ
+XN--YFRO4I67O
+XN--YGBI2AMMX
+XN--ZFR164B
+XXX
+XYZ
+YACHTS
+YAHOO
+YAMAXUN
+YANDEX
+YE
+YODOBASHI
+YOGA
+YOKOHAMA
+YOU
+YOUTUBE
+YT
+YUN
+ZA
+ZAPPOS
+ZARA
+ZERO
+ZIP
+ZIPPO
+ZM
+ZONE
+ZUERICH
+ZW
diff --git a/library/cpp/tld/ut/ya.make b/library/cpp/tld/ut/ya.make
new file mode 100644
index 0000000000..0bc5b40b2f
--- /dev/null
+++ b/library/cpp/tld/ut/ya.make
@@ -0,0 +1,15 @@
+# Unit-test target for library/cpp/tld.
+UNITTEST()
+
+OWNER(abolkhovityanov)
+
+# The library under test.
+PEERDIR(
+ ADDINCL library/cpp/tld
+)
+
+# tld_ut.cpp is located in library/cpp/tld, one level above this ya.make.
+SRCDIR(library/cpp/tld)
+
+SRCS(
+ tld_ut.cpp
+)
+
+END()
diff --git a/library/cpp/tld/ya.make b/library/cpp/tld/ya.make
new file mode 100644
index 0000000000..aeabbfeebf
--- /dev/null
+++ b/library/cpp/tld/ya.make
@@ -0,0 +1,19 @@
+# Library target for the TLD lookup helpers implemented in tld.cpp.
+LIBRARY()
+
+OWNER(abolkhovityanov)
+
+# Invokes gen_tld.py on tlds-alpha-by-domain.txt and stores its stdout as tld.inc,
+# the file that tld.cpp includes.
+PYTHON(
+ gen_tld.py tlds-alpha-by-domain.txt
+ IN tlds-alpha-by-domain.txt
+ STDOUT tld.inc
+)
+
+SRCS(
+ tld.cpp
+)
+
+# tld.cpp includes library/cpp/digest/lower_case/hash_ops.h.
+PEERDIR(
+ library/cpp/digest/lower_case
+)
+
+END()