diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/codecs/ut | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/codecs/ut')
-rw-r--r-- | library/cpp/codecs/ut/codecs_ut.cpp | 1360 | ||||
-rw-r--r-- | library/cpp/codecs/ut/float_huffman_ut.cpp | 237 | ||||
-rw-r--r-- | library/cpp/codecs/ut/tls_cache_ut.cpp | 36 | ||||
-rw-r--r-- | library/cpp/codecs/ut/ya.make | 20 |
4 files changed, 1653 insertions, 0 deletions
diff --git a/library/cpp/codecs/ut/codecs_ut.cpp b/library/cpp/codecs/ut/codecs_ut.cpp new file mode 100644 index 0000000000..caf6089aef --- /dev/null +++ b/library/cpp/codecs/ut/codecs_ut.cpp @@ -0,0 +1,1360 @@ +#include <library/cpp/codecs/delta_codec.h> +#include <library/cpp/codecs/huffman_codec.h> +#include <library/cpp/codecs/pfor_codec.h> +#include <library/cpp/codecs/solar_codec.h> +#include <library/cpp/codecs/zstd_dict_codec.h> +#include <library/cpp/codecs/comptable_codec.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/buffer.h> +#include <util/string/util.h> +#include <util/string/hex.h> +#include <library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h> + +namespace { + const char* TextValues[] = { + "! сентября газета", + "!(возмездие это)!", + "!(материнский капитал)", + "!(пермь березники)", + "!биография | !жизнь / + розинг | зворыгин & изобретение | телевидение | электронно лучевая трубка", + "!овсиенко николай павлович", + "!путин", + "\"i'm on you\" p. 
diddy тимати клип", + "\"билайн\" представит собственный планшет", + "\"в особо крупном размере\"", + "\"викиликс\" джулиан ассанж", + "\"вимм билль данн", + "\"газэнергосеть астрахань", + "\"газэнергосеть астрахань\"", + "\"домодедово\" ту-154", + "\"жилина\" \"спартак\" видео", + "\"зелёнsq шершнm\"", + "\"зелёного шершня\"", + "\"золотой граммофон\" марины яблоковой", + "\"золотой граммофон-2010\"", + "\"калинниковы\"", + "\"манчестер юнайтед\" (англия) \"валенсия\" (испания) 1:1 (0:1)", + "\"маркер\"", + "\"моника\" засыпает москву снегом", + "\"моника\" снегопад", + "\"о безопасности\",", + "\"памятку\" для пассажиров воздушных международных рейсов", + "\"петровский парк\" и \"ходынское поле\"", + "\"путинская\" трава", + "\"пятерочка\"купила \"копейку\"", + "\"пятёрочка\" и \"копейка\" объединились", + "\"реал\" \"осер\" 4:0", + "\"речь мутко\"", + "\"российский лес 2010\"", + "\"ростехинвентаризация федеральное бти\" рубцов", + "\"саня останется с нами\",", + "\"следопыт\" реалити шоу", + "\"слышишь\" молодые авторы", + "\"стадион\"", + "\"ходынское поле\" метро", + "\"хроники нарнии\"", + "\"чистая вода\"", + "\"школа деда мороза\"", + "# asus -1394", + "# сторонники wikileaks", + "#106#", + "#11", + "#8 какой цвет", + "#если клиент", + "$ 13,79", + "$ xnj ,s dct ,skb ljdjkmys !!!", + "$ в день", + "$ диск компьютера", + "$.ajax", + "$125 000", + "$курс", + "% в си", + "% влады", + "% годовых", + "% женщин и % мужчин в россии", + "% занятости персонала", + "% инфляции 2010", + "% инфляции в 2010 г.", + "% налога", + "% налогов в 2010г.", + "% общего количества", + "% от числа", + "% по налогу на прибыль организации", + "%24", + "%академия%", + "%комарова%татьяна", + "& в 1с", + "&& (+не существует | !такой проблемы)", + ">>>скачать | download c cs strikez.clan.su<<<", + ">hbq nbityrjd", + "< какой знак", + "< лицей | < техническая школа# < история#< лицей сегодня#< перечень профессий#< руководство лицея#< прием учащихся#< контакты#< схема проезда#< 
фотогалереяистория создания лицея и основные этапы путиулица купчинская дом 28", + "<<link>>", + "</storage>", + "<bfnkjy", + "<bktntd", + "<cr", + "<ddr3>", + "<e[ufknthcrbq abyfycjdsq", + "<fcctqys", + "<fhcf", + "<fhctkjyf he,by", + "<firbhbz", + "<fyr djphj;ltybt", + "<fyr vjcrds", + "<fyr резерв", + "<fyufkjh", + "<index>", + "<jkmifz jrhe;yfz rbtd", + "<kbpytws", + "<megafon> интернет", + "<thtpybrb gthvcrbq rhfq", + "<tkjxrf", + "<беларусь это мы", + "<бокс, версия ibf", + "designer tree svc", + "seriesg810", + "doll makers", + "rotten.com", + "evening gowns", + "discover", + "south carolina escorts", + "forkliftjobsinhousron", + "mailbox", + "alexis", + "espn.com mlb", + "gypsy.chat.2k", + "the man in the mirror", + "azteca", + "sebastian telfair - jamel thomas", + "kirby", + "java", + "trike motorcycles", + "piasecki helicopter", + "wicca binding spells", + "pier park panama city beach .com", + "continente europeo", + "asswatchers.com", + "asswatchers.com", + "easton stealth stiff flex cnt adult baseball bat - 3", + "facesofdeath", + "video of 9 11", + "profileedit.myspace.com", + "georgia snakes", + "yahoo.com", + "google", + "http wwwclassicindustries .corvettes-roadsters.com", + "arington training stable", + "find bred of dog", + "southpark contact tables for myspace", + "symptoms of laryngitis", + "suzuki stickers", + "avianca", + "radio shack", + "dominican republic pictures", + "recent", + "mapquest", + "http myspace .com", + "research chemicals supplies", + "winn dixie.com", + "drivers 20guide.com", + "dylan whitley north carolina", + "google com", + "order wild horses cigarettes", + "yahoocom", + "fl runners", + "aol companion install", + "nbc.comdond 59595 6", + "directv.com", + "motorsports insurance", + "cartoonnetwork", + "pop warner-victorville", + "black iorn spars", + "goog", + "the suns", + "ebay", + "pop warner", + "philadelphia cream cheese", + "oklahoma", + "doudleday books.com", + "javascript download", + "city of nacogdoches", + 
"sfyl", + "myspace.com", + "baptism pictures", + "games", + "depredadores sexuales", + "mycl.cravelyrics.com", + "become a bone marrow donner", + "vintage copies", + "ford dealership", + "candystand", + "smarthairypussyom", + "yahoo.com", + "vanderbilt.edu", + "ebay", + "grouper", + "mys", + "myrsa and birth defects", + "hatteras rentals", + "female escorts", + "ja rule", + "meat bluesheet", + "yahoo", + "american disability act court cases", + "clearview cinemas", + "hard69.com", + "make a living will for free", + "fat asses", + "flashback concert in atlanta ga", + "fucking", + "flat abdomen exercises", + "big brother facial", + "german dictionary", + "black dick", + "ebonymovies", + "airsoft rifles", + "best fishing days calander", + "tattoo", + "impressions", + "cs.com", + "northwest airlines reservations", + "halo 3", + "wallbaums", + "chat room listings", + "waterbury ct warrants", + "pictures of chad michael murry", + "yahoo", + "install wallpaper", + "halo 3", + "clits and tits", + "prothsmouth general circuit courts", + "old hawthorne columbia", + "jess lee photos", + "no deposit casino bonus", + "bbc gladiator dressed to kill", + "anemagazine.com", + "lyrics unfaithful", + "gold bars found", + "art.comhttp", + "free unlock key", + "man o war lost a race", + "blue cross and blue shield", + "phenergan", + "myspace.com", + "http www.constitutional court.com", + "monster trucks", + "the breeze fort myers fla.newspaper", + "davis origin name", + "upper deck.com", + "arizona", + "akira lane", + "ebaumsworld", + "union pacific jobs", + "google.cm", + "free bigt girls nudes", + "abcnews.com", + "tootse.com", + "az lyrics", + "freddy", + "georgia.com", + "johncombest.com", + "nelly", + "gussi mane", + "university of illinois", + "oregan valcano's", + "mythbusters", + "sailormoon hentai", + "international cub tractor", + "desert sky movie green valley az", + "evite", + "nelly nud epics", + "penndot.com", + "first banks", + "psp manual", + "google", + "jackieaudet 
hotmail.com", + "internet", + "shootinggames", + "shootinggames", + "montana western rendezvous of art", + "hello kitty layouts", + "yahoo", + "translation", + "glenn scott attorney", + "hallofshame", + "capitolone.com", + "recipe for popovers", + "pictures of demons", + "barnes and nobles.com", + "rbd", + "hart and hunnington tattoo shop", + "janepowellmovies.com", + "ged schools in the military", + "kelis", + "hvacagent", + "neat home organizer television show", + "2719 24-2-crime and courts", + "fsu", + "torpedo bomber games", + "love poems", + "polly pocket'toys", + "yweatherahoo.com", + "jungle gin", + "flemington new jersey real estate", + "milf hunter stories", + "budget.com", + "chopperstyle", + "keno player", + "up skirt", + "dogs", + "beerballers", + "phat white butt", + "phat white butt", + "va licensing for interpeters for the deaf", + "white page phone book maiden north carolina", + "controlled 20solutions 20corp.com", + "friedman jewelery", + "kelis", + "curtains", + "curtains", + "fuck me harder", + "naked girls", + "southwest airlines boarding pass", + "mailbox", + "1976 mavrick", + "adult diapers", + "horse nasal discharge", + "charles ludlam", + "google", + "himnos en espanol", + "quarter horses for sale in nebraska", + "cosmo", + "hi", + "mattel", + "aouto 20trader.com", + "sunsetter awnings", + "bl.cfm", + "at", + "tattoo designs", + "bubs", + "yahoo", + "free live gay cam chats", + "antibiotics", + "upgrade", + "aessuccess.org", + "yahoo", + "boobdex", + "the jackle", + "plus size lingerie magazines for home", + "lehigh valley little league", + "ancient trade coins", + "pillsbury", + "colorado springs", + "canada aviation jobs", + "free guitar tablature", + "kids aol", + "capitol community colage", + "kevin thomas bermuda", + "missouri lotto", + "homedepotsportscomplex.com", + "dr. franklin schneier", + "williamsburg va. 
hotels", + "aim", + "morningbuzz", + "probusines.com", + "wwwalbasoul.com", + "w.runehints.com", + "yahoo.com", + "yahoo.com", + "yahoo.com", + "fantasy 5", + "xxx rape", + "hawaiian gift baskets", + "madonna.com", + "myspace contact tables", + "white cock", + "safe space", + "drinks", + "o rly", + "dsl", + "wwww.uncc.edu", + "wwww.uncc.edu", + "wwww.uncc.edu", + "online overseas checkt.westernunion.com", + "angina", + "heba technologies", + "hebrew ancient coins", + "games", + "recent", + "international male.com", + "sex pics", + "paul wall layouts for myspace", + "health", + "wire lamp shade frames", + "windows", + "top business colleges", + "mary jo eustace", + "attored", + "oklahoma indian legal services", + "6arab", + "santo nino", + "10.1.0.199", + "http www.myspace.com daffydonn07", + "marine electrical", + "sandy creek cabins weekend new york", + "onionbutts", + "tucson classifieds", + "new york times", + "recently deleted screen names", + "goldeneagle.net", + "fta support forums", + "low protein in bloos", + "datring", + "lilwayne", + "free billiards games", + "yahoo", + "ako", + "a.tribalfusion.c script language", + "dustin davis", + "cooking", + "yahoo.com", + "universal studios", + "adult chat", + "santa monica flea market", + "carpevino.us", + "wine vinyard in stewertstown pa", + "y", + "craigslist", + "ups.com", + "1-866-347-3292", + "renegade boats", + "renegade boats", + "sunset state beach caping", + "artofstanlync.org", + "heart-i want make love to you video", + "triangles around the world", + "mycl.cravelyrics.com", + "in the bible what type of persons were forced to walk around in public and say unclean unclean", + "providence water fire", + "googlecom", + "yahoo.com", + "b.g", + "website de rebelde", + "stoplinks", + "allison 2000 transmission", + "thepriceanduseofgasoline.com", + "chamillinaire", + "veryspecialhomescom", + "crashbandicoot", + "a short sex story", + "yahoo.com", + "music now", + "east carolina university", + "vandalism in new 
york", + "the bainde soleil company", + "dicaprio movies", + "xxx dvds", + "visual basic scripting support", + "english bulldogs", + "travelocity.com", + "website for asstr.org", + "hypnotic slave training", + "pogo", + "university at buffalo addmissions", + "screen name services", + "superdrol", + "art institute", + "online business cards", + "aolfinancial", + "upgrade shop", + "anderson abrasive", + "weatherchannel.com", + "recent", + "ebay", + "diagram and xray of a normal shouldercheck out surgicalpoker.comfor more sports medicine and orthopedic information and images check out emedx.com by dr. allan mishranormal diagram normal x-ray", + "95 mustang gt chips", + "gold grills", + "hap housing in portland or", + "car sales", + "swimming with dolphins", + "jennifer lopez nude", + "wwwdubcnn.com", + "dominicks pizza", + "fl studio", + "http blackplanet .com", + "http blackplanet .com", + "http blackplanet .com", + "A$AP Rocky", + "benie mac", + "fujifilm.com", + "aol dialup setup", + "metal fabrication tools", + "internet", + "buy my painting", + "pulaski va classifieds", + "w.coj.net", + "postopia.com", + "no medical records hydrocodone", + "auto completes for deal or no deal contest", + "http www. 
big monster dicks .com", + "invacare wheelchairs", + "musicdownload.com", + "president bush", + "heavy equipment", + "inmate information", + "allina.com", + "megan law.gov", + "wwwl.eharmony.com", + "jobs in colombiaoqx0nq", + "beastsex", + "ferguisson", + "heart-i wanna make love to you vedio", + "west georgia university", + "west georgia university", + "hsn", + "bb&t", + "midas realty", + "yahoo", + "mytrip.com", + "donna texas mcdonalds", + "free picture of our lady", + "bubs", + "taken chemo for 5 month's cancer can still be seen on ct scan", + "porn 20video 20clips", + "lake monsters", + "freedj mix vibes", + "myspace.coim", + "la joya school district tx", + "colorado bungee jumping", + "yahoo", + "google.com", + "lafayette co vampire grave", + "ice cube", + "internet", + "tccd.edu", + "google", + "people", + "instructions on putting together a filing cabinet", + "click.babycenter.com", + "90minut", + "ramien noodles", + "lilwayne", + "danni virgin", + "nice sexy girls.com", + "guttural pouch", + "free male masturbating", + "good", + "rotton 20dot.com", + "fox sports", + "seth rogen", + "desb.mspaceads.com", + "betjc.com", + "pictures of quebec", + "gold in quartz", + "evergreen college", + "runescape", + "gastons white river resort", + "sunset beach santa cruz", + "auto parts", + "travelocity", + "myspace.com", + "laptops", + "beyaonce and j", + "free gay ebony knights webcams", + "google", + "derek watson", + "alice in wonderland tshirts", + "hippa p rivacy act", + "down payment mortgage", + "believe it or not", + "mys", + "datatreca", + "onesuite", + "names", + "lil john", + "scales of justice cuff links", + "localsales.com", + "alametris denise lipsey", + "adam for adam", + "flip flops crochet", + "arbors", + "heb hospital", + "myspae.com", + "midevil breast torture", + "askjeeves", + "assparade", + ".comhttp", + "weekly hotels reston virginia", + "noiceinparadise.com", + "pre diabetic diet", + "h.i.m.com", + "myspace", + "myspace", + "wwww.sex.lp.cpm", + 
"mcso mugshots", + "roush", + "wellfargo", + "lilwayne", + "hopecherie", + "frontgate.com", + "barbados registration department", + "american pitbull", + "free pc full flight simulation game downloads", + "google", + "vaginal secretion grey stuff", + "myspace layouts", + "kanye west", + "walmart", + "pain in hip and leg", + "tenneesseeaquarium.com", + "suncom.com", + "alysseandrachelwerehere", + "pimiclo", + "starmagazine.com", + "classifieds", + "mount rushmore in dakota", + "sams", + "disney com", + "beastyality", + "chief joseph paintings", + "henry scott", + "paris hilton", + "kb903235", + "autotrader", + "irish traveller", + "ajcobs.com", + "art of stanlync.org", + "fox news", + "freeporn", + "depo provera", + "air france", + "talk city active chats", + "codes for the gamecube game resident evil 4", + "good food to eat for sugar diabetes", + "warpmymind", + "arc jacksonville fl", + "7fwww.sendspace.com", + "j blackfoot", + "mcso madison street jail inmate", + "macys", + "eduscapes", + "free picture of our lady", + "http www.eastman.org", + "minneapolisstartribune localnews", + "minneapolisstartribune localnews", + "tennessee", + "foodtown", + "anti virous download", + "http www.mdland rec.net", + "ed edd eddy", + "maryjbilge", + "shipping services", + "baseball videogames", + "egyption ancient coins", + "internet", + "what is sodomy", + "international cub lowboy", + "mary j. 
bilge", + "scenic backgrounds", + "google.com", + "rosettalangueges.com", + "titanpoker.net", + "titie show", + "edelen realtor", + "lil cim", + "china.com", + "boost mobile", + "nc eipa", + "people's 20pharmacy 20guide 20to", + "costco", + "charles schultz drawings", + "nicisterling", + "a picture of author stephen crane", + "yahoo.com", + "sponge bob myspace layouts", + "g", + "calendar creator", + "careerbuilder.com", + "cool tex for web pages", + "yahoo.com", + "mcdougal littel", + "sign on", + "superman", + "radio", + "lajollaindians.com", + "mike tyson died", + "pink panther", + "lolita newgroups", + "nude girls", + "galveston 20texas", + "gerlach meat co.", + "thetakeover2006.com", + "yahoo", + "simpsons movie", + "saxy", + "yahoo", + "21st century realty", + "new zealand", + "dogs", + "weather", + "free porn sex", + "bugs bunny parties", + "mortal kombat 2 fatalities", + "sea life park hawaii", + "songs for middle school choir", + "rocky mountain jeep", + "householdbank.com", + "birdville isd", + "brutal dildo", + "brutal dildo", + "free live gay cam chats", + "wonder woman", + "ebay com", + "myspace.com", + "boost mobile", + "desktop themes sex", + "myspace.com", + "myspace.com", + "maroon chevy auto dealership", + "beyonce", + "cleopatra vii", + "accountcentralonline.com", + "juvenile", + "the game cock", + "pics of ashland city tennessee", + "coherent deos", + "microwsoft wireless connection", + "best buy", + "southwest airlines", + "southwest airlines", + "pogo games", + "family court record room in brooklyn newyork", + "60.ufc.net", + "us mint", + "people", + "firstcitycreditunion", + "washington mutual careers", + "beyonce", + "tab energy drink", + "http vemmabuilder.com", + "new york state lottery", + "yahoo", + "tmobile", + "yellow pages.com", + "az.central.com", + "pasco auto salvage", + "im help", + "home based businesses", + "studyisland", + "bible study from king james on 1 corinthians chapter 6 verses 18- 20", + "bellevue-ne", + "msn.com", + 
"aolsignupfree", + "the simsons", + "nevada", + "forsyth central high school", + "road state college", + "does my child have adhd", + "les tucanesde tijuana", + "yahoo.com", + "mexican pharmacy hyrocodone", + "ford motor co year end sales", + "google.com", + "google.com", + "person.com", + "marylyn monroe", + "nfl", + "the hun.net", + "nkena anderson", + "free netscape download", + "top fifty colleges", + "wil.", + "memphis tennessee", + "yahoo mail", + "corrections officer of juveniles", + "jada pinkett smith", + "mapquest.com", + "apartments", + "msn.com", + "msn.com", + "wasco state prison", + "solitaire", + "http", + "freeport seaman center", + "futbol soccer", + "screen names", + "kmov.com", + "survey.otxresearch.com", + "facial shaves", + "gle", + "flw.com", + "seasportboats.com", + "toysrus.com", + "animated sexy graphics", + "colombia", + "unitarian univeralist association", + "fr", + "google video.com", + "660-342-1072", + "suzan-lori parks", + "male facial", + "william bouguereau first kiss how much it is worth", + "streetfighter", + "nick.com", + "wonder woman", + "pentagram", + "mcafee virus protection", + "diary", + "037f34742140a5f761ad51d95180b4f8", + "free porn", + "no deposit casino bonus", + "spongebob the movie myspace layouts", + "on line banking", + "equestrian properties for sale", + "kazaa free muisc download", + "gay truckers", + "24", + "pay-pal", + "www yahoo.com", + "phatazz.white hoes", + "planets of the universe", + "free movies", + "budget rentals special", + "yahoogames", + "talaat pasha", + "mariah carey song lyrics don't forget about us", + "futbol soccer", + "msn groups", + "martha steward", + "martha steward", + "soap opera scoops cbs", + "cingular", + "stuwie", + "womengiving blowjobs", + "hear dancing queen by abba", + "love song", + "fhsaa.org", + "any dvd", + "any dvd", + "gallery.brookeskye.com", + "gibson ranch", + "wachovia com", + "kzg golf information", + "skylight curtains", + "c", + "123freeweblayouts.com", + 
"yahoo.com", + "allie.com", + "ghosts of bingham cemetery", + "resume maker", + "resume maker", + "resume maker", + "lymphomatoid papulosis", + "sez.com", + }; +} + +class TCodecsTest: public TTestBase { + UNIT_TEST_SUITE(TCodecsTest); + UNIT_TEST(TestPipeline) + UNIT_TEST(TestDelta) + UNIT_TEST(TestHuffman) + UNIT_TEST(TestZStdDict) + UNIT_TEST(TestCompTable) + UNIT_TEST(TestHuffmanLearnByFreqs) + UNIT_TEST(TestSolar) + UNIT_TEST(TestPFor) + UNIT_TEST(TestRegistry) + + UNIT_TEST_SUITE_END(); + +private: + // Builds the assertion message for a round-trip mismatch: the codec name, the sample index, + // and hex dumps of the expected bytes (learn) and the decoded bytes (test). + // Messages longer than 1536 characters are shortened to their first and last 512 characters + // with a "...<skipped N>..." marker in between, where N is the number of characters dropped. + TString PrintError(TStringBuf learn, TStringBuf test, TStringBuf codec, ui32 i) { + TString s; + TStringOutput sout(s); + sout << codec << ": " << i << ", " + << "\n"; + sout << HexEncode(learn.data(), learn.size()); //NEscJ::EscapeJ<true>(learn, sout); + sout << " != \n"; + sout << HexEncode(test.data(), test.size()); //NEscJ::EscapeJ<true>(test, sout); + + if (s.Size() > 1536) { + // 512 leading + 512 trailing characters are kept, hence size() - 1024 skipped. + TString res = s.substr(0, 512); + res.append("...<skipped ").append(ToString(s.size() - 1024)).append(">..."); + res.append(s.substr(s.size() - 512)); + // Return the shortened text; previously it was built and then discarded. + return res; + } + + return s; + } + + // Wraps the buffer's data pointer and size in a TStringBuf. + TStringBuf AsStrBuf(const TBuffer& b) { + return TStringBuf(b.data(), b.size()); + } + + template <typename TCodec, bool testsaveload> + void TestCodec(const TVector<TBuffer>& inlearn = TVector<TBuffer>(), const TVector<TBuffer>& in = TVector<TBuffer>(), NCodecs::TCodecPtr c = new TCodec) { + using namespace NCodecs; + + TBuffer buff; + + { + TVector<TBuffer> out; + + c->Learn(inlearn.begin(), inlearn.end()); + + if (testsaveload) { + { + TBufferOutput bout(buff); + ICodec::Store(&bout, c); + } + + { + TBufferInput bin(buff); + c = ICodec::Restore(&bin); + UNIT_ASSERT(c->AlreadyTrained()); + } + } + + { + size_t insz = 0; + size_t outsz = buff.Size(); + + for (ui32 i = 0; i < inlearn.size(); ++i) { + out.emplace_back(); + c->Encode(AsStrBuf(inlearn[i]), out[i]); + + insz += inlearn[i].Size(); + outsz += out[i].Size(); + } + + TBuffer vecl; + for (ui32 i = 0; i < out.size(); ++i) { + vecl.Clear(); + 
c->Decode(AsStrBuf(out[i]), vecl); + + UNIT_ASSERT_EQUAL_C(AsStrBuf(inlearn[i]), AsStrBuf(vecl), + PrintError(TStringBuf(inlearn[i].data(), inlearn[i].size()), + TStringBuf(vecl.data(), vecl.size()), c->GetName(), i)); + } + } + } + + { + if (testsaveload) { + TBufferInput bin(buff); + c = ICodec::Restore(&bin); + } + + size_t insz = 0; + size_t outsz = buff.Size(); + + TBuffer out, in1; + for (ui32 i = 0; i < in.size(); ++i) { + out.Clear(); + in1.Clear(); + c->Encode(AsStrBuf(in[i]), out); + insz += in[i].Size(); + outsz += out.Size(); + c->Decode(AsStrBuf(out), in1); + UNIT_ASSERT_EQUAL_C(AsStrBuf(in[i]), AsStrBuf(in1), + PrintError(TStringBuf(in[i].data(), in[i].size()), + TStringBuf(in1.data(), in1.size()), c->GetName(), i)); + } + } + } + + template <class T> + void AppendTo(TBuffer& b, T t) { + b.Append((char*)&t, sizeof(t)); + } + + void TestDelta() { + using namespace NCodecs; + TVector<TBuffer> d; + + // 1. common case + d.emplace_back(); + AppendTo(d.back(), 1ULL); + AppendTo(d.back(), 10ULL); + AppendTo(d.back(), 100ULL); + AppendTo(d.back(), 1000ULL); + AppendTo(d.back(), 10000ULL); + AppendTo(d.back(), 100000ULL); + + // 2. delta overflow + d.emplace_back(); + AppendTo(d.back(), 1ULL); + AppendTo(d.back(), 10ULL); + AppendTo(d.back(), 100ULL); + AppendTo(d.back(), 1000ULL); + AppendTo(d.back(), (ui64)-100LL); + AppendTo(d.back(), (ui64)-10ULL); + + // 3. 
bad sorting + d.emplace_back(); + AppendTo(d.back(), 1ULL); + AppendTo(d.back(), 10ULL); + AppendTo(d.back(), 1000ULL); + AppendTo(d.back(), 100ULL); + AppendTo(d.back(), 10000ULL); + AppendTo(d.back(), 100000ULL); + + // all bad + d.emplace_back(); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + + TestCodec<TDeltaCodec<ui64, true>, false>(d); + TestCodec<TDeltaCodec<ui64, false>, false>(d); + } + + void TestPFor() { + using namespace NCodecs; + { + TVector<TBuffer> d; + d.emplace_back(); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -1LL); + d.emplace_back(); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), 2LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), 2LL); + d.emplace_back(); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), 2LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), 1LL); + AppendTo(d.back(), 2LL); + d.emplace_back(); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -2LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -2LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), 0LL); + AppendTo(d.back(), -1LL); + AppendTo(d.back(), -2LL); + + TestCodec<TPForCodec<ui64>, false>(d); + TestCodec<TPForCodec<ui64, true>, true>(d); + } + { + TVector<TBuffer> d; + d.emplace_back(); + AppendTo(d.back(), -1); + AppendTo(d.back(), -1); + AppendTo(d.back(), -1); + AppendTo(d.back(), -1); + d.emplace_back(); + AppendTo(d.back(), 0); + AppendTo(d.back(), 1); + AppendTo(d.back(), 2); + AppendTo(d.back(), 1); + AppendTo(d.back(), -1); + AppendTo(d.back(), 0); + AppendTo(d.back(), 1); + AppendTo(d.back(), 2); + d.emplace_back(); + AppendTo(d.back(), 0); + AppendTo(d.back(), -1); + AppendTo(d.back(), -2); + AppendTo(d.back(), -1); + AppendTo(d.back(), 
-2); + AppendTo(d.back(), -1); + AppendTo(d.back(), 0); + AppendTo(d.back(), -1); + AppendTo(d.back(), -2); + + TestCodec<TPForCodec<ui32>, false>(d); + TestCodec<TPForCodec<ui32, true>, false>(d); + } + { + TVector<TBuffer> d; + d.emplace_back(); + for (auto& textValue : TextValues) { + AppendTo(d.back(), (ui32)strlen(textValue)); + } + + TestCodec<TPForCodec<ui32>, false>(d); + TestCodec<TPForCodec<ui32, true>, false>(d); + } + { + TVector<TBuffer> d; + d.emplace_back(); + for (auto& textValue : TextValues) { + AppendTo(d.back(), (ui64)strlen(textValue)); + } + + TestCodec<TPForCodec<ui64>, false>(d); + TestCodec<TPForCodec<ui64, true>, false>(d); + } + } + + template <class TCodec> + void DoTestSimpleCodec() { + using namespace NCodecs; + { + TVector<TBuffer> learn; + + for (auto& textValue : TextValues) { + learn.emplace_back(textValue, strlen(textValue)); + } + + TestCodec<TCodec, true>(learn); + } + { + TestCodec<TCodec, true>(); + } + + { + TVector<TBuffer> learn; + learn.emplace_back(); + learn.back().Append('a'); + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + test.back().Append((ui8)i); + } + + TestCodec<TCodec, true>(learn, test); + } + + { + TVector<TBuffer> learn; + learn.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + for (ui32 j = 0; j < i; ++j) { + learn.back().Append((ui8)i); + } + } + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + test.back().Append((ui8)i); + } + + TestCodec<TCodec, true>(learn, test); + } + + { + TVector<TBuffer> learn; + learn.emplace_back(); + for (ui32 i = 0; i < 128; ++i) { + for (ui32 j = 0; j < i; ++j) { + learn.back().Append((ui8)i); + } + } + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 128; i < 256; ++i) { + test.back().Append((ui8)i); + } + + TestCodec<TCodec, true>(learn, test); + } + } + + void TestHuffman() { + DoTestSimpleCodec<NCodecs::THuffmanCodec>(); + } + + void TestZStdDict() { + using namespace NCodecs; + { + 
TVector<TBuffer> learn; + + for (auto& textValue : TextValues) { + learn.emplace_back(textValue, strlen(textValue)); + } + + TestCodec<TZStdDictCodec, true>(learn); + } + + } + + void TestCompTable() { + DoTestSimpleCodec<NCodecs::TCompTableCodec>(); + } + + void TestHuffmanLearnByFreqs() { + using namespace NCodecs; + + TVector<TBuffer> data; + + for (auto& textValue : TextValues) { + data.emplace_back(textValue, strlen(textValue)); + } + + TVector<TBuffer> outLearn; + + { + THuffmanCodec codec; + static_cast<ICodec&>(codec).Learn(data.begin(), data.end()); + + for (ui32 i = 0; i < data.size(); ++i) { + outLearn.emplace_back(); + codec.Encode(AsStrBuf(data[i]), outLearn[i]); + } + } + + TVector<TBuffer> outLearnByFreqs; + + { + THuffmanCodec codec; + std::pair<char, ui64> freqs[256]; + + for (size_t i = 0; i < Y_ARRAY_SIZE(freqs); ++i) { + freqs[i].first = (char)i; + freqs[i].second = 0; + } + + for (auto& textValue : TextValues) { + size_t len = strlen(textValue); + for (size_t j = 0; j < len; ++j) { + ++freqs[(ui32)(0xFF & textValue[j])].second; + } + } + + codec.LearnByFreqs(TArrayRef<std::pair<char, ui64>>(freqs, Y_ARRAY_SIZE(freqs))); + + for (ui32 i = 0; i < data.size(); ++i) { + outLearnByFreqs.emplace_back(); + codec.Encode(AsStrBuf(data[i]), outLearnByFreqs[i]); + } + } + + UNIT_ASSERT_EQUAL(outLearn.size(), outLearnByFreqs.size()); + const size_t sz = outLearn.size(); + for (size_t n = 0; n < sz; ++n) { + UNIT_ASSERT_EQUAL(AsStrBuf(outLearn[n]), AsStrBuf(outLearnByFreqs[n])); + } + } + + void TestSolar() { + using namespace NCodecs; + { + TVector<TBuffer> learn; + + for (auto& textValue : TextValues) { + learn.emplace_back(textValue, strlen(textValue)); + } + + TestCodec<TSolarCodec, true>(learn, TVector<TBuffer>(), new TSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, false>(learn, TVector<TBuffer>(), new TAdaptiveSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, true>(learn, TVector<TBuffer>(), new TAdaptiveSolarCodec(512, 8)); + 
TestCodec<TSolarCodecShortInt, true>(learn, TVector<TBuffer>(), new TSolarCodecShortInt(512, 8)); + } + { + TestCodec<TSolarCodec, true>(TVector<TBuffer>(), TVector<TBuffer>(), new TSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, false>(TVector<TBuffer>(), TVector<TBuffer>(), new TAdaptiveSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, true>(TVector<TBuffer>(), TVector<TBuffer>(), new TAdaptiveSolarCodec(512, 8)); + TestCodec<TSolarCodecShortInt, true>(TVector<TBuffer>(), TVector<TBuffer>(), new TSolarCodecShortInt(512, 8)); + } + + { + TVector<TBuffer> learn; + learn.emplace_back(); + learn.back().Append('a'); + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + test.back().Append((ui8)i); + } + + TestCodec<TSolarCodec, true>(learn, test, new TSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, false>(learn, test, new TAdaptiveSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, true>(learn, test, new TAdaptiveSolarCodec(512, 8)); + TestCodec<TSolarCodecShortInt, true>(learn, test, new TSolarCodecShortInt(512, 8)); + } + + { + TVector<TBuffer> learn; + learn.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + for (ui32 j = 0; j < i; ++j) { + learn.back().Append((ui8)i); + } + } + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + test.back().Append((ui8)i); + } + + TestCodec<TSolarCodec, true>(learn, test, new TSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, false>(learn, test, new TAdaptiveSolarCodec(512, 8)); + TestCodec<TAdaptiveSolarCodec, true>(learn, test, new TAdaptiveSolarCodec(512, 8)); + TestCodec<TSolarCodecShortInt, true>(learn, test, new TSolarCodecShortInt(512, 8)); + } + } + + void TestPipeline() { + using namespace NCodecs; + { + TVector<TBuffer> learn; + learn.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + for (i32 j = i; j >= 0; --j) { + learn.back().Append((ui8)j); + } + } + + TVector<TBuffer> test; + test.emplace_back(); + for (ui32 i = 0; i < 256; 
++i) { + test.back().Append((ui8)i); + } + + TestCodec<TPipelineCodec, true>(learn, test, + new TPipelineCodec(new TSolarCodec(512, 8), new TSolarCodec(512, 8), new THuffmanCodec)); + } + { + TVector<TBuffer> d; + d.emplace_back(); + for (ui32 i = 0; i < 256; ++i) { + for (i32 j = i; j >= 0; --j) { + d.back().Append(i * i); + } + } + + TestCodec<TPipelineCodec, false>(d, TVector<TBuffer>(), + new TPipelineCodec(new TDeltaCodec<ui32, false>, new TPForCodec<ui32>)); + } + } + + void TestRegistry() { + using namespace NCodecs; + TVector<TString> vs = ICodec::GetCodecsList(); + for (const auto& v : vs) { + TCodecPtr p = ICodec::GetInstance(v); + if (v == "none") { + UNIT_ASSERT(!p); + continue; + } + UNIT_ASSERT_C(!!p, v); + UNIT_ASSERT_C(TStringBuf(v).Head(3) == TStringBuf(p->GetName()).Head(3), v + " " + p->GetName()); + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TCodecsTest) diff --git a/library/cpp/codecs/ut/float_huffman_ut.cpp b/library/cpp/codecs/ut/float_huffman_ut.cpp new file mode 100644 index 0000000000..3156fb1f46 --- /dev/null +++ b/library/cpp/codecs/ut/float_huffman_ut.cpp @@ -0,0 +1,237 @@ +#include <library/cpp/codecs/float_huffman.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/format.h> +#include <util/stream/output.h> +#include <library/cpp/string_utils/base64/base64.h> + +namespace fh = NCodecs::NFloatHuff; + +Y_UNIT_TEST_SUITE(FloatHuffmanTest) { + static const float Factors[] = { + 0.340582, 0.000974026, 0.487168, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0.411765, 0.921569, + 0.00390625, 0.109371, 0, 1, 0, 0, 0, 0, 0.523322, 0, 1, 0, 0, 0, 0, 0.285714, 1, + 0.008253, 1, 0, 0, 0.00993935, 0.450213, 0.000974026, 1, 1, 1, 1, 0, 0, 0.20564, + 0.97561, 0.913896, 1, 1, 0, 1, 0, 0, 0.5, 0, 0, 0, 0.1, 1, 0, 0, 0, 0, 0, 0.450923, + 0, 0.5, 0, 0, 0.20564, 0, 0.5, 0, 0, 0.20564, 0, 0, 0.0313726, 0, 1, 1, 1, 0.363636, + 0.5, 0.686073, 0.45121, 0.00574382, 0.366166, 0.413295, 1, 1, 1, 0, 0, 0, 0, 0.160784, + 0, 0.937255, 0.537255, 
0.133333, 0, 0, 0, 0, 0.00392157, 0, 0.333333, 0.027451, 0.0156863, + 1, 0.105882, 1, 0.00220908, 0.000112501, 0.0111262, 0.102384, 0.00140808, 0.123581, + 0.29308, 6.57282e-06, 0.00489498, 2.10209e-05, 0.00140559, 5.907e-06, 0, 0.559322, + 0.559322, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0.794765, 0, + 0.352648, 0.225904, 1, 0.047619, 0.0107276, 0.399461, 0.0304838, 0.292932, 0.00969929, + 0, 0, 0.886904, 0.714693, 0, 0.00223213, 0.000544069, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0.00507403, 0, 0, 0, 0, 0, 0.875, 0, 0, 1, 1, 1, 0, 0.20564, 0, 0.00176048, 0, + 0.000440121, 0, 0, 0, 0.000974026, 0.487168, 0, 0, 0.533333, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 1, 0, 0, 0.723187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0.206882, 0.00483367, 0.792983, 0.00126106, 1, 0.0313726, 0.470588, + 0.254902, 0.188235, 0.188235, 0.388235, 0.164706, 0, 0.870588, 0.843137, 0.635294, + 0.384314, 0.384314, 0.643137, 0, 0, 0, 0, 0, 0, 0, 0, 0.541176, 0, 0.541176, 0, 0, + 0.0532634, 1, 0, 0, 0, 0.015044, 1, 0, 1, 1, 1, 0.47451, 0.329412, 0.964706, 0, 0, + 0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0.0941176, 0.970588, 0.970588, 0, 0.970588, 0.97561, + 0, 0.0431373, 0.47451, 0.329412, 0.964706, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0.231373, 0.00392157, 0, 0, 0, 0.054902, 0, 0, + 1, 0, 0, 0.0235294, 0, 1, 0, 0, 0, 0, 0.34902, 0.0352941, 0.925379, 0.623681, 0, + 0.954543, 0, 0, 0.00102756, 0.709804, 0.498039, 0.0901961, 0.631373, 0.847059, 0.270588, + 0.0156863, 0.133333, 0.980392, 1e-12, 1e-12, 1e-12, 1e-12, 0.497159, 0, 0.407487, + 0, 0, 0, 0.00392157, 0.00202156, 0.046875, 0.187159, 0.046875, 0.15625, 0.434232, + 0.15625, 0, 2.95083e-07, 0.20564, 0.20564, 0.97561, 0.913896, 0, 0, 0, 0, 0, 0, 0.00784314, + 0, 0.695525, 1, 0.07205, 0, 0, 0.176471, 0, 0, 0, 1, 1, 0.98, 0.01, 0.01, 0, 0.00690702, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.29078, 0.29078, 1, 0, 0, 0, 0, 0.192157, 0.188235, + 0.0941176, 0, 0.0313726, 0, 
0.141176, 0.207843, 0.0901961, 0.00784314, 0.0784314, + 0, 0, 0, 0, 0, 0.203922, 0.0196078, 0.34902, 0.0235294, 0.0980392, 0.164706, 0.133333, + 0.368627, 0, 0.0941176, 0, 1, 0.313726, 0, 0, 0.433582, 0.384508, 0.0532186, 0.0833333, + 0.01609, 0, 1, 0, 0, 0, 0.0666667, 0, 0, 0, 0, 1, 0, 0.564706, 0.501961, 0, 0, 0, + 0, 0, 0.0516447, 0.000173065, 0, 0, 0, 0, 0, 0, 0, 0.996309, 0, 0, 0.00392157, 1, + 0, 0.01, 0, 0, 0, 0, 0, 0.439505, 0.206882, 0.206882, 0.260891, 0, 0.875, 0, 0, 0, + 0, 0, 0.185657, 1, 1, 0, 0, 0, 0.0332647, 0.206106, 0.0688878, 0.239216, 0, 0, 0, + 0, 0.054902, 0, 0.101961, 0.160784, 0.180392, 0, 0.737828, 0, 0, 0.875, 0.0142566, + 0, 0.662745, 1, 0, 0, 0, 0.225806, 0.99992, 0.631373, 0.00392157, 1, 0, 0.143647, + 0.00270085, 1, 0.231482, 0.246735, 0.0428062, 0, 0, 1, 0, 0.186441, 0.0115358, 0, + 0.221762, 0, 0.2, 0, 0.0156863, 0, 0, 0, 0.976471, 0, 0.231373, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0.00392157, 0.00392157, 0.0666667, 0, 0, 0, 0, 0.0117647, 0.580392, 0.98737, + 1, 1, 1, 0, 0, 0, 0.153, 0.847, 0.931373, 0.94697, 0.94697, 0, 0.946294, 0.408118, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0.99992, 0.97561, 0, 0, 0, 0, 0, 0, + 0.274677, 0.153017, 0, 0.642356, 0, 0, 0.1, 0, 0, 0, 0, 0.327944, 0.327944, 0, 0, + 0.815686, 0, 0, 0, 0, 0.206106, 0.439126, 0, 0, 0, 0, 0, 1, 1, 1, 0.00392157, 0.232788, + 0.232465, 0.999899, 0.00309296, 0.0636097, 0.445954, 0.156863, 0, 0, 0, 0, 0, 0, + 0.3796, 0.0784, 0.0651664, 0, 0, 0.254902, 0.266667, 1, 0, 0, 0, 0, 0, 0.596073, + 0.517876, 0.145833, 0.372549, 0, 0.991667, 0.602125, 0.161979, 0, 0, 0, 0, 0.0255146, + 0.947855, 0, 0, 0, 0, 0, 0, 0, 0, 0.847059, 0.679841, 0, 0.156863, 0, 0, 1, 0, 0, + 0, 0, 0.969697, 0, 0, 0.564706, 0, 0, 0, 0, 0, 1, 0.0367282, 0.0395228, 0, 0, 0, + 0, 0, 0.0470588, 0.141176, 0.054902, 0, 0, 0, 0}; + static const size_t FactorCount = Y_ARRAY_SIZE(Factors); + + static const ui8 CodedFactors[] = { + 0x24, 0x06, 0x73, 0xB5, 0xC7, 0x55, 0x7F, 0x3A, 0xB4, 0x70, 0xCB, 0xEF, 0xEE, 0xFE, 
0xB3, 0x5B, + 0x5A, 0x1A, 0x93, 0x5F, 0x5F, 0x13, 0x00, 0x00, 0x10, 0x00, 0x3D, 0xEF, 0xFF, 0xEE, 0x0F, 0xDC, + 0xF0, 0xAB, 0x3F, 0x37, 0x92, 0x24, 0x5D, 0x5E, 0xDE, 0x1C, 0xF8, 0x12, 0x15, 0x5B, 0x84, 0x51, + 0x82, 0xE6, 0xF6, 0xB8, 0xEA, 0x4F, 0xC7, 0xDD, 0x7D, 0x2E, 0x4D, 0x4A, 0x21, 0xCA, 0xE0, 0xC4, + 0x2E, 0xEA, 0xD3, 0xBD, 0x0F, 0x00, 0x00, 0xE0, 0xDA, 0xCC, 0xCC, 0xEC, 0x9F, 0x61, 0xDF, 0xE6, + 0x01, 0x00, 0x00, 0xCC, 0xA5, 0x49, 0xA9, 0x00, 0x00, 0x00, 0xE6, 0xD2, 0xA4, 0xD4, 0xEA, 0x08, + 0x08, 0xD0, 0xDD, 0xF9, 0xE7, 0xA2, 0x0B, 0x00, 0x00, 0x40, 0xD8, 0x13, 0x7D, 0xFE, 0x13, 0x9C, + 0x9B, 0xA8, 0x36, 0xBC, 0x00, 0x90, 0x43, 0x6F, 0x97, 0x67, 0x9B, 0xD3, 0xEE, 0xFE, 0x84, 0x24, + 0x25, 0x89, 0xC9, 0xBF, 0x3F, 0x58, 0x4C, 0x4C, 0xCA, 0x21, 0x22, 0xBC, 0x39, 0x08, 0x08, 0x08, + 0x40, 0x7E, 0xAA, 0xAA, 0xCA, 0x75, 0x70, 0x70, 0xE9, 0x08, 0x08, 0xE8, 0x9A, 0x8A, 0x8D, 0xED, + 0xA6, 0x8D, 0x31, 0x04, 0x00, 0x96, 0xD0, 0x7D, 0x1D, 0x47, 0xAA, 0x2A, 0xD9, 0x28, 0xAD, 0x6B, + 0xB4, 0x9D, 0x7A, 0xC4, 0xD5, 0xD1, 0x04, 0x8C, 0x7E, 0x56, 0x3A, 0x58, 0x5A, 0x0C, 0x46, 0x6E, + 0x1B, 0x53, 0xC2, 0x0C, 0x14, 0x00, 0xAB, 0x60, 0x05, 0x7B, 0x63, 0x8D, 0x77, 0x70, 0x75, 0xAC, + 0x2F, 0x8D, 0xB1, 0x4D, 0xA0, 0xFB, 0xF2, 0x40, 0xF7, 0xE5, 0x7F, 0xDF, 0xDD, 0xFD, 0xBB, 0x1B, + 0xB8, 0x75, 0x9B, 0x47, 0x8E, 0xB4, 0x0C, 0x9B, 0x3A, 0x73, 0x25, 0x61, 0x18, 0x92, 0xD1, 0xC2, + 0x2F, 0x3C, 0x31, 0x64, 0x96, 0x2A, 0xB9, 0xF9, 0x7C, 0xD9, 0xAF, 0x94, 0xC5, 0xE9, 0x1E, 0x63, + 0x24, 0x0C, 0x03, 0x7F, 0xD8, 0x5B, 0xB3, 0x1D, 0x49, 0x02, 0x00, 0xAB, 0xFD, 0xE9, 0xA0, 0xF3, + 0xBF, 0xC9, 0x40, 0x64, 0x0A, 0xC0, 0xC7, 0x00, 0x00, 0x60, 0x77, 0xCF, 0xA5, 0x49, 0xA9, 0x16, + 0xFD, 0xD7, 0x5C, 0xA7, 0x55, 0x00, 0x36, 0xCF, 0xB9, 0x3D, 0xAE, 0xFA, 0xD3, 0xA1, 0x85, 0x5B, + 0xFE, 0x60, 0x10, 0x11, 0xFF, 0xF7, 0x7D, 0x38, 0x59, 0x24, 0xFF, 0xFF, 0xDF, 0x13, 0x1C, 0x7B, + 0xCA, 0x1C, 0x1E, 0xF3, 0x04, 0xC0, 0x78, 0x07, 0x58, 0x7B, 0xA2, 0x54, 0xAA, 0xE3, 0xEA, 0x08, + 0x08, 0xC0, 0x74, 0x78, 
0x78, 0x88, 0x50, 0x50, 0xD8, 0x0A, 0x0C, 0xC4, 0x56, 0x60, 0x20, 0xF6, + 0x1A, 0x1B, 0x33, 0x16, 0x15, 0xA5, 0xB8, 0xED, 0xED, 0x22, 0xF5, 0xF5, 0x09, 0xA1, 0xA2, 0x42, + 0x67, 0x62, 0x62, 0x3A, 0x13, 0x13, 0x0B, 0xA0, 0xA4, 0xF4, 0x0F, 0x06, 0x15, 0x35, 0x18, 0x54, + 0xD4, 0x35, 0x57, 0x45, 0xCB, 0x2F, 0x39, 0xF6, 0xEC, 0xBC, 0xBB, 0x53, 0x5F, 0x5E, 0x9E, 0xB1, + 0xA8, 0xA8, 0x28, 0xDF, 0xDE, 0x3E, 0x00, 0x00, 0x80, 0x5F, 0x75, 0x81, 0x81, 0x51, 0x1D, 0x1E, + 0xA2, 0x3A, 0x3C, 0x8C, 0xEA, 0xF0, 0x10, 0x51, 0x06, 0x67, 0xED, 0x85, 0x85, 0xA1, 0xBE, 0xBC, + 0x3C, 0x63, 0x51, 0x51, 0x51, 0xBE, 0xBD, 0xFD, 0xFF, 0xFD, 0xFE, 0xCE, 0x85, 0x76, 0x36, 0x73, + 0x10, 0x10, 0x10, 0x80, 0xEB, 0x3A, 0x38, 0xD8, 0xBE, 0xD4, 0x05, 0x06, 0xEE, 0x4F, 0x60, 0x59, + 0x59, 0x65, 0x84, 0x84, 0xC0, 0x46, 0xCB, 0x19, 0x7F, 0x4C, 0xFD, 0xC8, 0x9D, 0x8B, 0xB6, 0x31, + 0xAF, 0x86, 0x3A, 0xF0, 0x6D, 0x6D, 0x11, 0xDF, 0xDF, 0x5F, 0x79, 0x71, 0x71, 0x85, 0xD4, 0xD0, + 0x10, 0xB9, 0xB1, 0x11, 0x1A, 0x54, 0x54, 0xE9, 0x08, 0x08, 0x48, 0x39, 0x44, 0x04, 0x84, 0xAF, + 0xAF, 0x96, 0x99, 0x97, 0x71, 0xC5, 0x32, 0xF3, 0x32, 0xAE, 0x58, 0x66, 0x5E, 0xC6, 0x15, 0xCB, + 0xCC, 0xCB, 0xB8, 0x42, 0xD0, 0x45, 0xFF, 0x1C, 0x11, 0x85, 0xBE, 0x39, 0x08, 0x08, 0x08, 0x80, + 0x69, 0xC2, 0x47, 0x00, 0x80, 0x02, 0x00, 0x00, 0x91, 0xD3, 0xF4, 0x47, 0x01, 0x00, 0x80, 0x08, + 0x00, 0x00, 0x42, 0xD4, 0x29, 0x6F, 0x02, 0x00, 0x80, 0xB4, 0xE6, 0x6B, 0x9E, 0x34, 0x5C, 0x9A, + 0x94, 0xE2, 0xD2, 0xA4, 0x14, 0xA2, 0x0C, 0x4E, 0xEC, 0xA2, 0x3E, 0x7F, 0x39, 0x08, 0x08, 0x10, + 0x6E, 0x6F, 0x10, 0xD7, 0x79, 0xC7, 0xC9, 0x09, 0x4D, 0x4B, 0x73, 0x77, 0x84, 0x14, 0xAE, 0x52, + 0xE1, 0x7A, 0x44, 0x2A, 0x5C, 0x8F, 0x34, 0x93, 0xA8, 0xC4, 0x01, 0xF8, 0x3F, 0x3D, 0xC2, 0x29, + 0xE9, 0x11, 0x4E, 0xE9, 0x4F, 0x67, 0x62, 0x22, 0xB6, 0x02, 0x03, 0xA9, 0x2E, 0x30, 0x70, 0x75, + 0x04, 0x04, 0xC8, 0x38, 0x48, 0x08, 0x32, 0x53, 0x53, 0x29, 0x2F, 0x2E, 0xAE, 0x1C, 0x04, 0x04, + 0x50, 0x52, 0x50, 0xD0, 0x4F, 0x77, 0x68, 0x28, 0x99, 0x08, 
0x0A, 0x4A, 0x60, 0x59, 0x59, 0xA9, + 0x0B, 0x0C, 0xAC, 0xC7, 0xC8, 0xC8, 0x8C, 0x45, 0x45, 0xA1, 0x1C, 0x22, 0x02, 0x5D, 0x79, 0x79, + 0xAB, 0x2E, 0x30, 0x70, 0xA7, 0x2C, 0x28, 0xE8, 0xB4, 0xF3, 0xEF, 0x26, 0x8F, 0x37, 0xB1, 0xFE, + 0xEE, 0x67, 0xA9, 0xA9, 0xAA, 0xAA, 0x6C, 0x79, 0x1E, 0xEC, 0xD7, 0x46, 0x44, 0xC4, 0xF7, 0xF8, + 0x24, 0x24, 0x00, 0x42, 0x40, 0xF8, 0x5A, 0x96, 0x38, 0x65, 0x91, 0xF1, 0x6A, 0x72, 0xFE, 0x68, + 0xC3, 0xE1, 0x37, 0x07, 0x01, 0x01, 0x01, 0xF0, 0x52, 0xE1, 0x7A, 0xE4, 0xB3, 0xD9, 0x20, 0x9C, + 0xE0, 0xD8, 0x53, 0x04, 0xC7, 0x9E, 0x82, 0x02, 0x27, 0x2B, 0x06, 0x00, 0x00, 0x9F, 0xDE, 0x1C, + 0x3E, 0xEE, 0xD7, 0x48, 0x20, 0x04, 0xD2, 0x35, 0x4C, 0x29, 0x43, 0x45, 0x23, 0x15, 0xEA, 0xE9, + 0x5E, 0xD7, 0xC1, 0xC1, 0xAA, 0x3B, 0x34, 0x34, 0x21, 0x49, 0x49, 0xE8, 0x8A, 0x8B, 0x13, 0x66, + 0x12, 0xE7, 0x31, 0x00, 0x00, 0x90, 0x84, 0x94, 0x69, 0x05, 0xD4, 0xD4, 0xF4, 0x13, 0x36, 0xE7, + 0x0C, 0x09, 0xEB, 0xBF, 0x90, 0x1A, 0x1A, 0xE6, 0x20, 0x20, 0x20, 0x00, 0x9E, 0x33, 0x18, 0x13, + 0xA6, 0x2F, 0x40, 0x0C, 0x00, 0x4E, 0xCF, 0x84, 0x36, 0x6A, 0xA0, 0xF2, 0xA9, 0x63, 0xD5, 0xCB, + 0x9E, 0x64, 0xEA, 0x3E, 0xF2, 0x14, 0xA0, 0x27, 0x29, 0x2B, 0xC6, 0xB2, 0x99, 0x99, 0xA9, 0x74, + 0x04, 0x04, 0x3C, 0x0A, 0xD0, 0xCF, 0x5C, 0x68, 0x67, 0xFB, 0xDF, 0x1C, 0x04, 0x04, 0x04, 0xC0, + 0x1C, 0x04, 0x04, 0x04, 0x40, 0x1B, 0x11, 0x11, 0x5F, 0xEA, 0x02, 0x03, 0xE1, 0x92, 0x94, 0x84, + 0x90, 0x88, 0xD9, 0xDD, 0x4F, 0x04, 0x56, 0x0E, 0xD1, 0x9F, 0x1A, 0x31, 0x3B, 0x37, 0x47, 0xA0, + 0x6C, 0x82, 0x40, 0xD9, 0x24, 0x9A, 0x02, 0x12, 0x62, 0xD3, 0x43, 0xFF, 0xBF, 0x8F, 0x84, 0xF5, + 0x1F, 0x51, 0x06, 0xE7, 0x0F, 0xDD, 0x89, 0x32, 0xFB, 0x60, 0x39, 0x0A, 0x71, 0x71, 0xB4, 0x36, + 0x33, 0x33, 0x3F, 0x8F, 0xD0, 0x4F, 0x79, 0x84, 0x7E, 0xBA, 0xC8, 0x0C, 0x0D, 0x4F, 0xBA, 0x86, + 0x29, 0x82, 0x54, 0x83, 0x7F, 0x77, 0x37, 0x07, 0x01, 0x01, 0x01, 0xA0, 0xFE, 0x97, 0x1B, 0x9D, + 0x16, 0xDC, 0x90, 0x58, 0xFE, 0x9B, 0x42, 0xB3, 0x4A, 0x00, 0x68, 0x73, 0x91, 0x20, 0x2B, 0xA8, + 
0xC8, 0x29, 0x0B, 0x0A, 0xF2, 0xD3, 0x5D, 0x4B, 0x58, 0x5D, 0x20, 0x41, 0xD5, 0xBE, 0xAE, 0x70, + 0x88, 0x50, 0x50, 0x20, 0x4A, 0x44, 0xF4, 0x8F, 0xF7, 0x60, 0x22, 0x30, 0x9C, 0x24, 0xFE, 0x54, + 0x55, 0xD0, 0xD7, 0xD7, 0x37, 0x1A, 0xEF, 0x6E, 0xBC, 0x9B, 0x44, 0x39, 0xDD, 0x5D, 0xF2, 0xF2, + 0x7F, 0x20, 0x1A, 0x81, 0x9A, 0xCA, 0xBF, 0xC8, 0x8D, 0x8D, 0xC2, 0x83, 0x82, 0xA7, 0x2C, 0x28, + 0xC8, 0xFE, 0x08, 0xC2, 0x07, 0xC7, 0x27, 0x21, 0xE1, 0xBB, 0x3E, 0xC1, 0x59, 0x68, 0xAA, 0x78, + 0xC8, 0x57, 0x5D, 0x60, 0x20, 0xC6, 0x41, 0x42, 0xE8, 0x3A, 0x38, 0xD8, 0x9B, 0xFF, 0xFF, 0xFF, + 0xC4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + static const size_t CodedSize = Y_ARRAY_SIZE(CodedFactors); + static const TStringBuf CodedFactorsBuf(reinterpret_cast<const char*>(CodedFactors), CodedSize); + + void FillWithGarbage(float* factors, size_t count) { + void* data = static_cast<void*>(factors); + memset(data, 0xAA, sizeof(float) * count); + } + + // Helper for dumping compressed values + void PrintCompressed(const TVector<ui8>& codedFactors) { + for (size_t i = 0; i < codedFactors.size(); ++i) { + if (i % 0x10 == 0) + Cerr << Endl; + Cerr << Hex(codedFactors[i]) << ", "; + } + Cerr << Endl; + } + + // Helper for dumping decompressed values + void PrintDecompressed(const TVector<float>& factors) { + TStringStream result; + TStringStream line; + + for (size_t i = 0; i < factors.size(); ++i) { + line << factors[i] << ", "; + if (line.Str().size() > 80) { + result << line.Str() << Endl; + line.Clear(); + } + } + Cerr << result.Str() << Endl; + } + + Y_UNIT_TEST(TestCompress) { + const auto codedFactors = fh::Encode(Factors); + UNIT_ASSERT_VALUES_EQUAL(codedFactors.size(), CodedSize); + for (size_t i = 0; i < Min(codedFactors.size(), CodedSize); ++i) + UNIT_ASSERT_VALUES_EQUAL((ui8)codedFactors[i], CodedFactors[i]); + //PrintCompressed(codedFactors); + } + + Y_UNIT_TEST(TestSimpleDecompress) { + TVector<float> factors = fh::Decode(CodedFactorsBuf); + 
UNIT_ASSERT_VALUES_EQUAL(factors.size(), FactorCount); + for (size_t i = 0; i < Min(factors.size(), FactorCount); ++i) + UNIT_ASSERT_VALUES_EQUAL(factors[i], Factors[i]); + //PrintDecompressed(factors); + } + + Y_UNIT_TEST(TestDecompressInParts) { + float factors[FactorCount]; + FillWithGarbage(factors, FactorCount); + fh::TDecoder decoder(CodedFactorsBuf); + const size_t firstPack = 100; + // unpack first pack + UNIT_ASSERT_VALUES_EQUAL(decoder.Decode({factors, firstPack}), firstPack); + // unpack all the rest + UNIT_ASSERT_VALUES_EQUAL(decoder.Decode({factors + firstPack, FactorCount - firstPack}), FactorCount - firstPack); + + for (size_t i = 0; i < FactorCount; ++i) + UNIT_ASSERT_VALUES_EQUAL(factors[i], Factors[i]); + //PrintDecompressed(factors); + } + + Y_UNIT_TEST(TestSkip) { + float factors[FactorCount]; + FillWithGarbage(factors, FactorCount); + fh::TDecoder decoder(CodedFactorsBuf); + const size_t firstPack = 100; + // unpack first pack + UNIT_ASSERT_VALUES_EQUAL(decoder.Decode({factors, firstPack}), firstPack); + // skip some factors + const size_t skipCount = 60; + UNIT_ASSERT_VALUES_EQUAL(decoder.Skip(skipCount / 2), skipCount / 2); + // unpack all, except some factors in the end + const auto toDecode = FactorCount - firstPack - skipCount; + UNIT_ASSERT_VALUES_EQUAL(decoder.Decode({factors + firstPack, toDecode}), toDecode); + UNIT_ASSERT_VALUES_EQUAL(decoder.Skip(skipCount / 2), skipCount / 2); + for (size_t i = 0; i < FactorCount - skipCount; ++i) { + size_t correctedI = i < firstPack ? 
i : i + skipCount / 2; + UNIT_ASSERT_VALUES_EQUAL(factors[i], Factors[correctedI]); + } + //PrintDecompressed(factors); + } + + Y_UNIT_TEST(TestDecompressForgedData) { + // this coredumps without end-of-coded-stream check, see SEARCH-1156 for details + TString brokenBase64Encoded = + "NLjYltUWs5pqnd3d3f05Li4OAwCAEqrP6mv06jDt7PiAUVu7Y+PiMpuZmdzeM" + "ArqOLxS2q4FKCII52dktcVs7y0zL+OKgeO9SOzEkFj7uPfFqqoCAAAAAADAtZ" + "mZ2fdmICAgANQXhi1WVRUAAAAAAAAGjvcWq6oKAAAAAAAAA8d7qe4rV3Nxcd3" + "d4ZfQZrETm3B+OxxB8bbnTPM5+qtbQ92mJ3fHPGj+iH5+8tzcnJuamry1tWUw" + "MBD693f07+9+DQQEkIGAgIgPetzN5yEbAGxWpbCNxXK/0JGTKRz2KkIoR7aM"; + UNIT_ASSERT_EXCEPTION( + fh::Decode(Base64Decode(brokenBase64Encoded)), + yexception); + } + + Y_UNIT_TEST(TestDecompressEmpty) { + UNIT_ASSERT_EXCEPTION(fh::Decode({}), yexception); + } +}; diff --git a/library/cpp/codecs/ut/tls_cache_ut.cpp b/library/cpp/codecs/ut/tls_cache_ut.cpp new file mode 100644 index 0000000000..8101af761f --- /dev/null +++ b/library/cpp/codecs/ut/tls_cache_ut.cpp @@ -0,0 +1,36 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/codecs/tls_cache.h> + +Y_UNIT_TEST_SUITE(CodecsBufferFactoryTest){ + void AssignToBuffer(TBuffer & buf, TStringBuf val){ + buf.Assign(val.data(), val.size()); +} + +TStringBuf AsStringBuf(const TBuffer& b) { + return TStringBuf(b.Data(), b.Size()); +} + +Y_UNIT_TEST(TestAcquireReleaseReuse) { + NCodecs::TBufferTlsCache factory; + // acquiring the first buffer + auto buf1 = factory.Item(); + AssignToBuffer(buf1.Get(), "Buffer_01"); + { + // acquiring the second buffer + auto buf2 = factory.Item(); + AssignToBuffer(buf2.Get(), "Buffer_02"); + } + // the first buffer should stay intact + UNIT_ASSERT_EQUAL(AsStringBuf(buf1.Get()), "Buffer_01"); + { + // reacquiring the last released buffer + // expecting it zero sized but having the same memory + auto buf2 = factory.Item(); + UNIT_ASSERT_VALUES_EQUAL(buf2.Get().Size(), 0u); + buf2.Get().Resize(TStringBuf("Buffer_02").Size()); + 
UNIT_ASSERT_EQUAL(AsStringBuf(buf2.Get()), "Buffer_02"); + } + // when the factory dies we should see no leaks +} +} +; diff --git a/library/cpp/codecs/ut/ya.make b/library/cpp/codecs/ut/ya.make new file mode 100644 index 0000000000..90841b05ef --- /dev/null +++ b/library/cpp/codecs/ut/ya.make @@ -0,0 +1,20 @@ +UNITTEST() + +OWNER( + g:base + velavokr +) + +PEERDIR( + library/cpp/string_utils/base64 + library/cpp/codecs + library/cpp/string_utils/relaxed_escaper +) + +SRCS( + tls_cache_ut.cpp + codecs_ut.cpp + float_huffman_ut.cpp +) + +END() |