aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/comptable/usage
diff options
context:
space:
mode:
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/comptable/usage
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/comptable/usage')
-rw-r--r-- library/cpp/comptable/usage/usage.cpp | 75
-rw-r--r-- library/cpp/comptable/usage/ya.make | 13
2 files changed, 88 insertions, 0 deletions
diff --git a/library/cpp/comptable/usage/usage.cpp b/library/cpp/comptable/usage/usage.cpp
new file mode 100644
index 0000000000..9997c83686
--- /dev/null
+++ b/library/cpp/comptable/usage/usage.cpp
@@ -0,0 +1,75 @@
+#include <library/cpp/comptable/comptable.h>
+
+#include <util/random/random.h>
+#include <util/random/fast.h>
+
+#include <time.h>
+#include <stdlib.h>
+
+using namespace NCompTable;
+
+// Round-trips every string in `lines` through a TChunkCompressor /
+// TChunkDecompressor pair built from `table`, verifies that each decompressed
+// result equals its input byte for byte, and prints to Cout the accumulated
+// original/compressed sizes, the combined compress + decompress throughput
+// and the compression ratio.
+//
+// HQ is passed as the first constructor argument of both the compressor and
+// the decompressor.
+//
+// On the first mismatch the line index, the original line and the
+// decompressed bytes are printed and the process is terminated with abort().
+template <bool HQ>
+void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {
+    // Both buffers are reused across iterations; Compress/Decompress receive
+    // them as output arguments.
+    TVector<char> compressed;
+    TVector<char> decompressed;
+
+    TChunkCompressor compressor(HQ, table);
+    TChunkDecompressor deCompressor(HQ, table);
+
+    size_t origSize = 0;
+    size_t compSize = 0;
+
+    // Keep the raw clock_t ticks: squeezing them into a float (24-bit
+    // mantissa) silently drops low-order ticks once the process has consumed
+    // more than 2^24 of them.
+    const clock_t startTicks = clock();
+    for (size_t i = 0; i < lines.size(); ++i) {
+        const TString& line = lines[i];
+        compressor.Compress(line, &compressed);
+        origSize += line.size();
+        compSize += compressed.size();
+
+        const TStringBuf in(compressed.data(), compressed.size());
+        deCompressor.Decompress(in, &decompressed);
+
+        const bool sameSize = decompressed.size() == line.size();
+        if (!sameSize || memcmp(decompressed.data(), line.data(), decompressed.size()) != 0) {
+            // Finish with Endl rather than "\n" so the diagnostics are flushed
+            // before abort(), which terminates without normal exit cleanup.
+            Cout << i << "\n"
+                 << line << "\n"
+                 << TString(decompressed.data(), decompressed.size()) << Endl;
+            abort();
+        }
+    }
+    const clock_t finishTicks = clock();
+
+    const double secs = static_cast<double>(finishTicks - startTicks) / CLOCKS_PER_SEC;
+    const double megabytes = static_cast<double>(origSize) / 1024.0 / 1024.0;
+    // An empty corpus or an interval below the clock resolution gives
+    // secs == 0; report 0 instead of printing inf/nan.
+    const double speed = secs > 0.0 ? megabytes / secs : 0.0;
+
+    Cout << "origSize: " << origSize << "\tcompSize: " << compSize << Endl;
+    Cout << "yep! compression + decompression speed " << speed << " mbps\n";
+    // "+ 1" keeps the ratio finite when nothing was compressed.
+    Cout << "yep! compression ratio " << static_cast<double>(origSize) / static_cast<double>(compSize + 1) << "\n";
+}
+
+// Benchmark driver: generates a deterministic synthetic corpus of URL-like
+// strings, feeds all of them to a TDataSampler, builds a TCompressorTable
+// from the sampler and runs DoTest with HQ = true and then HQ = false.
+// Command-line arguments are ignored. Always returns 0 (DoTest aborts the
+// process on a round-trip mismatch).
+int main(int argc, const char* argv[]) {
+    Y_UNUSED(argc);
+    Y_UNUSED(argv);
+
+    constexpr size_t lineCount = 1000000;
+
+    // Fixed seed: the generated corpus is the same on every run.
+    TReallyFastRng32 rr(17);
+
+    TVector<TString> lines;
+    // Size is known up front; avoid repeated reallocation while filling.
+    lines.reserve(lineCount);
+    for (size_t i = 0; i < lineCount; ++i) {
+        // Suffix length is drawn first, then one draw per suffix character.
+        const size_t size = rr.Uniform(32);
+        // Build the string directly inside the vector instead of in a
+        // temporary that is then copied in.
+        lines.emplace_back("www.yandex.ru/yandsearch?text=");
+        TString& line = lines.back();
+        for (size_t j = 0; j < size; ++j) {
+            line += "qwer"[rr.Uniform(4)];
+        }
+    }
+
+    // The sampler is kept on the heap behind a THolder
+    // (presumably because TDataSampler is large - confirm against comptable.h).
+    THolder<TDataSampler> sampler = MakeHolder<TDataSampler>();
+    for (const TString& line : lines) {
+        sampler->AddStat(line);
+    }
+
+    TCompressorTable table;
+    sampler->BuildTable(table);
+
+    DoTest<true>(table, lines);
+    DoTest<false>(table, lines);
+
+    return 0;
+}
diff --git a/library/cpp/comptable/usage/ya.make b/library/cpp/comptable/usage/ya.make
new file mode 100644
index 0000000000..ab31e7528c
--- /dev/null
+++ b/library/cpp/comptable/usage/ya.make
@@ -0,0 +1,13 @@
+PROGRAM()  # standalone executable built from usage.cpp (which defines main)
+
+OWNER(ironpeter)
+
+SRCS(
+ usage.cpp
+)
+
+PEERDIR(
+ library/cpp/comptable  # provides library/cpp/comptable/comptable.h included by usage.cpp
+)
+
+END()