aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpritula <pritula@yandex-team.ru>2022-02-10 16:50:53 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:53 +0300
commit0093f1ac39402a8aaf5ae7edc5e83a4de7771335 (patch)
tree5d5cb817648f650d76cf1076100726fd9b8448e8
parent67060456f1386d2ddd6313929f48dc92356a0ff6 (diff)
downloadydb-0093f1ac39402a8aaf5ae7edc5e83a4de7771335.tar.gz
Restoring authorship annotation for <pritula@yandex-team.ru>. Commit 2 of 2.
-rw-r--r--library/cpp/archive/yarchive.cpp34
-rw-r--r--library/cpp/archive/yarchive.h2
-rw-r--r--tools/archiver/main.cpp228
-rw-r--r--tools/archiver/tests/directory/file32
-rw-r--r--tools/archiver/tests/test.py58
5 files changed, 162 insertions, 162 deletions
diff --git a/library/cpp/archive/yarchive.cpp b/library/cpp/archive/yarchive.cpp
index b014d3e93b..1becc3e5da 100644
--- a/library/cpp/archive/yarchive.cpp
+++ b/library/cpp/archive/yarchive.cpp
@@ -172,16 +172,16 @@ public:
Off_ += out.Counter();
}
- inline void AddSynonym(const TString& existingKey, const TString& newKey) {
+ inline void AddSynonym(const TString& existingKey, const TString& newKey) {
Y_ENSURE(Dict_.contains(existingKey), "key " << existingKey.data() << " not stored yet");
Y_ENSURE(!Dict_.contains(newKey), "key " << newKey.data() << " already stored");
-
- TArchiveRecordDescriptorRef existingDescr = Dict_[existingKey];
- TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(existingDescr->Offset(), existingDescr->Length(), newKey));
-
- Dict_[newKey] = descr;
- }
-
+
+ TArchiveRecordDescriptorRef existingDescr = Dict_[existingKey];
+ TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(existingDescr->Offset(), existingDescr->Length(), newKey));
+
+ Dict_[newKey] = descr;
+ }
+
private:
ui64 Off_;
IOutputStream* Out_;
@@ -215,17 +215,17 @@ void TArchiveWriter::Finish() {
}
void TArchiveWriter::Add(const TString& key, IInputStream* src) {
- Y_ENSURE(Impl_.Get(), "archive already closed");
+ Y_ENSURE(Impl_.Get(), "archive already closed");
Impl_->Add(key, src);
}
-void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) {
- Y_ENSURE(Impl_.Get(), "archive already closed");
-
- Impl_->AddSynonym(existingKey, newKey);
-}
-
+void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) {
+ Y_ENSURE(Impl_.Get(), "archive already closed");
+
+ Impl_->AddSynonym(existingKey, newKey);
+}
+
namespace {
class TArchiveInputStreamBase {
public:
@@ -266,7 +266,7 @@ public:
inline ~TImpl() = default;
inline void ReadDict() {
- Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob");
+ Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob");
const char* end = (const char*)Blob_.End();
const char* ptr = end - sizeof(ui64);
@@ -274,7 +274,7 @@ public:
memcpy(&dictlen, ptr, sizeof(ui64));
dictlen = LittleToHost(dictlen);
- Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob");
+ Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob");
const char* beg = ptr - dictlen;
TMemoryInput mi(beg, dictlen);
diff --git a/library/cpp/archive/yarchive.h b/library/cpp/archive/yarchive.h
index b871362c14..8120bcb940 100644
--- a/library/cpp/archive/yarchive.h
+++ b/library/cpp/archive/yarchive.h
@@ -22,7 +22,7 @@ public:
void Flush();
void Finish();
void Add(const TString& key, IInputStream* src);
- void AddSynonym(const TString& existingKey, const TString& newKey);
+ void AddSynonym(const TString& existingKey, const TString& newKey);
private:
class TImpl;
diff --git a/tools/archiver/main.cpp b/tools/archiver/main.cpp
index a89556ffad..6cda54c1ea 100644
--- a/tools/archiver/main.cpp
+++ b/tools/archiver/main.cpp
@@ -16,8 +16,8 @@
#include <util/string/subst.h>
#include <util/system/filemap.h>
-#include <cstring>
-
+#include <cstring>
+
namespace {
class TStringArrayOutput: public IOutputStream {
public:
@@ -203,91 +203,91 @@ namespace {
IOutputStream* O = nullptr;
const TString B;
};
-
- struct TMyFileComparator {
- bool operator()(const TString& fname1, const TString& fname2) const {
- if (fname1 == fname2) {
- return false;
- }
- if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) {
- return *savedResultPtr < 0;
- }
- TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly);
- TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly);
- mmap1.SetSequential();
- mmap2.SetSequential();
- Y_ASSERT(mmap1.Length() == mmap2.Length());
- TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length());
- TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length());
- Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize());
- int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize());
- mmap1.Unmap(mapResult1);
- mmap2.Unmap(mapResult2);
- SavedResults[std::make_pair(fname1, fname2)] = res;
- SavedResults[std::make_pair(fname2, fname1)] = -res;
- return res < 0;
- }
-
+
+ struct TMyFileComparator {
+ bool operator()(const TString& fname1, const TString& fname2) const {
+ if (fname1 == fname2) {
+ return false;
+ }
+ if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) {
+ return *savedResultPtr < 0;
+ }
+ TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly);
+ TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly);
+ mmap1.SetSequential();
+ mmap2.SetSequential();
+ Y_ASSERT(mmap1.Length() == mmap2.Length());
+ TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length());
+ TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length());
+ Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize());
+ int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize());
+ mmap1.Unmap(mapResult1);
+ mmap2.Unmap(mapResult2);
+ SavedResults[std::make_pair(fname1, fname2)] = res;
+ SavedResults[std::make_pair(fname2, fname1)] = -res;
+ return res < 0;
+ }
+
mutable THashMap<std::pair<TString, TString>, int> SavedResults;
- };
-
- struct TDuplicatesMap {
- void Add(const TString& fname, const TString& rname) {
- Y_ENSURE(!InitialFillingDone);
- FileNames.push_back(fname);
- FileNameToRecordName[fname] = rname;
- }
-
- void Finish() {
- Y_ENSURE(!InitialFillingDone);
- InitialFillingDone = true;
+ };
+
+ struct TDuplicatesMap {
+ void Add(const TString& fname, const TString& rname) {
+ Y_ENSURE(!InitialFillingDone);
+ FileNames.push_back(fname);
+ FileNameToRecordName[fname] = rname;
+ }
+
+ void Finish() {
+ Y_ENSURE(!InitialFillingDone);
+ InitialFillingDone = true;
TMap<i64, TVector<TString>> bySize;
- for (const TString& fname: FileNames) {
- TFile file(fname, OpenExisting | RdOnly);
- bySize[file.GetLength()].push_back(fname);
- }
- for (const auto& bySizeElement: bySize) {
- if (bySizeElement.second.size() > 1) {
+ for (const TString& fname: FileNames) {
+ TFile file(fname, OpenExisting | RdOnly);
+ bySize[file.GetLength()].push_back(fname);
+ }
+ for (const auto& bySizeElement: bySize) {
+ if (bySizeElement.second.size() > 1) {
TMap<TString, TVector<TString>, TMyFileComparator> byContents;
- for (const TString& fname: bySizeElement.second) {
- byContents[fname].push_back(fname);
- }
- for (const auto& byContentsElement: byContents) {
- if (byContentsElement.second.size() > 1) {
- const TString& rootName = byContentsElement.second.front();
- const TString& rootRecordName = FileNameToRecordName[rootName];
- for (const TString& fname: byContentsElement.second) {
- if (fname != rootName) {
- Synonyms[FileNameToRecordName[fname]] = rootRecordName;
- }
- }
- }
- }
- }
- }
- FileNames.clear();
- FileNameToRecordName.clear();
- }
-
- bool InitialFillingDone = false;
+ for (const TString& fname: bySizeElement.second) {
+ byContents[fname].push_back(fname);
+ }
+ for (const auto& byContentsElement: byContents) {
+ if (byContentsElement.second.size() > 1) {
+ const TString& rootName = byContentsElement.second.front();
+ const TString& rootRecordName = FileNameToRecordName[rootName];
+ for (const TString& fname: byContentsElement.second) {
+ if (fname != rootName) {
+ Synonyms[FileNameToRecordName[fname]] = rootRecordName;
+ }
+ }
+ }
+ }
+ }
+ }
+ FileNames.clear();
+ FileNameToRecordName.clear();
+ }
+
+ bool InitialFillingDone = false;
TVector<TString> FileNames;
THashMap<TString, TString> FileNameToRecordName;
THashMap<TString, TString> Synonyms;
- };
-
- struct TDeduplicationArchiveWriter {
+ };
+
+ struct TDeduplicationArchiveWriter {
TDeduplicationArchiveWriter(const TDuplicatesMap& duplicatesMap, IOutputStream* out, bool compress)
- : DuplicatesMap(duplicatesMap)
- , Writer(out, compress)
- {}
-
- void Finish() {
- Writer.Finish();
- }
-
- const TDuplicatesMap& DuplicatesMap;
- TArchiveWriter Writer;
- };
+ : DuplicatesMap(duplicatesMap)
+ , Writer(out, compress)
+ {}
+
+ void Finish() {
+ Writer.Finish();
+ }
+
+ const TDuplicatesMap& DuplicatesMap;
+ TArchiveWriter Writer;
+ };
}
static inline TAutoPtr<IOutputStream> OpenOutput(const TString& url) {
@@ -338,21 +338,21 @@ static inline void Append(IOutputStream& w, const TString& fname, const TString&
TransferData((IInputStream*)&in, &w);
}
-static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) {
- w.Add(fname, rname);
-}
+static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) {
+ w.Add(fname, rname);
+}
-static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) {
+static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) {
if (!Quiet) {
Cerr << "--> " << rname << Endl;
}
- if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) {
- w.Writer.AddSynonym(*rootRecordName, rname);
- } else {
- TMappedFileInput in(fname);
- w.Writer.Add(rname, &in);
- }
+ if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) {
+ w.Writer.AddSynonym(*rootRecordName, rname);
+ } else {
+ TMappedFileInput in(fname);
+ w.Writer.Add(rname, &in);
+ }
}
namespace {
@@ -386,7 +386,7 @@ namespace {
const char* name;
const TString p = Path + off;
- fl.Fill(p, true);
+ fl.Fill(p, true);
while ((name = fl.Next())) {
const TString fname = p + name;
@@ -402,7 +402,7 @@ namespace {
const char* name;
const TString p = Path + off;
- dl.Fill(p, true);
+ dl.Fill(p, true);
while ((name = dl.Next())) {
if (strcmp(name, ".") && strcmp(name, "..")) {
@@ -516,12 +516,12 @@ int main(int argc, char** argv) {
.Optional()
.StoreValue(&doNotZip, true);
- bool deduplicate = false;
- opts.AddLongOption("deduplicate", "Turn on file-wise deduplication")
- .NoArgument()
- .Optional()
- .StoreValue(&deduplicate, true);
-
+ bool deduplicate = false;
+ opts.AddLongOption("deduplicate", "Turn on file-wise deduplication")
+ .NoArgument()
+ .Optional()
+ .StoreValue(&deduplicate, true);
+
bool unpack = false;
opts.AddLongOption('u', "unpack", "Unpack archive into current directory")
.NoArgument()
@@ -634,17 +634,17 @@ int main(int argc, char** argv) {
try {
if (listMd5) {
- for (const auto& rec: recs) {
+ for (const auto& rec: recs) {
ListArchiveMd5(rec.Path, cutSlash);
}
} else if (list) {
- for (const auto& rec: recs) {
+ for (const auto& rec: recs) {
ListArchive(rec.Path, cutSlash);
}
} else if (unpack) {
const TFsPath dir(unpackDir);
- for (const auto& rec: recs) {
- UnpackArchive(rec.Path, dir);
+ for (const auto& rec: recs) {
+ UnpackArchive(rec.Path, dir);
}
} else {
TAutoPtr<IOutputStream> outf(OpenOutput(outputf));
@@ -668,21 +668,21 @@ int main(int argc, char** argv) {
outf->Write(prepend.data(), prepend.size());
if (cat) {
- for (const auto& rec: recs) {
- rec.Recurse(*out);
+ for (const auto& rec: recs) {
+ rec.Recurse(*out);
}
} else {
- TDuplicatesMap duplicatesMap;
- if (deduplicate) {
- for (const auto& rec: recs) {
- rec.Recurse(duplicatesMap);
- }
+ TDuplicatesMap duplicatesMap;
+ if (deduplicate) {
+ for (const auto& rec: recs) {
+ rec.Recurse(duplicatesMap);
+ }
+ }
+ duplicatesMap.Finish();
+ TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip);
+ for (const auto& rec: recs) {
+ rec.Recurse(w);
}
- duplicatesMap.Finish();
- TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip);
- for (const auto& rec: recs) {
- rec.Recurse(w);
- }
w.Finish();
}
diff --git a/tools/archiver/tests/directory/file3 b/tools/archiver/tests/directory/file3
index ac9b5ed12b..e2129701f1 100644
--- a/tools/archiver/tests/directory/file3
+++ b/tools/archiver/tests/directory/file3
@@ -1 +1 @@
-file1
+file1
diff --git a/tools/archiver/tests/test.py b/tools/archiver/tests/test.py
index fbd02a7b2e..b92d58f6a9 100644
--- a/tools/archiver/tests/test.py
+++ b/tools/archiver/tests/test.py
@@ -33,36 +33,36 @@ class TestArchiver(object):
stderr=None,
)
archive_list = sorted(open('result').read().strip().split('\n'))
- assert len(archive_list) == 3
+ assert len(archive_list) == 3
assert archive_list[0] == 'file1'
assert archive_list[1] == 'file2'
- assert archive_list[2] == 'file3'
-
- def test_deduplicate(self):
- assert 'archiver' == os.path.basename(self.archiver_path)
- assert os.path.exists(self.archiver_path)
- contents = ytc.source_path("tools/archiver/tests/directory")
- ytc.execute(
- command=[
- self.archiver_path,
- "--output", "result_dedup",
- "--recursive",
- "--deduplicate",
- "--plain",
- contents,
- ]
- )
- ytc.execute(
- command=[
- self.archiver_path,
- "--output", "result_no_dedup",
- "--recursive",
- "--plain",
- contents,
- ]
- )
- with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup:
- archive_dedup = f_dedup.read()
- archive_no_dedup = f_no_dedup.read()
+ assert archive_list[2] == 'file3'
+
+ def test_deduplicate(self):
+ assert 'archiver' == os.path.basename(self.archiver_path)
+ assert os.path.exists(self.archiver_path)
+ contents = ytc.source_path("tools/archiver/tests/directory")
+ ytc.execute(
+ command=[
+ self.archiver_path,
+ "--output", "result_dedup",
+ "--recursive",
+ "--deduplicate",
+ "--plain",
+ contents,
+ ]
+ )
+ ytc.execute(
+ command=[
+ self.archiver_path,
+ "--output", "result_no_dedup",
+ "--recursive",
+ "--plain",
+ contents,
+ ]
+ )
+ with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup:
+ archive_dedup = f_dedup.read()
+ archive_no_dedup = f_no_dedup.read()
assert len(archive_dedup) == 58
assert len(archive_no_dedup) == 75