diff options
author | pritula <pritula@yandex-team.ru> | 2022-02-10 16:50:53 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:53 +0300 |
commit | 67060456f1386d2ddd6313929f48dc92356a0ff6 (patch) | |
tree | 3c7fa49a87ce72d7d5c6a3d282d9f99c476a79d9 | |
parent | 9905811fdc643c722d6464b8cf6661f931510f2f (diff) | |
download | ydb-67060456f1386d2ddd6313929f48dc92356a0ff6.tar.gz |
Restoring authorship annotation for <pritula@yandex-team.ru>. Commit 1 of 2.
-rw-r--r-- | library/cpp/archive/yarchive.cpp | 34 | ||||
-rw-r--r-- | library/cpp/archive/yarchive.h | 2 | ||||
-rw-r--r-- | tools/archiver/main.cpp | 228 | ||||
-rw-r--r-- | tools/archiver/tests/directory/file3 | 2 | ||||
-rw-r--r-- | tools/archiver/tests/test.py | 58 |
5 files changed, 162 insertions, 162 deletions
diff --git a/library/cpp/archive/yarchive.cpp b/library/cpp/archive/yarchive.cpp index 1becc3e5da..b014d3e93b 100644 --- a/library/cpp/archive/yarchive.cpp +++ b/library/cpp/archive/yarchive.cpp @@ -172,16 +172,16 @@ public: Off_ += out.Counter(); } - inline void AddSynonym(const TString& existingKey, const TString& newKey) { + inline void AddSynonym(const TString& existingKey, const TString& newKey) { Y_ENSURE(Dict_.contains(existingKey), "key " << existingKey.data() << " not stored yet"); Y_ENSURE(!Dict_.contains(newKey), "key " << newKey.data() << " already stored"); - - TArchiveRecordDescriptorRef existingDescr = Dict_[existingKey]; - TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(existingDescr->Offset(), existingDescr->Length(), newKey)); - - Dict_[newKey] = descr; - } - + + TArchiveRecordDescriptorRef existingDescr = Dict_[existingKey]; + TArchiveRecordDescriptorRef descr(new TArchiveRecordDescriptor(existingDescr->Offset(), existingDescr->Length(), newKey)); + + Dict_[newKey] = descr; + } + private: ui64 Off_; IOutputStream* Out_; @@ -215,17 +215,17 @@ void TArchiveWriter::Finish() { } void TArchiveWriter::Add(const TString& key, IInputStream* src) { - Y_ENSURE(Impl_.Get(), "archive already closed"); + Y_ENSURE(Impl_.Get(), "archive already closed"); Impl_->Add(key, src); } -void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) { - Y_ENSURE(Impl_.Get(), "archive already closed"); - - Impl_->AddSynonym(existingKey, newKey); -} - +void TArchiveWriter::AddSynonym(const TString& existingKey, const TString& newKey) { + Y_ENSURE(Impl_.Get(), "archive already closed"); + + Impl_->AddSynonym(existingKey, newKey); +} + namespace { class TArchiveInputStreamBase { public: @@ -266,7 +266,7 @@ public: inline ~TImpl() = default; inline void ReadDict() { - Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob"); + Y_ENSURE(Blob_.Size() >= sizeof(ui64), "too small blob"); const char* end = (const char*)Blob_.End(); const char* ptr = end - sizeof(ui64); @@ -274,7 +274,7 @@ public: memcpy(&dictlen, ptr, sizeof(ui64)); dictlen = LittleToHost(dictlen); - Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob"); + Y_ENSURE(dictlen <= Blob_.Size() - sizeof(ui64), "bad blob"); const char* beg = ptr - dictlen; TMemoryInput mi(beg, dictlen); diff --git a/library/cpp/archive/yarchive.h b/library/cpp/archive/yarchive.h index 8120bcb940..b871362c14 100644 --- a/library/cpp/archive/yarchive.h +++ b/library/cpp/archive/yarchive.h @@ -22,7 +22,7 @@ public: void Flush(); void Finish(); void Add(const TString& key, IInputStream* src); - void AddSynonym(const TString& existingKey, const TString& newKey); + void AddSynonym(const TString& existingKey, const TString& newKey); private: class TImpl; diff --git a/tools/archiver/main.cpp b/tools/archiver/main.cpp index 6cda54c1ea..a89556ffad 100644 --- a/tools/archiver/main.cpp +++ b/tools/archiver/main.cpp @@ -16,8 +16,8 @@ #include <util/string/subst.h> #include <util/system/filemap.h> -#include <cstring> - +#include <cstring> + namespace { class TStringArrayOutput: public IOutputStream { public: @@ -203,91 +203,91 @@ namespace { IOutputStream* O = nullptr; const TString B; }; - - struct TMyFileComparator { - bool operator()(const TString& fname1, const TString& fname2) const { - if (fname1 == fname2) { - return false; - } - if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) { - return *savedResultPtr < 0; - } - TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly); - TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly); - mmap1.SetSequential(); - mmap2.SetSequential(); - Y_ASSERT(mmap1.Length() == mmap2.Length()); - TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length()); - TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length()); - Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize()); - int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize()); - mmap1.Unmap(mapResult1); - mmap2.Unmap(mapResult2); - SavedResults[std::make_pair(fname1, fname2)] = res; - SavedResults[std::make_pair(fname2, fname1)] = -res; - return res < 0; - } - + + struct TMyFileComparator { + bool operator()(const TString& fname1, const TString& fname2) const { + if (fname1 == fname2) { + return false; + } + if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) { + return *savedResultPtr < 0; + } + TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly); + TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly); + mmap1.SetSequential(); + mmap2.SetSequential(); + Y_ASSERT(mmap1.Length() == mmap2.Length()); + TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length()); + TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length()); + Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize()); + int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize()); + mmap1.Unmap(mapResult1); + mmap2.Unmap(mapResult2); + SavedResults[std::make_pair(fname1, fname2)] = res; + SavedResults[std::make_pair(fname2, fname1)] = -res; + return res < 0; + } + mutable THashMap<std::pair<TString, TString>, int> SavedResults; - }; - - struct TDuplicatesMap { - void Add(const TString& fname, const TString& rname) { - Y_ENSURE(!InitialFillingDone); - FileNames.push_back(fname); - FileNameToRecordName[fname] = rname; - } - - void Finish() { - Y_ENSURE(!InitialFillingDone); - InitialFillingDone = true; + }; + + struct TDuplicatesMap { + void Add(const TString& fname, const TString& rname) { + Y_ENSURE(!InitialFillingDone); + FileNames.push_back(fname); + FileNameToRecordName[fname] = rname; + } + + void Finish() { + Y_ENSURE(!InitialFillingDone); + InitialFillingDone = true; TMap<i64, TVector<TString>> bySize; - for (const TString& fname: FileNames) { - TFile file(fname, OpenExisting | RdOnly); - bySize[file.GetLength()].push_back(fname); - } - for (const auto& bySizeElement: bySize) { - if (bySizeElement.second.size() > 1) { + for (const TString& fname: FileNames) { + TFile file(fname, OpenExisting | RdOnly); + bySize[file.GetLength()].push_back(fname); + } + for (const auto& bySizeElement: bySize) { + if (bySizeElement.second.size() > 1) { TMap<TString, TVector<TString>, TMyFileComparator> byContents; - for (const TString& fname: bySizeElement.second) { - byContents[fname].push_back(fname); - } - for (const auto& byContentsElement: byContents) { - if (byContentsElement.second.size() > 1) { - const TString& rootName = byContentsElement.second.front(); - const TString& rootRecordName = FileNameToRecordName[rootName]; - for (const TString& fname: byContentsElement.second) { - if (fname != rootName) { - Synonyms[FileNameToRecordName[fname]] = rootRecordName; - } - } - } - } - } - } - FileNames.clear(); - FileNameToRecordName.clear(); - } - - bool InitialFillingDone = false; + for (const TString& fname: bySizeElement.second) { + byContents[fname].push_back(fname); + } + for (const auto& byContentsElement: byContents) { + if (byContentsElement.second.size() > 1) { + const TString& rootName = byContentsElement.second.front(); + const TString& rootRecordName = FileNameToRecordName[rootName]; + for (const TString& fname: byContentsElement.second) { + if (fname != rootName) { + Synonyms[FileNameToRecordName[fname]] = rootRecordName; + } + } + } + } + } + } + FileNames.clear(); + FileNameToRecordName.clear(); + } + + bool InitialFillingDone = false; TVector<TString> FileNames; THashMap<TString, TString> FileNameToRecordName; THashMap<TString, TString> Synonyms; - }; - - struct TDeduplicationArchiveWriter { + }; + + struct TDeduplicationArchiveWriter { TDeduplicationArchiveWriter(const TDuplicatesMap& duplicatesMap, IOutputStream* out, bool compress) - : DuplicatesMap(duplicatesMap) - , Writer(out, compress) - {} - - void Finish() { - Writer.Finish(); - } - - const TDuplicatesMap& DuplicatesMap; - TArchiveWriter Writer; - }; + : DuplicatesMap(duplicatesMap) + , Writer(out, compress) + {} + + void Finish() { + Writer.Finish(); + } + + const TDuplicatesMap& DuplicatesMap; + TArchiveWriter Writer; + }; } static inline TAutoPtr<IOutputStream> OpenOutput(const TString& url) { @@ -338,21 +338,21 @@ static inline void Append(IOutputStream& w, const TString& fname, const TString& TransferData((IInputStream*)&in, &w); } -static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) { - w.Add(fname, rname); -} +static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) { + w.Add(fname, rname); +} -static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) { +static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) { if (!Quiet) { Cerr << "--> " << rname << Endl; } - if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) { - w.Writer.AddSynonym(*rootRecordName, rname); - } else { - TMappedFileInput in(fname); - w.Writer.Add(rname, &in); - } + if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) { + w.Writer.AddSynonym(*rootRecordName, rname); + } else { + TMappedFileInput in(fname); + w.Writer.Add(rname, &in); + } } namespace { @@ -386,7 +386,7 @@ namespace { const char* name; const TString p = Path + off; - fl.Fill(p, true); + fl.Fill(p, true); while ((name = fl.Next())) { const TString fname = p + name; @@ -402,7 +402,7 @@ namespace { const char* name; const TString p = Path + off; - dl.Fill(p, true); + dl.Fill(p, true); while ((name = dl.Next())) { if (strcmp(name, ".") && strcmp(name, "..")) { @@ -516,12 +516,12 @@ int main(int argc, char** argv) { .Optional() .StoreValue(&doNotZip, true); - bool deduplicate = false; - opts.AddLongOption("deduplicate", "Turn on file-wise deduplication") - .NoArgument() - .Optional() - .StoreValue(&deduplicate, true); - + bool deduplicate = false; + opts.AddLongOption("deduplicate", "Turn on file-wise deduplication") + .NoArgument() + .Optional() + .StoreValue(&deduplicate, true); + bool unpack = false; opts.AddLongOption('u', "unpack", "Unpack archive into current directory") .NoArgument() @@ -634,17 +634,17 @@ int main(int argc, char** argv) { try { if (listMd5) { - for (const auto& rec: recs) { + for (const auto& rec: recs) { ListArchiveMd5(rec.Path, cutSlash); } } else if (list) { - for (const auto& rec: recs) { + for (const auto& rec: recs) { ListArchive(rec.Path, cutSlash); } } else if (unpack) { const TFsPath dir(unpackDir); - for (const auto& rec: recs) { - UnpackArchive(rec.Path, dir); + for (const auto& rec: recs) { + UnpackArchive(rec.Path, dir); } } else { TAutoPtr<IOutputStream> outf(OpenOutput(outputf)); @@ -668,21 +668,21 @@ int main(int argc, char** argv) { outf->Write(prepend.data(), prepend.size()); if (cat) { - for (const auto& rec: recs) { - rec.Recurse(*out); + for (const auto& rec: recs) { + rec.Recurse(*out); } } else { - TDuplicatesMap duplicatesMap; - if (deduplicate) { - for (const auto& rec: recs) { - rec.Recurse(duplicatesMap); - } - } - duplicatesMap.Finish(); - TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip); - for (const auto& rec: recs) { - rec.Recurse(w); + TDuplicatesMap duplicatesMap; + if (deduplicate) { + for (const auto& rec: recs) { + rec.Recurse(duplicatesMap); + } } + duplicatesMap.Finish(); + TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip); + for (const auto& rec: recs) { + rec.Recurse(w); + } w.Finish(); } diff --git a/tools/archiver/tests/directory/file3 b/tools/archiver/tests/directory/file3 index e2129701f1..ac9b5ed12b 100644 --- a/tools/archiver/tests/directory/file3 +++ b/tools/archiver/tests/directory/file3 @@ -1 +1 @@ -file1 +file1 diff --git a/tools/archiver/tests/test.py b/tools/archiver/tests/test.py index b92d58f6a9..fbd02a7b2e 100644 --- a/tools/archiver/tests/test.py +++ b/tools/archiver/tests/test.py @@ -33,36 +33,36 @@ class TestArchiver(object): stderr=None, ) archive_list = sorted(open('result').read().strip().split('\n')) - assert len(archive_list) == 3 + assert len(archive_list) == 3 assert archive_list[0] == 'file1' assert archive_list[1] == 'file2' - assert archive_list[2] == 'file3' - - def test_deduplicate(self): - assert 'archiver' == os.path.basename(self.archiver_path) - assert os.path.exists(self.archiver_path) - contents = ytc.source_path("tools/archiver/tests/directory") - ytc.execute( - command=[ - self.archiver_path, - "--output", "result_dedup", - "--recursive", - "--deduplicate", - "--plain", - contents, - ] - ) - ytc.execute( - command=[ - self.archiver_path, - "--output", "result_no_dedup", - "--recursive", - "--plain", - contents, - ] - ) - with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup: - archive_dedup = f_dedup.read() - archive_no_dedup = f_no_dedup.read() + assert archive_list[2] == 'file3' + + def test_deduplicate(self): + assert 'archiver' == os.path.basename(self.archiver_path) + assert os.path.exists(self.archiver_path) + contents = ytc.source_path("tools/archiver/tests/directory") + ytc.execute( + command=[ + self.archiver_path, + "--output", "result_dedup", + "--recursive", + "--deduplicate", + "--plain", + contents, + ] + ) + ytc.execute( + command=[ + self.archiver_path, + "--output", "result_no_dedup", + "--recursive", + "--plain", + contents, + ] + ) + with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup: + archive_dedup = f_dedup.read() + archive_no_dedup = f_no_dedup.read() assert len(archive_dedup) == 58 assert len(archive_no_dedup) == 75 |