aboutsummaryrefslogtreecommitdiffstats
path: root/tools/archiver
diff options
context:
space:
mode:
authorpritula <pritula@yandex-team.ru>2022-02-10 16:50:53 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:50:53 +0300
commit67060456f1386d2ddd6313929f48dc92356a0ff6 (patch)
tree3c7fa49a87ce72d7d5c6a3d282d9f99c476a79d9 /tools/archiver
parent9905811fdc643c722d6464b8cf6661f931510f2f (diff)
downloadydb-67060456f1386d2ddd6313929f48dc92356a0ff6.tar.gz
Restoring authorship annotation for <pritula@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'tools/archiver')
-rw-r--r--tools/archiver/main.cpp228
-rw-r--r--tools/archiver/tests/directory/file32
-rw-r--r--tools/archiver/tests/test.py58
3 files changed, 144 insertions, 144 deletions
diff --git a/tools/archiver/main.cpp b/tools/archiver/main.cpp
index 6cda54c1ea..a89556ffad 100644
--- a/tools/archiver/main.cpp
+++ b/tools/archiver/main.cpp
@@ -16,8 +16,8 @@
#include <util/string/subst.h>
#include <util/system/filemap.h>
-#include <cstring>
-
+#include <cstring>
+
namespace {
class TStringArrayOutput: public IOutputStream {
public:
@@ -203,91 +203,91 @@ namespace {
IOutputStream* O = nullptr;
const TString B;
};
-
- struct TMyFileComparator {
- bool operator()(const TString& fname1, const TString& fname2) const {
- if (fname1 == fname2) {
- return false;
- }
- if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) {
- return *savedResultPtr < 0;
- }
- TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly);
- TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly);
- mmap1.SetSequential();
- mmap2.SetSequential();
- Y_ASSERT(mmap1.Length() == mmap2.Length());
- TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length());
- TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length());
- Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize());
- int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize());
- mmap1.Unmap(mapResult1);
- mmap2.Unmap(mapResult2);
- SavedResults[std::make_pair(fname1, fname2)] = res;
- SavedResults[std::make_pair(fname2, fname1)] = -res;
- return res < 0;
- }
-
+
+ struct TMyFileComparator {
+ bool operator()(const TString& fname1, const TString& fname2) const {
+ if (fname1 == fname2) {
+ return false;
+ }
+ if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) {
+ return *savedResultPtr < 0;
+ }
+ TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly);
+ TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly);
+ mmap1.SetSequential();
+ mmap2.SetSequential();
+ Y_ASSERT(mmap1.Length() == mmap2.Length());
+ TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length());
+ TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length());
+ Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize());
+ int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize());
+ mmap1.Unmap(mapResult1);
+ mmap2.Unmap(mapResult2);
+ SavedResults[std::make_pair(fname1, fname2)] = res;
+ SavedResults[std::make_pair(fname2, fname1)] = -res;
+ return res < 0;
+ }
+
mutable THashMap<std::pair<TString, TString>, int> SavedResults;
- };
-
- struct TDuplicatesMap {
- void Add(const TString& fname, const TString& rname) {
- Y_ENSURE(!InitialFillingDone);
- FileNames.push_back(fname);
- FileNameToRecordName[fname] = rname;
- }
-
- void Finish() {
- Y_ENSURE(!InitialFillingDone);
- InitialFillingDone = true;
+ };
+
+ struct TDuplicatesMap {
+ void Add(const TString& fname, const TString& rname) {
+ Y_ENSURE(!InitialFillingDone);
+ FileNames.push_back(fname);
+ FileNameToRecordName[fname] = rname;
+ }
+
+ void Finish() {
+ Y_ENSURE(!InitialFillingDone);
+ InitialFillingDone = true;
TMap<i64, TVector<TString>> bySize;
- for (const TString& fname: FileNames) {
- TFile file(fname, OpenExisting | RdOnly);
- bySize[file.GetLength()].push_back(fname);
- }
- for (const auto& bySizeElement: bySize) {
- if (bySizeElement.second.size() > 1) {
+ for (const TString& fname: FileNames) {
+ TFile file(fname, OpenExisting | RdOnly);
+ bySize[file.GetLength()].push_back(fname);
+ }
+ for (const auto& bySizeElement: bySize) {
+ if (bySizeElement.second.size() > 1) {
TMap<TString, TVector<TString>, TMyFileComparator> byContents;
- for (const TString& fname: bySizeElement.second) {
- byContents[fname].push_back(fname);
- }
- for (const auto& byContentsElement: byContents) {
- if (byContentsElement.second.size() > 1) {
- const TString& rootName = byContentsElement.second.front();
- const TString& rootRecordName = FileNameToRecordName[rootName];
- for (const TString& fname: byContentsElement.second) {
- if (fname != rootName) {
- Synonyms[FileNameToRecordName[fname]] = rootRecordName;
- }
- }
- }
- }
- }
- }
- FileNames.clear();
- FileNameToRecordName.clear();
- }
-
- bool InitialFillingDone = false;
+ for (const TString& fname: bySizeElement.second) {
+ byContents[fname].push_back(fname);
+ }
+ for (const auto& byContentsElement: byContents) {
+ if (byContentsElement.second.size() > 1) {
+ const TString& rootName = byContentsElement.second.front();
+ const TString& rootRecordName = FileNameToRecordName[rootName];
+ for (const TString& fname: byContentsElement.second) {
+ if (fname != rootName) {
+ Synonyms[FileNameToRecordName[fname]] = rootRecordName;
+ }
+ }
+ }
+ }
+ }
+ }
+ FileNames.clear();
+ FileNameToRecordName.clear();
+ }
+
+ bool InitialFillingDone = false;
TVector<TString> FileNames;
THashMap<TString, TString> FileNameToRecordName;
THashMap<TString, TString> Synonyms;
- };
-
- struct TDeduplicationArchiveWriter {
+ };
+
+ struct TDeduplicationArchiveWriter {
TDeduplicationArchiveWriter(const TDuplicatesMap& duplicatesMap, IOutputStream* out, bool compress)
- : DuplicatesMap(duplicatesMap)
- , Writer(out, compress)
- {}
-
- void Finish() {
- Writer.Finish();
- }
-
- const TDuplicatesMap& DuplicatesMap;
- TArchiveWriter Writer;
- };
+ : DuplicatesMap(duplicatesMap)
+ , Writer(out, compress)
+ {}
+
+ void Finish() {
+ Writer.Finish();
+ }
+
+ const TDuplicatesMap& DuplicatesMap;
+ TArchiveWriter Writer;
+ };
}
static inline TAutoPtr<IOutputStream> OpenOutput(const TString& url) {
@@ -338,21 +338,21 @@ static inline void Append(IOutputStream& w, const TString& fname, const TString&
TransferData((IInputStream*)&in, &w);
}
-static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) {
- w.Add(fname, rname);
-}
+static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) {
+ w.Add(fname, rname);
+}
-static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) {
+static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) {
if (!Quiet) {
Cerr << "--> " << rname << Endl;
}
- if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) {
- w.Writer.AddSynonym(*rootRecordName, rname);
- } else {
- TMappedFileInput in(fname);
- w.Writer.Add(rname, &in);
- }
+ if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) {
+ w.Writer.AddSynonym(*rootRecordName, rname);
+ } else {
+ TMappedFileInput in(fname);
+ w.Writer.Add(rname, &in);
+ }
}
namespace {
@@ -386,7 +386,7 @@ namespace {
const char* name;
const TString p = Path + off;
- fl.Fill(p, true);
+ fl.Fill(p, true);
while ((name = fl.Next())) {
const TString fname = p + name;
@@ -402,7 +402,7 @@ namespace {
const char* name;
const TString p = Path + off;
- dl.Fill(p, true);
+ dl.Fill(p, true);
while ((name = dl.Next())) {
if (strcmp(name, ".") && strcmp(name, "..")) {
@@ -516,12 +516,12 @@ int main(int argc, char** argv) {
.Optional()
.StoreValue(&doNotZip, true);
- bool deduplicate = false;
- opts.AddLongOption("deduplicate", "Turn on file-wise deduplication")
- .NoArgument()
- .Optional()
- .StoreValue(&deduplicate, true);
-
+ bool deduplicate = false;
+ opts.AddLongOption("deduplicate", "Turn on file-wise deduplication")
+ .NoArgument()
+ .Optional()
+ .StoreValue(&deduplicate, true);
+
bool unpack = false;
opts.AddLongOption('u', "unpack", "Unpack archive into current directory")
.NoArgument()
@@ -634,17 +634,17 @@ int main(int argc, char** argv) {
try {
if (listMd5) {
- for (const auto& rec: recs) {
+ for (const auto& rec: recs) {
ListArchiveMd5(rec.Path, cutSlash);
}
} else if (list) {
- for (const auto& rec: recs) {
+ for (const auto& rec: recs) {
ListArchive(rec.Path, cutSlash);
}
} else if (unpack) {
const TFsPath dir(unpackDir);
- for (const auto& rec: recs) {
- UnpackArchive(rec.Path, dir);
+ for (const auto& rec: recs) {
+ UnpackArchive(rec.Path, dir);
}
} else {
TAutoPtr<IOutputStream> outf(OpenOutput(outputf));
@@ -668,21 +668,21 @@ int main(int argc, char** argv) {
outf->Write(prepend.data(), prepend.size());
if (cat) {
- for (const auto& rec: recs) {
- rec.Recurse(*out);
+ for (const auto& rec: recs) {
+ rec.Recurse(*out);
}
} else {
- TDuplicatesMap duplicatesMap;
- if (deduplicate) {
- for (const auto& rec: recs) {
- rec.Recurse(duplicatesMap);
- }
- }
- duplicatesMap.Finish();
- TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip);
- for (const auto& rec: recs) {
- rec.Recurse(w);
+ TDuplicatesMap duplicatesMap;
+ if (deduplicate) {
+ for (const auto& rec: recs) {
+ rec.Recurse(duplicatesMap);
+ }
}
+ duplicatesMap.Finish();
+ TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip);
+ for (const auto& rec: recs) {
+ rec.Recurse(w);
+ }
w.Finish();
}
diff --git a/tools/archiver/tests/directory/file3 b/tools/archiver/tests/directory/file3
index e2129701f1..ac9b5ed12b 100644
--- a/tools/archiver/tests/directory/file3
+++ b/tools/archiver/tests/directory/file3
@@ -1 +1 @@
-file1
+file1
diff --git a/tools/archiver/tests/test.py b/tools/archiver/tests/test.py
index b92d58f6a9..fbd02a7b2e 100644
--- a/tools/archiver/tests/test.py
+++ b/tools/archiver/tests/test.py
@@ -33,36 +33,36 @@ class TestArchiver(object):
stderr=None,
)
archive_list = sorted(open('result').read().strip().split('\n'))
- assert len(archive_list) == 3
+ assert len(archive_list) == 3
assert archive_list[0] == 'file1'
assert archive_list[1] == 'file2'
- assert archive_list[2] == 'file3'
-
- def test_deduplicate(self):
- assert 'archiver' == os.path.basename(self.archiver_path)
- assert os.path.exists(self.archiver_path)
- contents = ytc.source_path("tools/archiver/tests/directory")
- ytc.execute(
- command=[
- self.archiver_path,
- "--output", "result_dedup",
- "--recursive",
- "--deduplicate",
- "--plain",
- contents,
- ]
- )
- ytc.execute(
- command=[
- self.archiver_path,
- "--output", "result_no_dedup",
- "--recursive",
- "--plain",
- contents,
- ]
- )
- with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup:
- archive_dedup = f_dedup.read()
- archive_no_dedup = f_no_dedup.read()
+ assert archive_list[2] == 'file3'
+
+ def test_deduplicate(self):
+ assert 'archiver' == os.path.basename(self.archiver_path)
+ assert os.path.exists(self.archiver_path)
+ contents = ytc.source_path("tools/archiver/tests/directory")
+ ytc.execute(
+ command=[
+ self.archiver_path,
+ "--output", "result_dedup",
+ "--recursive",
+ "--deduplicate",
+ "--plain",
+ contents,
+ ]
+ )
+ ytc.execute(
+ command=[
+ self.archiver_path,
+ "--output", "result_no_dedup",
+ "--recursive",
+ "--plain",
+ contents,
+ ]
+ )
+ with open('result_dedup', 'rb') as f_dedup, open('result_no_dedup', 'rb') as f_no_dedup:
+ archive_dedup = f_dedup.read()
+ archive_no_dedup = f_no_dedup.read()
assert len(archive_dedup) == 58
assert len(archive_no_dedup) == 75