#include <library/cpp/archive/yarchive.h>
#include <library/cpp/deprecated/mapped_file/mapped_file.h>
#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/getopt/small/last_getopt.h>

#include <util/folder/dirut.h>
#include <util/folder/filelist.h>
#include <util/folder/path.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
#include <util/memory/blob.h>
#include <util/stream/file.h>
#include <util/string/cast.h>
#include <util/string/escape.h>
#include <util/string/hex.h>
#include <util/string/subst.h>
#include <util/system/filemap.h>

#include <cstring>

namespace {
    class TStringArrayOutput: public IOutputStream {
    public:
        TStringArrayOutput(IOutputStream* slave, size_t stride)
            : Slave(*slave)
            , Stride(stride)
        {
            Buf.reserve(stride);
        }
        void DoFinish() override {
            WriteBuf();
            Flush();
        }
        void DoWrite(const void* data, size_t len) override {
            for (const char* p = (const char*)data; len > 0; ++p, --len) {
                Buf.append(*p);
                if (Buf.size() == Stride)
                    WriteBuf();
            }
        }

    private:
        void WriteBuf() {
            Slave << '"' << Buf << "\",\n"sv;
            Buf.clear();
        }

    private:
        IOutputStream& Slave;
        const size_t Stride;
        TString Buf;
    };

    class THexOutput: public IOutputStream {
    public:
        inline THexOutput(IOutputStream* slave)
            : Slave_(slave)
        {
        }

        ~THexOutput() override {
        }

        inline IOutputStream* Slave() const noexcept {
            return Slave_;
        }

    private:
        void DoFinish() override {
            Slave_->Write('\n');
            Slave_->Flush();
        }

        void DoWrite(const void* data, size_t len) override {
            const char* b = (const char*)data;

            while (len) {
                const unsigned char c = *b;
                char buf[12];
                char* tmp = buf;

                if (Count_ % Columns == 0) {
                    *tmp++ = ' ';
                    *tmp++ = ' ';
                    *tmp++ = ' ';
                    *tmp++ = ' ';
                }

                if (Count_ && Count_ % Columns != 0) {
                    *tmp++ = ',';
                    *tmp++ = ' ';
                }

                *tmp++ = '0';
                *tmp++ = 'x';
                tmp = HexEncode(&c, 1, tmp);

                if ((Count_ % Columns) == (Columns - 1)) {
                    *tmp++ = ',';
                    *tmp++ = '\n';
                }

                Slave_->Write(buf, tmp - buf);

                --len;
                ++b;
                ++Count_;
            }
        }

    private:
        // width in source chars
        static const size_t Columns = 10;
        ui64 Count_ = 0;
        IOutputStream* Slave_ = nullptr;
    };

    struct TYasmOutput: public IOutputStream {
        inline TYasmOutput(IOutputStream* out, const TString& base)
            : Out_(out)
            , Base_(base)
        {
            *Out_ << "global " << Base_ << "\n";
            *Out_ << "global " << Base_ << "Size\n\nSECTION .rodata\n\n";
            *Out_ << Base_ << ":\n";
        }

        ~TYasmOutput() override {
        }

        void DoFinish() override {
            *Out_ << Base_ << "Size:\ndd " << Count_ << '\n';

            *Out_ << "%ifidn __OUTPUT_FORMAT__,elf64\n";
            *Out_ << "size " << Base_ << " " << Count_ << "\n";
            *Out_ << "size " << Base_ << "Size 4\n";
            *Out_ << "%endif\n";
        }

        void DoWrite(const void* data, size_t len) override {
            Count_ += len;

            const unsigned char* p = (const unsigned char*)data;

            while (len) {
                const size_t step = Min<size_t>(len, 100);

                *Out_ << "db " << (int)*p++;

                for (size_t i = 1; i < step; ++i) {
                    *Out_ << ',' << (int)*p++;
                }

                *Out_ << '\n';

                len -= step;
            }
        }

        IOutputStream* Out_ = nullptr;
        const TString Base_;
        ui64 Count_ = 0;
    };

    struct TCOutput: public THexOutput {
        inline TCOutput(IOutputStream* out, const TString& base)
            : THexOutput(out)
            , B(base)
        {
            *Slave() << "static_assert(sizeof(unsigned int) == 4, \"ups, unsupported platform\");\n\nextern \"C\" {\nextern const unsigned char " << B << "[] = {\n";
        }

        ~TCOutput() override {
        }

        void DoFinish() override {
            *Slave() << "\n};\nextern const unsigned int " << B << "Size = sizeof(" << B << ") / sizeof(" << B << "[0]);\n}\n";
        }

        const TString B;
    };

    struct TCStringOutput: public IOutputStream {
        inline TCStringOutput(IOutputStream* out, const TString& base)
            : O(out)
            , B(base)
        {
            *O << "static_assert(sizeof(unsigned int) == 4, \"ups, unsupported platform\");\n\nextern \"C\" {\nextern const unsigned char " << B << "[] = \n";
        }

        ~TCStringOutput() override {
        }

        void DoWrite(const void* data, size_t len) override {
            *O << TString((const char*)data, len).Quote() << '\n';
        }

        void DoFinish() override {
            //*O << ";\nextern const unsigned char* " << B << " = (const unsigned char*)" << B << "Array;\n";
            *O << ";\nextern const unsigned int " << B << "Size = sizeof(" << B << ") / sizeof(" << B << "[0]) - 1;\n}\n";
        }

        IOutputStream* O = nullptr;
        const TString B;
    };

    struct TMyFileComparator {
        bool operator()(const TString& fname1, const TString& fname2) const {
            if (fname1 == fname2) {
                return false;
            }
            if (const auto* savedResultPtr = SavedResults.FindPtr(std::make_pair(fname1, fname2))) {
                return *savedResultPtr < 0;
            }
            TMemoryMap mmap1(fname1, TMemoryMap::oRdOnly);
            TMemoryMap mmap2(fname2, TMemoryMap::oRdOnly);
            mmap1.SetSequential();
            mmap2.SetSequential();
            Y_ASSERT(mmap1.Length() == mmap2.Length());
            TMemoryMap::TMapResult mapResult1 = mmap1.Map(0, mmap1.Length());
            TMemoryMap::TMapResult mapResult2 = mmap2.Map(0, mmap2.Length());
            Y_ASSERT(mapResult1.MappedSize() == mapResult2.MappedSize());
            int res = memcmp(mapResult1.MappedData(), mapResult2.MappedData(), mapResult1.MappedSize());
            mmap1.Unmap(mapResult1);
            mmap2.Unmap(mapResult2);
            SavedResults[std::make_pair(fname1, fname2)] = res;
            SavedResults[std::make_pair(fname2, fname1)] = -res;
            return res < 0;
        }

        mutable THashMap<std::pair<TString, TString>, int> SavedResults;
    };

    struct TDuplicatesMap {
        void Add(const TString& fname, const TString& rname) {
            Y_ENSURE(!InitialFillingDone);
            FileNames.push_back(fname);
            FileNameToRecordName[fname] = rname;
        }

        void Finish() {
            Y_ENSURE(!InitialFillingDone);
            InitialFillingDone = true;
            TMap<i64, TVector<TString>> bySize;
            for (const TString& fname: FileNames) {
                TFile file(fname, OpenExisting | RdOnly);
                bySize[file.GetLength()].push_back(fname);
            }
            for (const auto& bySizeElement: bySize) {
                if (bySizeElement.second.size() > 1) {
                    TMap<TString, TVector<TString>, TMyFileComparator> byContents;
                    for (const TString& fname: bySizeElement.second) {
                        byContents[fname].push_back(fname);
                    }
                    for (const auto& byContentsElement: byContents) {
                        if (byContentsElement.second.size() > 1) {
                            const TString& rootName = byContentsElement.second.front();
                            const TString& rootRecordName = FileNameToRecordName[rootName];
                            for (const TString& fname: byContentsElement.second) {
                                if (fname != rootName) {
                                    Synonyms[FileNameToRecordName[fname]] = rootRecordName;
                                }
                            }
                        }
                    }
                }
            }
            FileNames.clear();
            FileNameToRecordName.clear();
        }

        bool InitialFillingDone = false;
        TVector<TString> FileNames;
        THashMap<TString, TString> FileNameToRecordName;
        THashMap<TString, TString> Synonyms;
    };

    struct TDeduplicationArchiveWriter {
        TDeduplicationArchiveWriter(const TDuplicatesMap& duplicatesMap, IOutputStream* out, bool compress)
            : DuplicatesMap(duplicatesMap)
            , Writer(out, compress)
        {}

        void Finish() {
            Writer.Finish();
        }

        const TDuplicatesMap& DuplicatesMap;
        TArchiveWriter Writer;
    };
}

static inline TAutoPtr<IOutputStream> OpenOutput(const TString& url) {
    if (url.empty()) {
        return new TBuffered<TUnbufferedFileOutput>(8192, Duplicate(1));
    } else {
        return new TBuffered<TUnbufferedFileOutput>(8192, url);
    }
}

static inline bool IsDelim(char ch) noexcept {
    return ch == '/' || ch == '\\';
}

static inline TString GetFile(const TString& s) {
    const char* e = s.end();
    const char* b = s.begin();
    const char* c = e - 1;

    while (c != b && !IsDelim(*c)) {
        --c;
    }

    if (c != e && IsDelim(*c)) {
        ++c;
    }

    return TString(c, e - c);
}

static inline TString Fix(TString f) {
    if (!f.empty() && IsDelim(f[f.size() - 1])) {
        f.pop_back();
    }

    return f;
}

static bool Quiet = false;

static inline void Append(IOutputStream& w, const TString& fname, const TString& rname) {
    TMappedFileInput in(fname);

    if (!Quiet) {
        Cerr << "--> " << rname << Endl;
    }

    TransferData((IInputStream*)&in, &w);
}

static inline void Append(TDuplicatesMap& w, const TString& fname, const TString& rname) {
    w.Add(fname, rname);
}

static inline void Append(TDeduplicationArchiveWriter& w, const TString& fname, const TString& rname) {
    if (!Quiet) {
        Cerr << "--> " << rname << Endl;
    }

    if (const TString* rootRecordName = w.DuplicatesMap.Synonyms.FindPtr(rname)) {
        w.Writer.AddSynonym(*rootRecordName, rname);
    } else {
        TMappedFileInput in(fname);
        w.Writer.Add(rname, &in);
    }
}

namespace {
    struct TRec {
        bool Recursive = false;
        TString Key;
        TString Path;
        TString Prefix;

        TRec() = default;

        inline void Fix() {
            ::Fix(Path);
            ::Fix(Prefix);
        }

        template <typename T>
        inline void Recurse(T& w) const {
            if (IsDir(Path)) {
                DoRecurse(w, "/");
            } else {
                Append(w, Path, Key.size() ? Key : Prefix + "/" + GetFile(Path));
            }
        }

        template <typename T>
        inline void DoRecurse(T& w, const TString& off) const {
            {
                TFileList fl;

                const char* name;
                const TString p = Path + off;

                fl.Fill(p, true);

                while ((name = fl.Next())) {
                    const TString fname = p + name;
                    const TString rname = Prefix + off + name;

                    Append(w, fname, rname);
                }
            }

            if (Recursive) {
                TDirsList dl;

                const char* name;
                const TString p = Path + off;

                dl.Fill(p, true);

                while ((name = dl.Next())) {
                    if (strcmp(name, ".") && strcmp(name, "..")) {
                        DoRecurse(w, off + name + "/");
                    }
                }
            }
        }
    };
}

static TString CutFirstSlash(const TString& fileName) {
    if (fileName[0] == '/') {
        return fileName.substr(1);
    } else {
        return fileName;
    }
}

struct TMappingReader {
    TMemoryMap Map;
    TBlob Blob;
    TArchiveReader Reader;

    TMappingReader(const TString& archive)
        : Map(archive)
        , Blob(TBlob::FromMemoryMapSingleThreaded(Map, 0, Map.Length()))
        , Reader(Blob)
    {
    }
};

static void UnpackArchive(const TString& archive, const TFsPath& dir = TFsPath()) {
    TMappingReader mappingReader(archive);
    const TArchiveReader& reader = mappingReader.Reader;
    const size_t count = reader.Count();
    for (size_t i = 0; i < count; ++i) {
        const TString key = reader.KeyByIndex(i);
        const TString fileName = CutFirstSlash(key);
        if (!Quiet) {
            Cerr << archive << " --> " << fileName << Endl;
        }
        const TFsPath path(dir / fileName);
        path.Parent().MkDirs();
        TAutoPtr<IInputStream> in = reader.ObjectByKey(key);
        TFixedBufferFileOutput out(path);
        TransferData(in.Get(), &out);
        out.Finish();
    }
}

static void ListArchive(const TString& archive, bool cutSlash) {
    TMappingReader mappingReader(archive);
    const TArchiveReader& reader = mappingReader.Reader;
    const size_t count = reader.Count();
    for (size_t i = 0; i < count; ++i) {
        const TString key = reader.KeyByIndex(i);
        TString fileName = key;
        if (cutSlash) {
            fileName = CutFirstSlash(key);
        }
        Cout << fileName << Endl;
    }
}

static void ListArchiveMd5(const TString& archive, bool cutSlash) {
    TMappingReader mappingReader(archive);
    const TArchiveReader& reader = mappingReader.Reader;
    const size_t count = reader.Count();
    for (size_t i = 0; i < count; ++i) {
        const TString key = reader.KeyByIndex(i);
        TString fileName = key;
        if (cutSlash) {
            fileName = CutFirstSlash(key);
        }
        char md5buf[33];
        Cout << fileName << '\t' << MD5::Stream(reader.ObjectByKey(key).Get(), md5buf) << Endl;
    }
}

int main(int argc, char** argv) {
    NLastGetopt::TOpts opts;
    opts.AddHelpOption('?');
    opts.SetTitle(
        "Archiver\n"
        "Docs: https://wiki.yandex-team.ru/Development/Poisk/arcadia/tools/archiver"
    );

    bool hexdump = false;
    opts.AddLongOption('x', "hexdump", "Produce hexdump")
        .NoArgument()
        .Optional()
        .StoreValue(&hexdump, true);

    size_t stride = 0;
    opts.AddLongOption('s', "segments", "Produce segmented C strings array of given size")
        .RequiredArgument("<size>")
        .Optional()
        .DefaultValue("0")
        .StoreResult(&stride);

    bool cat = false;
    opts.AddLongOption('c', "cat", "Do not store keys (file names), just cat uncompressed files")
        .NoArgument()
        .Optional()
        .StoreValue(&cat, true);

    bool doNotZip = false;
    opts.AddLongOption('p', "plain", "Do not use compression")
        .NoArgument()
        .Optional()
        .StoreValue(&doNotZip, true);

    bool deduplicate = false;
    opts.AddLongOption("deduplicate", "Turn on file-wise deduplication")
        .NoArgument()
        .Optional()
        .StoreValue(&deduplicate, true);

    bool unpack = false;
    opts.AddLongOption('u', "unpack", "Unpack archive into current directory")
        .NoArgument()
        .Optional()
        .StoreValue(&unpack, true);

    bool list = false;
    opts.AddLongOption('l', "list", "List files in archive")
        .NoArgument()
        .Optional()
        .StoreValue(&list, true);

    bool cutSlash = true;
    opts.AddLongOption("as-is", "somewhy slash is cutted by default in list; with this option key will be shown as-is")
        .NoArgument()
        .Optional()
        .StoreValue(&cutSlash, false);

    bool listMd5 = false;
    opts.AddLongOption('m', "md5", "List files in archive with MD5 sums")
        .NoArgument()
        .Optional()
        .StoreValue(&listMd5, true);

    bool recursive = false;
    opts.AddLongOption('r', "recursive", "Read all files under each directory, recursively")
        .NoArgument()
        .Optional()
        .StoreValue(&recursive, true);

    Quiet = false;
    opts.AddLongOption('q', "quiet", "Do not output progress to stderr")
        .NoArgument()
        .Optional()
        .StoreValue(&Quiet, true);

    TString prepend;
    opts.AddLongOption('z', "prepend", "Prepend string to output")
        .RequiredArgument("<prefix>")
        .StoreResult(&prepend);

    TString append;
    opts.AddLongOption('a', "append", "Append string to output")
        .RequiredArgument("<suffix>")
        .StoreResult(&append);

    TString outputf;
    opts.AddLongOption('o', "output", "Output to file instead stdout")
        .RequiredArgument("<file>")
        .StoreResult(&outputf);

    TString unpackDir;
    opts.AddLongOption('d', "unpackdir", "Unpack destination directory")
        .RequiredArgument("<dir>")
        .DefaultValue(".")
        .StoreResult(&unpackDir);

    TString yasmBase;
    opts.AddLongOption('A', "yasm", "Output dump is yasm format")
        .RequiredArgument("<base>")
        .StoreResult(&yasmBase);

    TString cppBase;
    opts.AddLongOption('C', "cpp", "Output dump is C/C++ format")
        .RequiredArgument("<base>")
        .StoreResult(&cppBase);

    TString forceKeys;
    opts.AddLongOption('k', "keys", "Set explicit list of keys for elements")
        .RequiredArgument("<keys>")
        .StoreResult(&forceKeys);

    opts.SetFreeArgDefaultTitle("<file>");
    opts.SetFreeArgsMin(1);
    NLastGetopt::TOptsParseResult optsRes(&opts, argc, argv);

    SubstGlobal(append, "\\n", "\n");
    SubstGlobal(prepend, "\\n", "\n");

    TVector<TRec> recs;
    const auto& files = optsRes.GetFreeArgs();

    TVector<TStringBuf> keys;
    if (forceKeys.size())
        StringSplitter(forceKeys).Split(':').SkipEmpty().Collect(&keys);

    if (keys.size() && keys.size() != files.size()) {
        Cerr << "Invalid number of keys=" << keys.size() << " (!= number of files=" << files.size() << ")" << Endl;
        return 1;
    }

    for (size_t i = 0; i < files.size(); ++i) {
        const auto& path = files[i];
        size_t off = 0;
#ifdef _win_
        if (path[0] > 0 && isalpha(path[0]) && path[1] == ':')
            off = 2; // skip drive letter ("d:")
#endif               // _win_
        const size_t pos = path.find(':', off);
        TRec cur;
        cur.Path = path.substr(0, pos);
        if (pos != TString::npos)
            cur.Prefix = path.substr(pos + 1);
        if (keys.size())
            cur.Key = keys[i];
        cur.Recursive = recursive;
        cur.Fix();
        recs.push_back(cur);
    }

    try {
        if (listMd5) {
            for (const auto& rec: recs) {
                ListArchiveMd5(rec.Path, cutSlash);
            }
        } else if (list) {
            for (const auto& rec: recs) {
                ListArchive(rec.Path, cutSlash);
            }
        } else if (unpack) {
            const TFsPath dir(unpackDir);
            for (const auto& rec: recs) {
                UnpackArchive(rec.Path, dir);
            }
        } else {
            TAutoPtr<IOutputStream> outf(OpenOutput(outputf));
            IOutputStream* out = outf.Get();
            THolder<IOutputStream> hexout;

            if (hexdump) {
                hexout.Reset(new THexOutput(out));
                out = hexout.Get();
            } else if (stride) {
                hexout.Reset(new TStringArrayOutput(out, stride));
                out = hexout.Get();
            } else if (yasmBase) {
                hexout.Reset(new TYasmOutput(out, yasmBase));
                out = hexout.Get();
            } else if (cppBase) {
                hexout.Reset(new TCStringOutput(out, cppBase));
                out = hexout.Get();
            }

            outf->Write(prepend.data(), prepend.size());

            if (cat) {
                for (const auto& rec: recs) {
                    rec.Recurse(*out);
                }
            } else {
                TDuplicatesMap duplicatesMap;
                if (deduplicate) {
                    for (const auto& rec: recs) {
                        rec.Recurse(duplicatesMap);
                    }
                }
                duplicatesMap.Finish();
                TDeduplicationArchiveWriter w(duplicatesMap, out, !doNotZip);
                for (const auto& rec: recs) {
                    rec.Recurse(w);
                }
                w.Finish();
            }

            try {
                out->Finish();
            } catch (...) {
            }

            outf->Write(append.data(), append.size());
        }
    } catch (...) {
        Cerr << CurrentExceptionMessage() << Endl;
        return 1;
    }

    return 0;
}