diff options
| author | Anton Samokhvalov <[email protected]> | 2022-02-10 16:45:17 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:45:17 +0300 | 
| commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
| tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/comptable | |
| parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
Restoring authorship annotation for Anton Samokhvalov <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/comptable')
| -rw-r--r-- | library/cpp/comptable/comptable.cpp | 730 | ||||
| -rw-r--r-- | library/cpp/comptable/comptable.h | 94 | ||||
| -rw-r--r-- | library/cpp/comptable/usage/usage.cpp | 30 | ||||
| -rw-r--r-- | library/cpp/comptable/ut/comptable_ut.cpp | 28 | 
4 files changed, 441 insertions, 441 deletions
diff --git a/library/cpp/comptable/comptable.cpp b/library/cpp/comptable/comptable.cpp index 8e87da30604..8a92d4d1aa6 100644 --- a/library/cpp/comptable/comptable.cpp +++ b/library/cpp/comptable/comptable.cpp @@ -11,433 +11,433 @@  #include <cstdlib>  namespace NCompTable { -    static const ui32 magicHashMul = 0xd077cd1f;  -    static const size_t hashSizeLog = 18;  -    static const size_t hashSize = 1 << hashSizeLog;  +    static const ui32 magicHashMul = 0xd077cd1f; +    static const size_t hashSizeLog = 18; +    static const size_t hashSize = 1 << hashSizeLog; -    size_t HashIndex(ui32 value, ui32 hashMul) {  -        return (value * hashMul) >> (32 - hashSizeLog);  -    }  +    size_t HashIndex(ui32 value, ui32 hashMul) { +        return (value * hashMul) >> (32 - hashSizeLog); +    } -    void TCompressorTable::GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const {  -        ui64 code = value - entry.MinValue;  -        bitCode = (code << entry.PrefixBits) | entry.Prefix;  -        bitLength = entry.AllBits;  -    }  -    void TCompressorTable::GetLastHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const {  -        GetHuffCode(HuffCodes[9], value, bitCode, bitLength);  -    }  -    void TCompressorTable::GetHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const {  -        for (auto huffCode : HuffCodes) {  -            ui32 minValue = huffCode.MinValue;  -            if (minValue <= value && value < huffCode.MaxValue()) {  -                GetHuffCode(huffCode, value, bitCode, bitLength);  -                return;  -            }  +    void TCompressorTable::GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const { +        ui64 code = value - entry.MinValue; +        bitCode = (code << entry.PrefixBits) | entry.Prefix; +        bitLength = entry.AllBits; +    } +    void TCompressorTable::GetLastHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const { +        GetHuffCode(HuffCodes[9], value, bitCode, bitLength); +    } +    void TCompressorTable::GetHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const { +        for (auto huffCode : HuffCodes) { +            ui32 minValue = huffCode.MinValue; +            if (minValue <= value && value < huffCode.MaxValue()) { +                GetHuffCode(huffCode, value, bitCode, bitLength); +                return; +            }          } -        abort();  +        abort();      } -    ui8 TCompressorTable::GetHuffIndex(ui8 prefix) {  -        for (size_t i = 0; i < 10; ++i) {  -            if ((prefix & ((1 << HuffCodes[i].PrefixBits) - 1)) == HuffCodes[i].Prefix) {  -                return i;  -            }  +    ui8 TCompressorTable::GetHuffIndex(ui8 prefix) { +        for (size_t i = 0; i < 10; ++i) { +            if ((prefix & ((1 << HuffCodes[i].PrefixBits) - 1)) == HuffCodes[i].Prefix) { +                return i; +            }          } -        abort();  -        return 0;  +        abort(); +        return 0;      } -    void TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536]) {  -        i64 add = 1;  -        while (1) {  -            if (BuildHuffCodes(totalFreq, freqs, add)) {  -                return;  -            }  -            add = add * 2;  +    void TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536]) { +        i64 add = 1; +        while (1) { +            if (BuildHuffCodes(totalFreq, freqs, add)) { +                return; +            } +            add = add * 2;          }      } -    bool TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add) {  -        TVector<NCompProto::TCode> codes;  -        size_t bits[] = {0, 1, 2, 4, 8, 10, 12, 14, 16, 32};  -        size_t off = 0;  -        ui32 total = totalFreq;  -        for (size_t i = 0; i < 9; ++i) {  -            size_t size = 1 << bits[i];  -            ui32 weight = 0;  -            for (size_t j = off; j < off + size && j < 65536; ++j) {  -                weight += freqs[j];  -            }  -            codes.push_back(NCompProto::TCode(weight + add, ui32(off), bits[i]));  -            total = total > weight ? total - weight : 0;  -            off += size;  +    bool TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add) { +        TVector<NCompProto::TCode> codes; +        size_t bits[] = {0, 1, 2, 4, 8, 10, 12, 14, 16, 32}; +        size_t off = 0; +        ui32 total = totalFreq; +        for (size_t i = 0; i < 9; ++i) { +            size_t size = 1 << bits[i]; +            ui32 weight = 0; +            for (size_t j = off; j < off + size && j < 65536; ++j) { +                weight += freqs[j]; +            } +            codes.push_back(NCompProto::TCode(weight + add, ui32(off), bits[i])); +            total = total > weight ? total - weight : 0; +            off += size;          } -        codes.push_back(NCompProto::TCode(total + add, 0, 32));  -        i64 ret = NCompProto::BuildHuff(codes);  -        Y_UNUSED(ret);  -        for (size_t i = 0; i < codes.size(); ++i) {  -            NCompProto::TCoderEntry& code = HuffCodes[i];  -            code.MinValue = codes[i].Start;  -            code.Prefix = codes[i].Prefix;  -            code.PrefixBits = codes[i].PrefLength;  -            code.AllBits = code.PrefixBits + codes[i].Bits;  -            if (code.PrefixBits > 8) {  -                return false;  -            }  +        codes.push_back(NCompProto::TCode(total + add, 0, 32)); +        i64 ret = NCompProto::BuildHuff(codes); +        Y_UNUSED(ret); +        for (size_t i = 0; i < codes.size(); ++i) { +            NCompProto::TCoderEntry& code = HuffCodes[i]; +            code.MinValue = codes[i].Start; +            code.Prefix = codes[i].Prefix; +            code.PrefixBits = codes[i].PrefLength; +            code.AllBits = code.PrefixBits + codes[i].Bits; +            if (code.PrefixBits > 8) { +                return false; +            } +        } +        for (size_t i = 0; i < 256; ++i) { +            HuffIndices[i] = GetHuffIndex(ui8(i));          } -        for (size_t i = 0; i < 256; ++i) {  -            HuffIndices[i] = GetHuffIndex(ui8(i));  -        }  -        return true;  +        return true;      } -    template <class TIterator>  -    void Iterate(const TStringBuf& data, TIterator& iterator) {  -        size_t i = 0;  -        iterator.Visit(ui32(data.size()));  -        for (; i + 3 < data.size(); i += 4) {  +    template <class TIterator> +    void Iterate(const TStringBuf& data, TIterator& iterator) { +        size_t i = 0; +        iterator.Visit(ui32(data.size())); +        for (; i + 3 < data.size(); i += 4) {              iterator.Visit(reinterpret_cast<const ui32*>(data.data() + i)[0]); -        }  -        if (i != data.size()) {  -            ui32 buffer[1] = {0};  +        } +        if (i != data.size()) { +            ui32 buffer[1] = {0};              memcpy(buffer, data.data() + i, data.size() - i); -            iterator.Visit(buffer[0]);  -        }  +            iterator.Visit(buffer[0]); +        }      } -    class TDataCompressor {  -        ui32 Keys[hashSize];  -        ui32 Vals[hashSize];  -        ui64 BitCodes[hashSize];  -        ui8 BitLengths[hashSize];  -        ui32 HashMul;  -        const TCompressorTable Table;  -  -    public:  -        TDataCompressor(const TCompressorTable& table)  -            : Table(table)  -        {  -            HashMul = table.HashMul;  -            for (size_t i = 0; i < hashSize; ++i) {  -                Keys[i] = 0;  -                Vals[i] = ui32(-1);  -            }  -            for (size_t i = 0; i < 65536; ++i) {  -                size_t index = HashIndex(table.Table[i], table.HashMul);  -                Keys[index] = table.Table[i];  -                Vals[index] = i;  -                table.GetHuffCode(ui32(i), BitCodes[index], BitLengths[index]);  -            }  +    class TDataCompressor { +        ui32 Keys[hashSize]; +        ui32 Vals[hashSize]; +        ui64 BitCodes[hashSize]; +        ui8 BitLengths[hashSize]; +        ui32 HashMul; +        const TCompressorTable Table; + +    public: +        TDataCompressor(const TCompressorTable& table) +            : Table(table) +        { +            HashMul = table.HashMul; +            for (size_t i = 0; i < hashSize; ++i) { +                Keys[i] = 0; +                Vals[i] = ui32(-1); +            } +            for (size_t i = 0; i < 65536; ++i) { +                size_t index = HashIndex(table.Table[i], table.HashMul); +                Keys[index] = table.Table[i]; +                Vals[index] = i; +                table.GetHuffCode(ui32(i), BitCodes[index], BitLengths[index]); +            }          } -        void GetStat(ui32 val, ui32& stat) const {  -            size_t index = HashIndex(val, HashMul);  -            if (Keys[index] == val) {  -                stat = Vals[index];  -            } else {  -                stat = ui32(-1);  -            }  +        void GetStat(ui32 val, ui32& stat) const { +            size_t index = HashIndex(val, HashMul); +            if (Keys[index] == val) { +                stat = Vals[index]; +            } else { +                stat = ui32(-1); +            }          } -        void GetStat(ui32 val, ui64& bitCode, ui8& bitLength) const {  -            size_t index = HashIndex(val, HashMul);  -            if (Keys[index] == val) {  -                bitCode = BitCodes[index];  -                bitLength = BitLengths[index];  -            } else {  -                Table.GetLastHuffCode(val, bitCode, bitLength);  -            }  +        void GetStat(ui32 val, ui64& bitCode, ui8& bitLength) const { +            size_t index = HashIndex(val, HashMul); +            if (Keys[index] == val) { +                bitCode = BitCodes[index]; +                bitLength = BitLengths[index]; +            } else { +                Table.GetLastHuffCode(val, bitCode, bitLength); +            }          } -        size_t Compress4(const ui32* data4, ui8* dataBuff) const {  -            ui8* oldBuff = dataBuff;  -            ++dataBuff;  -            ui32 status = 0;  -            for (size_t i = 0; i < 4; ++i) {  -                status = (status << 2);  -                ui32 data = data4[i];  -                if (data == 0) {  -                    continue;  -                }  -                ui32 stat;  -                GetStat(data, stat);  -                if (stat < 256) {  -                    memcpy(dataBuff, &stat, 1);  -                    dataBuff += 1;  -                    status += 1;  -                } else if (stat < 65536) {  -                    memcpy(dataBuff, &stat, 2);  -                    dataBuff += 2;  -                    status += 2;  -                } else {  -                    memcpy(dataBuff, &data, 4);  -                    dataBuff += 4;  -                    status += 3;  -                }  -            }  -            oldBuff[0] = ui8(status);  -            return dataBuff - oldBuff;  +        size_t Compress4(const ui32* data4, ui8* dataBuff) const { +            ui8* oldBuff = dataBuff; +            ++dataBuff; +            ui32 status = 0; +            for (size_t i = 0; i < 4; ++i) { +                status = (status << 2); +                ui32 data = data4[i]; +                if (data == 0) { +                    continue; +                } +                ui32 stat; +                GetStat(data, stat); +                if (stat < 256) { +                    memcpy(dataBuff, &stat, 1); +                    dataBuff += 1; +                    status += 1; +                } else if (stat < 65536) { +                    memcpy(dataBuff, &stat, 2); +                    dataBuff += 2; +                    status += 2; +                } else { +                    memcpy(dataBuff, &data, 4); +                    dataBuff += 4; +                    status += 3; +                } +            } +            oldBuff[0] = ui8(status); +            return dataBuff - oldBuff;          } -        struct TCompressorIterator {  -            TVector<char>& Result;  -            const TDataCompressor& Compressor;  -            ui32 Cached[4];  -            size_t Index;  -            size_t RealSize;  -            TCompressorIterator(TVector<char>& result, const TDataCompressor& compressor)  -                : Result(result)  -                , Compressor(compressor)  -                , Index(0)  -                , RealSize(0)  -            {  +        struct TCompressorIterator { +            TVector<char>& Result; +            const TDataCompressor& Compressor; +            ui32 Cached[4]; +            size_t Index; +            size_t RealSize; +            TCompressorIterator(TVector<char>& result, const TDataCompressor& compressor) +                : Result(result) +                , Compressor(compressor) +                , Index(0) +                , RealSize(0) +            { +            } +            void Flush() { +                Result.yresize(RealSize + 32); +                RealSize += Compressor.Compress4(Cached, reinterpret_cast<ui8*>(Result.data()) + RealSize); +                Index = 0; +            } +            void Visit(const ui32 data) { +                Cached[Index] = data; +                ++Index; +                if (Index == 4) { +                    Flush(); +                } +            } +            ~TCompressorIterator() { +                if (Index != 0) { +                    for (size_t i = Index; i < 4; ++i) { +                        Cached[i] = 0; +                    } +                    Flush(); +                } +                Result.yresize(RealSize);              } -            void Flush() {  -                Result.yresize(RealSize + 32);  -                RealSize += Compressor.Compress4(Cached, reinterpret_cast<ui8*>(Result.data()) + RealSize);  -                Index = 0;  -            }  -            void Visit(const ui32 data) {  -                Cached[Index] = data;  -                ++Index;  -                if (Index == 4) {  -                    Flush();  -                }  -            }  -            ~TCompressorIterator() {  -                if (Index != 0) {  -                    for (size_t i = Index; i < 4; ++i) {  -                        Cached[i] = 0;  -                    }  -                    Flush();  -                }  -                Result.yresize(RealSize);  -            }  -        };  -        struct THQCompressorIterator {  -            TVector<char>& Result;  -            const TDataCompressor& Compressor;  -            size_t RealSize;  -            ui64 Offset;  -            THQCompressorIterator(TVector<char>& result, const TDataCompressor& compressor)  -                : Result(result)  -                , Compressor(compressor)  -                , RealSize(0)  -                , Offset(0)  -            {  -            }  -            void Visit(const ui32 data) {  -                size_t byteOff = Offset >> 3;  -                Result.yresize(byteOff + 32);  -                ui64 bitCode;  -                ui8 bitLength;  -                Compressor.GetStat(data, bitCode, bitLength);  -                ui64 dst;  -                memcpy(&dst, &Result[byteOff], sizeof(dst));  -                ui64 mask = ((1ULL << (Offset & 7)) - 1ULL);  -                dst = (dst & mask) | (bitCode << (Offset & 7));  -                memcpy(&Result[byteOff], &dst, sizeof(dst));  -                Offset += bitLength;  -            }  -            ~THQCompressorIterator() {  -                Result.yresize((Offset + 7) >> 3);  -                if (Offset & 7)  -                    Result.back() &= (1 << (Offset & 7)) - 1;  -            }  -        };  -        template <bool HQ>  -        void Compress(const TStringBuf& stringBuf, TVector<char>& rslt) const {  -            if (!HQ) {  -                TCompressorIterator iterator(rslt, *this);  -                Iterate(stringBuf, iterator);  +        }; +        struct THQCompressorIterator { +            TVector<char>& Result; +            const TDataCompressor& Compressor; +            size_t RealSize; +            ui64 Offset; +            THQCompressorIterator(TVector<char>& result, const TDataCompressor& compressor) +                : Result(result) +                , Compressor(compressor) +                , RealSize(0) +                , Offset(0) +            { +            } +            void Visit(const ui32 data) { +                size_t byteOff = Offset >> 3; +                Result.yresize(byteOff + 32); +                ui64 bitCode; +                ui8 bitLength; +                Compressor.GetStat(data, bitCode, bitLength); +                ui64 dst; +                memcpy(&dst, &Result[byteOff], sizeof(dst)); +                ui64 mask = ((1ULL << (Offset & 7)) - 1ULL); +                dst = (dst & mask) | (bitCode << (Offset & 7)); +                memcpy(&Result[byteOff], &dst, sizeof(dst)); +                Offset += bitLength; +            } +            ~THQCompressorIterator() { +                Result.yresize((Offset + 7) >> 3); +                if (Offset & 7) +                    Result.back() &= (1 << (Offset & 7)) - 1; +            } +        }; +        template <bool HQ> +        void Compress(const TStringBuf& stringBuf, TVector<char>& rslt) const { +            if (!HQ) { +                TCompressorIterator iterator(rslt, *this); +                Iterate(stringBuf, iterator);              } else { -                THQCompressorIterator iterator(rslt, *this);  -                Iterate(stringBuf, iterator);  +                THQCompressorIterator iterator(rslt, *this); +                Iterate(stringBuf, iterator);              }          } -    };  -  -    class TDataDecompressor {  -        const TCompressorTable Table;  -  -    public:  -        TDataDecompressor(const TCompressorTable& table)  -            : Table(table)  +    }; + +    class TDataDecompressor { +        const TCompressorTable Table; + +    public: +        TDataDecompressor(const TCompressorTable& table) +            : Table(table)          {          } -        size_t Decompress(ui32* data4, const ui8* dataBuff, ui32 type) const {  -            if (type == 0) {  -                data4[0] = 0;  -                return 0;  -            }  -            if (type == 1) {  -                ui8 masked;  -                memcpy(&masked, dataBuff, sizeof(masked));  -                data4[0] = Table.Table[masked];  -                return 1;  -            } else if (type == 2) {  -                ui16 masked;  -                memcpy(&masked, dataBuff, sizeof(masked));  -                data4[0] = Table.Table[masked];  -                return 2;  -            } else {  -                memcpy(data4, dataBuff, sizeof(*data4));  -                return 4;  -            }  +        size_t Decompress(ui32* data4, const ui8* dataBuff, ui32 type) const { +            if (type == 0) { +                data4[0] = 0; +                return 0; +            } +            if (type == 1) { +                ui8 masked; +                memcpy(&masked, dataBuff, sizeof(masked)); +                data4[0] = Table.Table[masked]; +                return 1; +            } else if (type == 2) { +                ui16 masked; +                memcpy(&masked, dataBuff, sizeof(masked)); +                data4[0] = Table.Table[masked]; +                return 2; +            } else { +                memcpy(data4, dataBuff, sizeof(*data4)); +                return 4; +            } +        } +        size_t Decompress(ui32* data4, const ui8* dataBuff) const { +            ui32 status = *dataBuff; +            const ui8* oldData = dataBuff; +            ++dataBuff; +            dataBuff += Decompress(data4 + 0, dataBuff, (status >> 6)); +            dataBuff += Decompress(data4 + 1, dataBuff, (status >> 4) & 0x3); +            dataBuff += Decompress(data4 + 2, dataBuff, (status >> 2) & 0x3); +            dataBuff += Decompress(data4 + 3, dataBuff, (status)&0x3); +            return dataBuff - oldData;          } -        size_t Decompress(ui32* data4, const ui8* dataBuff) const {  -            ui32 status = *dataBuff;  -            const ui8* oldData = dataBuff;  -            ++dataBuff;  -            dataBuff += Decompress(data4 + 0, dataBuff, (status >> 6));  -            dataBuff += Decompress(data4 + 1, dataBuff, (status >> 4) & 0x3);  -            dataBuff += Decompress(data4 + 2, dataBuff, (status >> 2) & 0x3);  -            dataBuff += Decompress(data4 + 3, dataBuff, (status)&0x3);  -            return dataBuff - oldData;  -        }  -        void Decompress(ui32* data, const ui8* dataBuff, ui64& offset, ui64 border) const {  -            size_t off = offset >> 3;  -            ui64 val = 0;  -            if (border - off >= sizeof(val)) {  -                memcpy(&val, dataBuff + off, sizeof(val));  -            } else {  -                memcpy(&val, dataBuff + off, border - off);  +        void Decompress(ui32* data, const ui8* dataBuff, ui64& offset, ui64 border) const { +            size_t off = offset >> 3; +            ui64 val = 0; +            if (border - off >= sizeof(val)) { +                memcpy(&val, dataBuff + off, sizeof(val)); +            } else { +                memcpy(&val, dataBuff + off, border - off);              } -            val >>= (offset & 7);  -            ui8 index = Table.HuffIndices[ui8(val)];  -            const NCompProto::TCoderEntry& entry = Table.HuffCodes[index];  -            ui8 allBits = entry.AllBits;  -            val = (val & ((1ULL << allBits) - 1ULL)) >> entry.PrefixBits;  -            val = val + entry.MinValue;  -            data[0] = (index == 9) ? val : Table.Table[val];  -            offset += allBits;  +            val >>= (offset & 7); +            ui8 index = Table.HuffIndices[ui8(val)]; +            const NCompProto::TCoderEntry& entry = Table.HuffCodes[index]; +            ui8 allBits = entry.AllBits; +            val = (val & ((1ULL << allBits) - 1ULL)) >> entry.PrefixBits; +            val = val + entry.MinValue; +            data[0] = (index == 9) ? val : Table.Table[val]; +            offset += allBits;          } -        size_t GetJunkSize() const {  -            return 8;  -        }  -        template <bool HQ>  -        void Decompress(const TStringBuf& dataBuf, TVector<char>& rslt) const {  -            rslt.clear();  -            if (dataBuf.empty()) {  -                return;  -            }  +        size_t GetJunkSize() const { +            return 8; +        } +        template <bool HQ> +        void Decompress(const TStringBuf& dataBuf, TVector<char>& rslt) const { +            rslt.clear(); +            if (dataBuf.empty()) { +                return; +            }              const ui8* src = reinterpret_cast<const ui8*>(dataBuf.data());              ui64 border = dataBuf.size(); -            ui32 len = 0;  -            ui32 nullTerm = 1;  -            if (HQ) {  -                ui64 offset = 0;  -                ui32 length = 0;  -                Decompress(&length, src, offset, border);  -                size_t length32 = (length + 3) / 4;  -                rslt.yresize(length32 * 4 + nullTerm);  -                ui32* result = reinterpret_cast<ui32*>(rslt.data());  -                len = length;  -                for (size_t i = 0; i < length32; ++i) {  -                    Decompress(&result[i], src, offset, border);  +            ui32 len = 0; +            ui32 nullTerm = 1; +            if (HQ) { +                ui64 offset = 0; +                ui32 length = 0; +                Decompress(&length, src, offset, border); +                size_t length32 = (length + 3) / 4; +                rslt.yresize(length32 * 4 + nullTerm); +                ui32* result = reinterpret_cast<ui32*>(rslt.data()); +                len = length; +                for (size_t i = 0; i < length32; ++i) { +                    Decompress(&result[i], src, offset, border); +                } +            } else { +                ui32 data[4]; +                src += Decompress(data, src); +                len = data[0]; +                size_t length32x4 = (4 + len + 15) / 16; +                rslt.yresize(length32x4 * 16 + nullTerm); +                ui32* result = reinterpret_cast<ui32*>(rslt.data()); +                for (size_t i = 0; i < 3; ++i) { +                    result[i] = data[i + 1]; +                } +                for (size_t j = 1; j < length32x4; ++j) { +                    src += Decompress(&result[j * 4 - 1], src);                  } -            } else {  -                ui32 data[4];  -                src += Decompress(data, src);  -                len = data[0];  -                size_t length32x4 = (4 + len + 15) / 16;  -                rslt.yresize(length32x4 * 16 + nullTerm);  -                ui32* result = reinterpret_cast<ui32*>(rslt.data());  -                for (size_t i = 0; i < 3; ++i) {  -                    result[i] = data[i + 1];  -                }  -                for (size_t j = 1; j < length32x4; ++j) {  -                    src += Decompress(&result[j * 4 - 1], src);  -                }               } -            rslt.resize(len);  +            rslt.resize(len);          }      }; -  -    struct TSamplerIterator {  -        TDataSampler& Sampler;  -        TSamplerIterator(TDataSampler& sampler)  -            : Sampler(sampler)  + +    struct TSamplerIterator { +        TDataSampler& Sampler; +        TSamplerIterator(TDataSampler& sampler) +            : Sampler(sampler)          {          }          void Visit(const ui32 data) { -            Sampler.AddStat(data);  +            Sampler.AddStat(data);          } -    };  -  -    struct TGreaterComparer {  -        //std::greater in arcadia???  -        bool operator()(ui64 a, ui64 b) const {  -            return a > b;  +    }; + +    struct TGreaterComparer { +        //std::greater in arcadia??? +        bool operator()(ui64 a, ui64 b) const { +            return a > b;          }      }; -  -    TDataSampler::TDataSampler() {  -        memset(this, 0, sizeof(*this));  + +    TDataSampler::TDataSampler() { +        memset(this, 0, sizeof(*this));      } -    void TDataSampler::BuildTable(TCompressorTable& table) const {  -        std::vector<ui64> sorted;  -        for (size_t i = 0; i < Size; ++i) {  -            ui64 res = (ui64(EntryHit[i]) << 32) + ui32(i);  -            sorted.push_back(res);  +    void TDataSampler::BuildTable(TCompressorTable& table) const { +        std::vector<ui64> sorted; +        for (size_t i = 0; i < Size; ++i) { +            ui64 res = (ui64(EntryHit[i]) << 32) + ui32(i); +            sorted.push_back(res);          } -        std::vector<ui32> entryTbl(Size);  -        std::sort(sorted.begin(), sorted.end(), TGreaterComparer());  -        table.HashMul = magicHashMul;  -        i64 freqs[65536];  -        for (size_t i = 0; i < 65536; ++i) {  -            ui32 ind = ui32(sorted[i]);  -            table.Table[i] = EntryVal[ind];  -            freqs[i] = EntryHit[ind];  +        std::vector<ui32> entryTbl(Size); +        std::sort(sorted.begin(), sorted.end(), TGreaterComparer()); +        table.HashMul = magicHashMul; +        i64 freqs[65536]; +        for (size_t i = 0; i < 65536; ++i) { +            ui32 ind = ui32(sorted[i]); +            table.Table[i] = EntryVal[ind]; +            freqs[i] = EntryHit[ind];          } -        table.BuildHuffCodes(Counter, freqs);  +        table.BuildHuffCodes(Counter, freqs);      } -  -    void TDataSampler::AddStat(ui32 val) {  -        ++Counter;  -        size_t hashInd = HashIndex(val, magicHashMul);  -        if (EntryVal[hashInd] == val) {  -            ++EntryHit[hashInd];  -        } else if (EntryHit[hashInd] > 1) {  -            --EntryHit[hashInd];  + +    void TDataSampler::AddStat(ui32 val) { +        ++Counter; +        size_t hashInd = HashIndex(val, magicHashMul); +        if (EntryVal[hashInd] == val) { +            ++EntryHit[hashInd]; +        } else if (EntryHit[hashInd] > 1) { +            --EntryHit[hashInd];          } else { -            EntryHit[hashInd] = 1;  -            EntryVal[hashInd] = val;  +            EntryHit[hashInd] = 1; +            EntryVal[hashInd] = val;          }      } -  -    void TDataSampler::AddStat(const TStringBuf& stringBuf) {  -        TSamplerIterator iterator(*this);  -        Iterate(stringBuf, iterator);  + +    void TDataSampler::AddStat(const TStringBuf& stringBuf) { +        TSamplerIterator iterator(*this); +        Iterate(stringBuf, iterator);      } -    TChunkCompressor::TChunkCompressor(bool highQuality, const TCompressorTable& table)  -        : HighQuality(highQuality)  +    TChunkCompressor::TChunkCompressor(bool highQuality, const TCompressorTable& table) +        : HighQuality(highQuality)      { -        Compressor.Reset(new TDataCompressor(table));  +        Compressor.Reset(new TDataCompressor(table));      } -    void TChunkCompressor::Compress(TStringBuf data, TVector<char>* rslt) const {  -        if (HighQuality) {  -            Compressor->Compress<1>(data, *rslt);  -        } else {  -            Compressor->Compress<0>(data, *rslt);  -        }  +    void TChunkCompressor::Compress(TStringBuf data, TVector<char>* rslt) const { +        if (HighQuality) { +            Compressor->Compress<1>(data, *rslt); +        } else { +            Compressor->Compress<0>(data, *rslt); +        }      } -    TChunkCompressor::~TChunkCompressor() = default;  +    TChunkCompressor::~TChunkCompressor() = default; -    TChunkDecompressor::TChunkDecompressor(bool highQuality, const TCompressorTable& table)  -        : HighQuality(highQuality)  -    {  -        Decompressor.Reset(new TDataDecompressor(table));  +    TChunkDecompressor::TChunkDecompressor(bool highQuality, const TCompressorTable& table) +        : HighQuality(highQuality) +    { +        Decompressor.Reset(new TDataDecompressor(table));      } -    void TChunkDecompressor::Decompress(TStringBuf data, TVector<char>* rslt) const {  -        if (HighQuality) {  -            Decompressor->Decompress<1>(data, *rslt);  -        } else {  -            Decompressor->Decompress<0>(data, *rslt);  -        }  +    void TChunkDecompressor::Decompress(TStringBuf data, TVector<char>* rslt) const { +        if (HighQuality) { +            Decompressor->Decompress<1>(data, *rslt); +        } else { +            Decompressor->Decompress<0>(data, *rslt); +        }      } -    TChunkDecompressor::~TChunkDecompressor() = default;  +    TChunkDecompressor::~TChunkDecompressor() = default;  } diff --git a/library/cpp/comptable/comptable.h b/library/cpp/comptable/comptable.h index ff9f241964b..d225fed7a0b 100644 --- a/library/cpp/comptable/comptable.h +++ b/library/cpp/comptable/comptable.h @@ -1,5 +1,5 @@  #pragma once -  +  #include <util/generic/vector.h>  #include <util/memory/blob.h>  #include <util/ysaveload.h> @@ -7,62 +7,62 @@  #include <library/cpp/compproto/huff.h>  namespace NCompTable { -    struct TCompressorTable {  -        ui32 Table[65536];  -        ui32 HashMul;  -        NCompProto::TCoderEntry HuffCodes[10];  -        ui8 HuffIndices[256];  +    struct TCompressorTable { +        ui32 Table[65536]; +        ui32 HashMul; +        NCompProto::TCoderEntry HuffCodes[10]; +        ui8 HuffIndices[256]; -        void GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const;  -        void GetLastHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const;  -        void GetHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const;  -        ui8 GetHuffIndex(ui8 prefix);  -        void BuildHuffCodes(i64 totalFreq, i64 freqs[65536]);  -        bool BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add);  -    };  +        void GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const; +        void GetLastHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const; +        void GetHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const; +        ui8 GetHuffIndex(ui8 prefix); +        void BuildHuffCodes(i64 totalFreq, i64 freqs[65536]); +        bool BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add); +    }; -    struct TDataSampler {  -        enum {  -            Size = 1 << 18,  -        };  -        ui32 EntryVal[Size];  -        i64 EntryHit[Size];  -        i64 Counter;  +    struct TDataSampler { +        enum { +            Size = 1 << 18, +        }; +        ui32 EntryVal[Size]; +        i64 EntryHit[Size]; +        i64 Counter; -    public:  -        TDataSampler();  -        void BuildTable(TCompressorTable& table) const;  -        void AddStat(ui32 val);  -        void AddStat(const TStringBuf& stringBuf);  +    public: +        TDataSampler(); +        void BuildTable(TCompressorTable& table) const; +        void AddStat(ui32 val); +        void AddStat(const TStringBuf& stringBuf);      }; -    class TDataCompressor;  -    class TDataDecompressor;  +    class TDataCompressor; +    class TDataDecompressor; -    class TChunkCompressor {  -    public:  -        TChunkCompressor(bool highQuality, const TCompressorTable& table);  -        void Compress(TStringBuf data, TVector<char>* result) const;  -        ~TChunkCompressor();  +    class TChunkCompressor { +    public: +        TChunkCompressor(bool highQuality, const TCompressorTable& table); +        void Compress(TStringBuf data, TVector<char>* result) const; +        ~TChunkCompressor(); -    private:  -        bool HighQuality;  -        THolder<TDataCompressor> Compressor;  -    };  +    private: +        bool HighQuality; +        THolder<TDataCompressor> Compressor; +    }; -    class TChunkDecompressor {  -    public:  -        TChunkDecompressor(bool highQuality, const TCompressorTable& table);  -        void Decompress(TStringBuf data, TVector<char>* result) const;  -        ~TChunkDecompressor();  +    class TChunkDecompressor { +    public: +        TChunkDecompressor(bool highQuality, const TCompressorTable& table); +        void Decompress(TStringBuf data, TVector<char>* result) const; +        ~TChunkDecompressor(); + +    private: +        bool HighQuality; +        THolder<TDataDecompressor> Decompressor; +    }; -    private:  -        bool HighQuality;  -        THolder<TDataDecompressor> Decompressor;  -    };  +} -}  -   template <>  class TSerializer<NCompTable::TCompressorTable> {  public: diff --git a/library/cpp/comptable/usage/usage.cpp b/library/cpp/comptable/usage/usage.cpp index 550b6777c3d..9997c83686a 100644 --- a/library/cpp/comptable/usage/usage.cpp +++ b/library/cpp/comptable/usage/usage.cpp @@ -1,15 +1,15 @@  #include <library/cpp/comptable/comptable.h> -#include <util/random/random.h>  -#include <util/random/fast.h>  -  +#include <util/random/random.h> +#include <util/random/fast.h> +  #include <time.h>  #include <stdlib.h>  using namespace NCompTable; -template <bool HQ>  -void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {  +template <bool HQ> +void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {      TVector<char> compressed;      TVector<char> decompressed; @@ -19,8 +19,8 @@ void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {      size_t origSize = 0;      size_t compSize = 0;      float cl1 = clock(); -    for (size_t i = 0; i < lines.size(); ++i) {  -        const TString& line = lines[i];  +    for (size_t i = 0; i < lines.size(); ++i) { +        const TString& line = lines[i];          compressor.Compress(line, &compressed);          origSize += line.size();          compSize += compressed.size(); @@ -28,20 +28,20 @@ void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {          deCompressor.Decompress(in, &decompressed);          if (decompressed.size() != line.size() || memcmp(decompressed.data(), line.data(), decompressed.size())) {              Cout << i << "\n"; -            Cout << line << "\n"  -                 << TString(decompressed.data(), decompressed.size()) << "\n";  +            Cout << line << "\n" +                 << TString(decompressed.data(), decompressed.size()) << "\n";              abort();          }      }      float cl2 = clock();      float secs = (cl2 - cl1) / CLOCKS_PER_SEC;      Cout << "origSize: " << origSize << "\tcompSize: " << compSize << Endl; -    Cout << "yep! compression + decompression speed " << origSize / 1024.0f / 1024.0f / secs << " mbps\n";  +    Cout << "yep! compression + decompression speed " << origSize / 1024.0f / 1024.0f / secs << " mbps\n";      Cout << "yep! compression ratio " << double(origSize) / double(compSize + 1) << "\n";  } -int main(int argc, const char* argv[]) {  -    TReallyFastRng32 rr(17);  +int main(int argc, const char* argv[]) { +    TReallyFastRng32 rr(17);      TVector<TString> lines;      /*FILE *fp = fopen("res", "rb");      while (!feof(fp)) { @@ -52,16 +52,16 @@ int main(int argc, const char* argv[]) {      //for (size_t i = 0; i < 10000000; ++i) {      //for (size_t i = 0; i < 1000000; ++i) {      for (size_t i = 0; i < 1000000; ++i) { -        size_t size = rr.Uniform(32);  +        size_t size = rr.Uniform(32);          TString res = "www.yandex.ru/yandsearch?text=";          for (size_t j = 0; j < size; ++j) { -            res += "qwer"[rr.Uniform(4)];  +            res += "qwer"[rr.Uniform(4)];          }          lines.push_back(res);      }      THolder<TDataSampler> sampler(new TDataSampler);      for (size_t i = 0; i < lines.size(); ++i) { -        sampler->AddStat(lines[i]);  +        sampler->AddStat(lines[i]);      }      TCompressorTable table;      sampler->BuildTable(table); diff --git a/library/cpp/comptable/ut/comptable_ut.cpp b/library/cpp/comptable/ut/comptable_ut.cpp index 5bb2058fce2..5901d0246fb 100644 --- a/library/cpp/comptable/ut/comptable_ut.cpp +++ b/library/cpp/comptable/ut/comptable_ut.cpp @@ -1,13 +1,13 @@  #include <library/cpp/comptable/comptable.h>  #include <library/cpp/testing/unittest/registar.h> -#include <util/random/random.h>  -#include <util/random/fast.h>  -  +#include <util/random/random.h> +#include <util/random/fast.h> +  using namespace NCompTable; -template <bool HQ>  -void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {  +template <bool HQ> +void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {      TVector<char> compressed;      TVector<char> decompressed; @@ -21,8 +21,8 @@ void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {      size_t origSize = 0;      size_t compSize = 0; -    for (size_t i = 0; i < lines.size(); ++i) {  -        const TString& line = lines[i];  +    for (size_t i = 0; i < lines.size(); ++i) { +        const TString& line = lines[i];          compressor.Compress(line, &compressed);          origSize += line.size();          compSize += compressed.size(); @@ -30,30 +30,30 @@ void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {          deCompressor.Decompress(in, &decompressed);          UNIT_ASSERT(decompressed.size() == line.size() && memcmp(decompressed.data(), line.data(), decompressed.size()) == 0);      } -    UNIT_ASSERT_EQUAL(origSize, 45491584);  +    UNIT_ASSERT_EQUAL(origSize, 45491584);      if (HQ) { -        UNIT_ASSERT_EQUAL(compSize, 11074583);  +        UNIT_ASSERT_EQUAL(compSize, 11074583);      } else { -        UNIT_ASSERT_EQUAL(compSize, 17459336);  +        UNIT_ASSERT_EQUAL(compSize, 17459336);      }      UNIT_ASSERT(compSize < origSize);  }  Y_UNIT_TEST_SUITE(TestComptable) {      Y_UNIT_TEST(TestComptableCompressDecompress) { -        TReallyFastRng32 rr(17);  +        TReallyFastRng32 rr(17);          TVector<TString> lines;          for (size_t i = 0; i < 1000000; ++i) { -            size_t size = rr.Uniform(32);  +            size_t size = rr.Uniform(32);              TString res = "www.yandex.ru/yandsearch?text=";              for (size_t j = 0; j < size; ++j) { -                res += "qwer"[rr.Uniform(4)];  +                res += "qwer"[rr.Uniform(4)];              }              lines.push_back(res);          }          THolder<TDataSampler> sampler(new TDataSampler);          for (size_t i = 0; i < lines.size(); ++i) { -            sampler->AddStat(lines[i]);  +            sampler->AddStat(lines[i]);          }          TCompressorTable table;          sampler->BuildTable(table);  | 
