diff options
author | Vladislav Kuznetsov <va.kuznecov@physics.msu.ru> | 2022-02-10 16:46:54 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:54 +0300 |
commit | 3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f | |
parent | de20f5598f0832a6e646f61b4feca942c00da928 (diff) | |
download | ydb-3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5.tar.gz |
Restoring authorship annotation for Vladislav Kuznetsov <va.kuznecov@physics.msu.ru>. Commit 2 of 2.
453 files changed, 35795 insertions, 35795 deletions
diff --git a/contrib/libs/t1ha/LICENSE b/contrib/libs/t1ha/LICENSE index b01342cfe4..c198acc89c 100644 --- a/contrib/libs/t1ha/LICENSE +++ b/contrib/libs/t1ha/LICENSE @@ -1,23 +1,23 @@ - zlib License, see https://en.wikipedia.org/wiki/Zlib_License - + zlib License, see https://en.wikipedia.org/wiki/Zlib_License + Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - Fast Positive Hash. - - Portions Copyright (c) 2010-2013 Leonid Yuriev <leo@yuriev.ru>, - The 1Hippeus project (t1h). - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgement in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. + Fast Positive Hash. + + Portions Copyright (c) 2010-2013 Leonid Yuriev <leo@yuriev.ru>, + The 1Hippeus project (t1h). + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. 
If you use this software + in a product, an acknowledgement in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. diff --git a/contrib/libs/t1ha/README.md b/contrib/libs/t1ha/README.md index f49eca2a54..13c3d82f6a 100644 --- a/contrib/libs/t1ha/README.md +++ b/contrib/libs/t1ha/README.md @@ -1,23 +1,23 @@ <!-- Required extensions: pymdownx.betterem, pymdownx.tilde, pymdownx.emoji, pymdownx.tasklist, pymdownx.superfences --> -t1ha +t1ha ===== -Fast Positive Hash, aka "Позитивный Хэш" -by [Positive Technologies](https://www.ptsecurity.com). -Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software. - +Fast Positive Hash, aka "Позитивный Хэш" +by [Positive Technologies](https://www.ptsecurity.com). +Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software. + *The Future will (be) [Positive](https://www.ptsecurity.com). 
Всё будет хорошо.* -[![License: Zlib](https://img.shields.io/badge/License-Zlib-lightgrey.svg)](https://opensource.org/licenses/Zlib) +[![License: Zlib](https://img.shields.io/badge/License-Zlib-lightgrey.svg)](https://opensource.org/licenses/Zlib) [![Build Status](https://travis-ci.org/erthink/t1ha.svg?branch=master)](https://travis-ci.org/erthink/t1ha) -[![Build status](https://ci.appveyor.com/api/projects/status/ptug5fl2ouxdo68h/branch/master?svg=true)](https://ci.appveyor.com/project/leo-yuriev/t1ha/branch/master) +[![Build status](https://ci.appveyor.com/api/projects/status/ptug5fl2ouxdo68h/branch/master?svg=true)](https://ci.appveyor.com/project/leo-yuriev/t1ha/branch/master) [![CircleCI](https://circleci.com/gh/erthink/t1ha/tree/master.svg?style=svg)](https://circleci.com/gh/erthink/t1ha/tree/master) -[![Coverity Scan Status](https://scan.coverity.com/projects/12918/badge.svg)](https://scan.coverity.com/projects/leo-yuriev-t1ha) - +[![Coverity Scan Status](https://scan.coverity.com/projects/12918/badge.svg)](https://scan.coverity.com/projects/leo-yuriev-t1ha) + ## Briefly, it is a portable non-cryptographic 64-bit hash function: 1. Intended for 64-bit little-endian platforms, predominantly for Elbrus and x86_64, but portable and without penalties it can run on any 64-bit CPU. - + 2. In most cases up to 15% faster than [xxHash](https://cyan4973.github.io/xxHash/), [StadtX](https://github.com/demerphq/BeagleHash/blob/master/stadtx_hash.h), @@ -38,10 +38,10 @@ hash-functions (which do not use specific hardware tricks). 3. Licensed under [zlib License](https://en.wikipedia.org/wiki/Zlib_License). -Also pay attention to [Rust](https://github.com/flier/rust-t1ha), +Also pay attention to [Rust](https://github.com/flier/rust-t1ha), [Erlang](https://github.com/lemenkov/erlang-t1ha) and [Golang](https://github.com/dgryski/go-t1ha) implementations. 
- + ### FAQ: Why _t1ha_ don't follow [NH](https://en.wikipedia.org/wiki/UMAC)-approach like [FARSH](https://github.com/Bulat-Ziganshin/FARSH), [XXH3](https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html), HighwayHash and so on? Okay, just for clarity, we should distinguish functions families: @@ -77,164 +77,164 @@ The right NMH/NH code without entropy loss should be looking like this: } ``` -******************************************************************************** - -# Usage -The `t1ha` library provides several terraced hash functions -with the dissimilar properties and for a different cases. -These functions briefly described below, see [t1ha.h](t1ha.h) for more API details. - -To use in your own project you may link with the t1ha-library, -or just add to your project corresponding source files from `/src` directory. - -Please, feel free to fill an issue or make pull request. - - -`t1ha0` = 64 bits, "Just Only Faster" -------------------------------------- - - Provides fast-as-possible hashing for current CPU, including 32-bit - systems and engaging the available hardware acceleration. - You can rest assured that t1ha0 faster than all other fast hashes - (with comparable quality) so, otherwise we will extend and refine it time-to-time. - - On the other hand, without warranty that the hash result will be same - for particular key on another machine or another version. - Moreover, is deliberately known that the result will be different - for systems with different bitness or endianness. - Briefly, such hash-results and their derivatives, should be - used only in runtime, but should not be persist or transferred - over a network. - - Also should be noted, the quality of t1ha0() hashing is a subject - for tradeoffs with performance. Therefore the quality and strength - of `t1ha0()` may be lower than `t1ha1()` and `t1ha2()`, - especially on 32-bit targets, but then much faster. - However, guaranteed that it passes all SMHasher tests. 
- - Internally `t1ha0()` selects most faster implementation for current CPU, - for now these are includes: - - | Implementation | Platform/CPU | - | :---------------------- | :------------------------------------- | - | `t1ha0_ia32aes_avx()` | x86 with AES-NI and AVX extensions | - | `t1ha0_ia32aes_avx2()` | x86 with AES-NI and AVX2 extensions | - | `t1ha0_ia32aes_noavx()` | x86 with AES-NI without AVX extensions | - | `t1ha0_32le()` | 32-bit little-endian | - | `t1h0a_32be()` | 32-bit big-endian | - | `t1ha1_le()` | 64-bit little-endian | - | `t1ha1_be()` | 64-bit big-endian | - | `t1ha2_atonce()` | 64-bit little-endian | - - -`t1ha1` = 64 bits, baseline fast portable hash -------------------------------------- - - The first version of "Fast Positive Hash" with reasonable quality - for checksum, hash tables and thin fingerprinting. It is stable, e.g. - returns same result on all architectures and CPUs. - - 1. Speed with the reasonable quality of hashing. - 2. Efficiency on modern 64-bit CPUs, but not in a hardware. - 3. Strong as possible, until no penalties on performance. - - Unfortunatelly, [Yves Orton](https://github.com/demerphq/smhasher) discovered - that `t1ha1()` family fails the strict avalanche criteria in some cases. - This flaw is insignificant for the `t1ha1()` purposes and imperceptible - from a practical point of view. - However, nowadays this issue has resolved in the next `t1ha2()` function, - that was initially planned to providing a bit more quality. - - The basic version of `t1ha1()` intends for little-endian systems and will run - slowly on big-endian. Therefore a dedicated big-endian version is also - provided, but returns the different result than the basic version. - - -`t1ha2` = 64 and 128 bits, slightly more attention for quality and strength ------------------------------------------------------------------ - The recommended version of "Fast Positive Hash" with good quality - for checksum, hash tables and fingerprinting. 
It is stable, e.g. - returns same result on all architectures and CPUs. - - 1. Portable and extremely efficiency on modern 64-bit CPUs. - 2. Great quality of hashing and still faster than other non-t1ha hashes. - 3. Provides streaming mode and 128-bit result. - - The `t1ha2()` is intended for little-endian systems and will run - slightly slowly on big-endian systems. - - -`t1ha3` = 128 and 256 bits, fast non-cryptographic fingerprinting ---------------------------------------------------------- - The next-step version of "Fast Positive Hash", - but not yet finished and therefore not available. - - -#### Planned: `t1ha4` = 128 and 256 bits, fast insecure fingerprinting - -#### Planned: `t1ha5` = 256 bits, fast Cryptographic, but with some limitations - -#### Planned: `t1ha6` = 256 and 512 bits, Cryptographic with reasonable resistance to acceleration on GPU and FPGA. - -#### Planned: `t1ha7` = 256, 512 and 1024 bits, Cryptographic, Strong Post-Quantum - -******************************************************************************** - -### Requirements and Portability: - 1. _t1ha_ designed for **modern 64-bit architectures**. - But on the other hand, _t1ha_ doesn't require - instructions specific to a particular architecture: - - therefore t1ha could be used on any CPU for - which compiler provides support 64-bit arithmetic. - - but unfortunately _t1ha_ could be dramatically slowly - on architectures without native 64-bit operations. - 2. This implementation of _t1ha_ requires **modern GNU C compatible compiler**, - including Clang/LLVM, or **Visual Studio 2013/2015/2017**. - For proper performance please use one of: GNU C 5.5 or later, CLANG 5.0 or later, Microsoft Visual Studio 2017 15.6 or later. 
- -#### Acknowledgement: -The _t1ha_ was originally developed by Leonid Yuriev (Леонид Юрьев) -for _The 1Hippeus project - zerocopy messaging in the spirit of Sparta!_ - - -******************************************************************************** - -## Benchmarking and Testing - -Current version of t1ha library includes tool for basic testing and benchmarking. -Just try `make check` from t1ha directory. - +******************************************************************************** + +# Usage +The `t1ha` library provides several terraced hash functions +with the dissimilar properties and for a different cases. +These functions briefly described below, see [t1ha.h](t1ha.h) for more API details. + +To use in your own project you may link with the t1ha-library, +or just add to your project corresponding source files from `/src` directory. + +Please, feel free to fill an issue or make pull request. + + +`t1ha0` = 64 bits, "Just Only Faster" +------------------------------------- + + Provides fast-as-possible hashing for current CPU, including 32-bit + systems and engaging the available hardware acceleration. + You can rest assured that t1ha0 faster than all other fast hashes + (with comparable quality) so, otherwise we will extend and refine it time-to-time. + + On the other hand, without warranty that the hash result will be same + for particular key on another machine or another version. + Moreover, is deliberately known that the result will be different + for systems with different bitness or endianness. + Briefly, such hash-results and their derivatives, should be + used only in runtime, but should not be persist or transferred + over a network. + + Also should be noted, the quality of t1ha0() hashing is a subject + for tradeoffs with performance. Therefore the quality and strength + of `t1ha0()` may be lower than `t1ha1()` and `t1ha2()`, + especially on 32-bit targets, but then much faster. + However, guaranteed that it passes all SMHasher tests. 
+ + Internally `t1ha0()` selects most faster implementation for current CPU, + for now these are includes: + + | Implementation | Platform/CPU | + | :---------------------- | :------------------------------------- | + | `t1ha0_ia32aes_avx()` | x86 with AES-NI and AVX extensions | + | `t1ha0_ia32aes_avx2()` | x86 with AES-NI and AVX2 extensions | + | `t1ha0_ia32aes_noavx()` | x86 with AES-NI without AVX extensions | + | `t1ha0_32le()` | 32-bit little-endian | + | `t1h0a_32be()` | 32-bit big-endian | + | `t1ha1_le()` | 64-bit little-endian | + | `t1ha1_be()` | 64-bit big-endian | + | `t1ha2_atonce()` | 64-bit little-endian | + + +`t1ha1` = 64 bits, baseline fast portable hash +------------------------------------- + + The first version of "Fast Positive Hash" with reasonable quality + for checksum, hash tables and thin fingerprinting. It is stable, e.g. + returns same result on all architectures and CPUs. + + 1. Speed with the reasonable quality of hashing. + 2. Efficiency on modern 64-bit CPUs, but not in a hardware. + 3. Strong as possible, until no penalties on performance. + + Unfortunatelly, [Yves Orton](https://github.com/demerphq/smhasher) discovered + that `t1ha1()` family fails the strict avalanche criteria in some cases. + This flaw is insignificant for the `t1ha1()` purposes and imperceptible + from a practical point of view. + However, nowadays this issue has resolved in the next `t1ha2()` function, + that was initially planned to providing a bit more quality. + + The basic version of `t1ha1()` intends for little-endian systems and will run + slowly on big-endian. Therefore a dedicated big-endian version is also + provided, but returns the different result than the basic version. + + +`t1ha2` = 64 and 128 bits, slightly more attention for quality and strength +----------------------------------------------------------------- + The recommended version of "Fast Positive Hash" with good quality + for checksum, hash tables and fingerprinting. 
It is stable, e.g. + returns same result on all architectures and CPUs. + + 1. Portable and extremely efficiency on modern 64-bit CPUs. + 2. Great quality of hashing and still faster than other non-t1ha hashes. + 3. Provides streaming mode and 128-bit result. + + The `t1ha2()` is intended for little-endian systems and will run + slightly slowly on big-endian systems. + + +`t1ha3` = 128 and 256 bits, fast non-cryptographic fingerprinting +--------------------------------------------------------- + The next-step version of "Fast Positive Hash", + but not yet finished and therefore not available. + + +#### Planned: `t1ha4` = 128 and 256 bits, fast insecure fingerprinting + +#### Planned: `t1ha5` = 256 bits, fast Cryptographic, but with some limitations + +#### Planned: `t1ha6` = 256 and 512 bits, Cryptographic with reasonable resistance to acceleration on GPU and FPGA. + +#### Planned: `t1ha7` = 256, 512 and 1024 bits, Cryptographic, Strong Post-Quantum + +******************************************************************************** + +### Requirements and Portability: + 1. _t1ha_ designed for **modern 64-bit architectures**. + But on the other hand, _t1ha_ doesn't require + instructions specific to a particular architecture: + - therefore t1ha could be used on any CPU for + which compiler provides support 64-bit arithmetic. + - but unfortunately _t1ha_ could be dramatically slowly + on architectures without native 64-bit operations. + 2. This implementation of _t1ha_ requires **modern GNU C compatible compiler**, + including Clang/LLVM, or **Visual Studio 2013/2015/2017**. + For proper performance please use one of: GNU C 5.5 or later, CLANG 5.0 or later, Microsoft Visual Studio 2017 15.6 or later. 
+ +#### Acknowledgement: +The _t1ha_ was originally developed by Leonid Yuriev (Леонид Юрьев) +for _The 1Hippeus project - zerocopy messaging in the spirit of Sparta!_ + + +******************************************************************************** + +## Benchmarking and Testing + +Current version of t1ha library includes tool for basic testing and benchmarking. +Just try `make check` from t1ha directory. + To comparison benchmark also includes `wyhash`, `xxHash`, `StadtX` and `HighwayHash` functions. For example actual results for `Intel(R) Core(TM) i7-4600U CPU`: -``` +``` $ make all && sudo make check Build by GNU C/C++ compiler 9.3 (self-check passed) -Testing t1ha2_atonce... Ok -Testing t1ha2_atonce128... Ok -Testing t1ha2_stream... Ok -Testing t1ha2_stream128... Ok -Testing t1ha1_64le... Ok -Testing t1ha1_64be... Ok -Testing t1ha0_32le... Ok -Testing t1ha0_32be... Ok -Testing t1ha0_ia32aes_noavx... Ok -Testing t1ha0_ia32aes_avx... Ok -Testing t1ha0_ia32aes_avx2... Ok -Testing HighwayHash64_pure_c... Ok -Testing HighwayHash64_portable_cxx... Ok -Testing HighwayHash64_sse41... Ok -Testing HighwayHash64_avx2... Ok -Testing StadtX... Ok +Testing t1ha2_atonce... Ok +Testing t1ha2_atonce128... Ok +Testing t1ha2_stream... Ok +Testing t1ha2_stream128... Ok +Testing t1ha1_64le... Ok +Testing t1ha1_64be... Ok +Testing t1ha0_32le... Ok +Testing t1ha0_32be... Ok +Testing t1ha0_ia32aes_noavx... Ok +Testing t1ha0_ia32aes_avx... Ok +Testing t1ha0_ia32aes_avx2... Ok +Testing HighwayHash64_pure_c... Ok +Testing HighwayHash64_portable_cxx... Ok +Testing HighwayHash64_sse41... Ok +Testing HighwayHash64_avx2... Ok +Testing StadtX... Ok Testing wyhash_v7... Ok - -Preparing to benchmarking... + +Preparing to benchmarking... 
- running on CPU#0 - use RDPMC_40000001 as clock source for benchmarking - - assume it cheap and stable + - assume it cheap and stable - measure granularity and overhead: 54 cycles, 0.0185185 iteration/cycle - -Bench for tiny keys (7 bytes): + +Bench for tiny keys (7 bytes): t1ha2_atonce : 17.250 cycle/hash, 2.464 cycle/byte, 0.406 byte/cycle, 1.217 GiB/s @3GHz t1ha2_atonce128* : 33.281 cycle/hash, 4.754 cycle/byte, 0.210 byte/cycle, 0.631 GiB/s @3GHz t1ha2_stream* : 77.500 cycle/hash, 11.071 cycle/byte, 0.090 byte/cycle, 0.271 GiB/s @3GHz @@ -251,8 +251,8 @@ HighwayHash64_portable: 513.000 cycle/hash, 73.286 cycle/byte, 0.014 byte/cy HighwayHash64_sse41 : 69.438 cycle/hash, 9.920 cycle/byte, 0.101 byte/cycle, 0.302 GiB/s @3GHz HighwayHash64_avx2 : 54.875 cycle/hash, 7.839 cycle/byte, 0.128 byte/cycle, 0.383 GiB/s @3GHz wyhash_v7 : 14.102 cycle/hash, 2.015 cycle/byte, 0.496 byte/cycle, 1.489 GiB/s @3GHz - -Bench for large keys (16384 bytes): + +Bench for large keys (16384 bytes): t1ha2_atonce : 3493.000 cycle/hash, 0.213 cycle/byte, 4.691 byte/cycle, 14.072 GiB/s @3GHz t1ha2_atonce128* : 3664.000 cycle/hash, 0.224 cycle/byte, 4.472 byte/cycle, 13.415 GiB/s @3GHz t1ha2_stream* : 3684.000 cycle/hash, 0.225 cycle/byte, 4.447 byte/cycle, 13.342 GiB/s @3GHz @@ -269,208 +269,208 @@ HighwayHash64_portable: 44982.321 cycle/hash, 2.746 cycle/byte, 0.364 byte/cy HighwayHash64_sse41 : 7041.000 cycle/hash, 0.430 cycle/byte, 2.327 byte/cycle, 6.981 GiB/s @3GHz HighwayHash64_avx2 : 4542.000 cycle/hash, 0.277 cycle/byte, 3.607 byte/cycle, 10.822 GiB/s @3GHz wyhash_v7 : 3383.000 cycle/hash, 0.206 cycle/byte, 4.843 byte/cycle, 14.529 GiB/s @3GHz -``` - -The `test` tool support a set of command line options to selecting functions and size of keys for benchmarking. -For more info please run `./test --help`. - -### The `--hash-stdin-strings` option -One noteable option is `--hash-stdin-strings`, it intended to estimate hash collisions on your custom data. 
-With this option `test` tool will hash each line from standard input and print its hash to standard output. - -For instance, you could count collisions for lines from some `words.list` file by bash's command: -``` - ./t1ha/test --hash-stdin-strings < words.list | sort | uniq -c -d | wc -l -``` - -More complex example - count `xxhash()` collisions for lines from `words.list` and 0...10000 numbers, -with distinction only in 32 bit of hash values: -``` - (cat words.list && seq 0 10000) | \ - ./t1ha/test --xxhash --hash-stdin-strings | \ - cut --bytes=-8 | sort | uniq -c -d | wc -l -``` - - -### SMHasher -[_SMHasher_](https://github.com/aappleby/smhasher/wiki) is a wellknown -test suite designed to test the distribution, collision, -and performance properties of non-cryptographic hash functions. - -_Reini Urban_ provides [extended version/fork of SMHasher](https://github.com/rurban/smhasher) -which integrates a lot of modern hash functions, including _t1ha_. - -So, **the quality and speed of _t1ha_ can be easily checked with the following scenario:** - -``` -git clone https://github.com/rurban/smhasher -cd smhasher -cmake . -make -./SMHasher City64 -./SMHasher metrohash64_1 -./SMHasher xxHash64 -... -./SMHasher t1ha -``` - -For properly performance please use at least GCC 5.5, Clang 6.0 or Visual Studio 2017. - -### Scores - -Please take in account that the results is significantly depend on actual CPU, compiler version and CFLAGS. -The results below were obtained in **2016** with: - - CPU: `Intel(R) Core(TM) i7-6700K CPU`; - - Compiler: `gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.4)`; - - CFLAGS: `-march=native -O3 -fPIC`; - - -#### The _SMALL KEYS_ case -Order by average Cycles per Hash for 1..31 bytes (less is better). 
- -| Function | MiB/Second | Cycles/Hash | Notes (quality, portability) | -| :-------------------- | ------------: | -------: | :--------------------------- | -_donothing_ | 15747227.36 | 6.00 | not a hash (just for reference) -_sumhash32_ | 43317.86 | 16.69 | not a hash (just for reference) -FNV1a_YoshimitsuTRIAD | 13000.49 | 24.96 | poor (100% bias, collisions, distrib) -crc64_hw | 7308.06 | 28.37 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) -crc32_hw | 5577.64 | 29.10 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) -NOP_OAAT_read64 | 1991.31 | 30.46 | poor (100% bias, 2.17x collisions) -Crap8 | 2743.80 | 32.50 | poor (2.42% bias, collisions, 2% distrib) -**t1ha_aes** | **34636.42** | **33.03** | non-portable (AES-NI) -**t1ha** | **12228.80** | **35.55** | -MUM | 10246.20 | 37.25 | non-portable (different result, machine specific) -Murmur2 | 2789.89 | 38.37 | poor (1.7% bias, 81x coll, 1.7% distrib) -t1ha_32le | 5958.54 | 38.54 | alien (designed for 32-bit CPU) -t1ha_64be | 9321.23 | 38.29 | alien (designed for big-endian CPU) -lookup3 | 1817.11 | 39.30 | poor (28% bias, collisions, 30% distrib) -t1ha_32be | 5873.45 | 39.81 | alien (designed for 32-bit big-endian CPU) -Murmur2C | 3655.60 | 42.68 | poor (91% bias, collisions, distrib) -fasthash64 | 5578.06 | 43.42 | -Murmur2A | 2789.85 | 43.38 | poor (12.7% bias) -xxHash32 | 5513.55 | 43.72 | -Murmur2B | 5578.21 | 44.13 | weak (1.8% bias, collisions, distrib) -fasthash32 | 5381.46 | 45.50 | -cmetrohash64_1_optshort | 11808.92 | 46.33 | _seems weak_ (likely cyclic collisions) -metrohash64_2 | 12113.12 | 46.88 | _seems weak_ (likely cyclic collisions) -cmetrohash64_1 | 12081.32 | 47.28 | _seems weak_ (likely cyclic collisions) -metrohash64_1 | 12024.68 | 47.21 | _seems weak_ (likely cyclic collisions) -Murmur3F | 5473.62 | 47.37 | -superfast | 1860.25 | 47.45 | poor (91% bias, 5273.01x collisions, 37% distrib) -cmetrohash64_2 | 12052.58 | 48.66 | -Murmur3A | 
2232.00 | 48.16 | -City32 | 5014.33 | 51.13 | far to perfect (2 minor collisions) -City64 | 11041.72 | 51.77 | -metrohash64crc_2 | 20582.76 | 51.39 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -_sumhash_ | 9668.13 | 51.31 | not a hash (just for reference) -metrohash64crc_1 | 21319.23 | 52.36 | weak (cyclic collisions), non-portable (SSE4.2) -PMurHash32 | 2232.26 | 53.18 | -Murmur3C | 3719.22 | 54.05 | -bernstein | 921.43 | 55.17 | poor (100% bias, collisions, distrib) -xxHash64 | 11123.15 | 56.17 | -Spooky32 | 11464.20 | 59.45 | -City128 | 12551.54 | 60.93 | -FarmHash64 | 12145.36 | 60.12 | non-portable (SSE4.2) -Spooky128 | 11735.99 | 60.45 | weak (collisions with 4bit diff) -Spooky64 | 11820.20 | 60.39 | -CityCrc128 | 14821.82 | 62.38 | non-portable (SSE4.2) -MicroOAAT | 826.32 | 62.06 | poor (100% bias, distrib) -metrohash128_1 | 11063.78 | 66.58 | _seems weak_ (likely cyclic collisions) -metrohash128_2 | 11465.18 | 66.72 | weak (cyclic collisions) -GoodOAAT | 930.18 | 68.24 | -metrohash128crc_1 | 21322.80 | 70.33 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -metrohash128crc_2 | 20990.70 | 70.40 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -farmhash64_c | 12033.13 | 71.30 | non-portable (SSE4.2) -sdbm | 695.29 | 71.76 | poor (100% bias, collisions, distrib) -FNV1a | 684.17 | 72.75 | poor (zeros, 100% bias, collisions, distrib) -FNV64 | 697.67 | 72.70 | poor (100% bias, collisions, distrib) -FarmHash128 | 12515.98 | 77.43 | non-portable (SSE4.2) -hasshe2 | 2587.39 | 81.23 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE2) -_BadHash_ | 558.14 | 87.87 | not a hash (just for reference) -x17 | 551.99 | 89.24 | poor (99.98% bias, collisions, distrib) -JenkinsOOAT_perl | 558.14 | 95.26 | poor (1.5-11.5% bias, 7.2x collisions) -farmhash128_c | 12709.06 | 96.42 | non-portable (SSE4.1) -MurmurOAAT | 465.12 | 107.61 | poor (collisions, 99.99% distrib) -JenkinsOOAT | 558.13 | 116.75 | poor 
(53.5% bias, collisions, distrib) -falkhash | 8909.54 | 124.48 | non-portable (AES-NI) -crc32 | 342.27 | 142.06 | poor (insecure, 8589.93x collisions, distrib) -SipHash | 962.35 | 147.36 | -md5_32a | 433.03 | 508.98 | -sha1_32a | 531.44 | 1222.44 | - - -#### The _LARGE KEYS_ case -Order by hashing speed in Mi-bytes (2^20 = 1048576) per second for 262144-byte block (more is better). - -| Function | MiB/Second | Cycles/Hash | Notes (quality, portability) | -| :-------------------- | ------------: | -------: | :--------------------------- | -_donothing_ | 15747227.36 | 6.00 | not a hash (just for reference) -_sumhash32_ | 43317.86 | 16.69 | not a hash (just for reference) -**t1ha_aes** | **34636.42** | **33.03** | non-portable (AES-NI) -metrohash128crc_1 | 21322.80 | 70.33 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -metrohash64crc_1 | 21319.23 | 52.36 | _seems weak_ (cyclic collisions), non-portable (SSE4.2) -metrohash128crc_2 | 20990.70 | 70.40 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -metrohash64crc_2 | 20582.76 | 51.39 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) -CityCrc128 | 14821.82 | 62.38 | non-portable (SSE4.2) -FNV1a_YoshimitsuTRIAD | 13000.49 | 24.96 | poor (100% bias, collisions, distrib) -farmhash128_c | 12709.06 | 96.42 | non-portable (SSE4.1) -City128 | 12551.54 | 60.93 | -FarmHash128 | 12515.98 | 77.43 | non-portable (SSE4.2) -**t1ha** | **12228.80** | **35.55** | -FarmHash64 | 12145.36 | 60.12 | non-portable (SSE4.2) -metrohash64_2 | 12113.12 | 46.88 | _seems weak_ (likely cyclic collisions) -cmetrohash64_1 | 12081.32 | 47.28 | _seems weak_ (likely cyclic collisions) -cmetrohash64_2 | 12052.58 | 48.66 | _seems weak_ (likely cyclic collisions) -farmhash64_c | 12033.13 | 71.30 | non-portable (SSE4.2) -metrohash64_1 | 12024.68 | 47.21 | _seems weak_ (likely cyclic collisions) -Spooky64 | 11820.20 | 60.39 | -cmetrohash64_1_optshort | 11808.92 | 46.33 | _seems weak_ (likely cyclic 
collisions) -Spooky128 | 11735.99 | 60.45 | weak (collisions with 4-bit diff) -metrohash128_2 | 11465.18 | 66.72 | weak (cyclic collisions) -Spooky32 | 11464.20 | 59.45 | -xxHash64 | 11123.15 | 56.17 | -metrohash128_1 | 11063.78 | 66.58 | _seems weak_ (likely cyclic collisions) -City64 | 11041.72 | 51.77 | -MUM | 10246.20 | 37.25 | non-portable (different result, machine specific) -_sumhash_ | 9668.13 | 51.31 | not a hash (just for reference) -t1ha_64be | 9321.23 | 38.29 | alien (designed for big-endian CPU) -falkhash | 8909.54 | 124.48 | non-portable (AES-NI) -crc64_hw | 7308.06 | 28.37 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) -t1ha_32le | 5958.54 | 38.54 | alien (designed for 32-bit CPU) -t1ha_32be | 5873.45 | 39.81 | alien (designed for 32-bit big-endian CPU) -fasthash64 | 5578.06 | 43.42 | -Murmur2B | 5578.21 | 44.13 | weak (1.8% bias, collisions, distrib) -crc32_hw | 5577.64 | 29.10 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) -xxHash32 | 5513.55 | 43.72 | -Murmur3F | 5473.62 | 47.37 | -fasthash32 | 5381.46 | 45.50 | -City32 | 5014.33 | 51.13 | far to perfect (2 minor collisions) -Murmur3C | 3719.22 | 54.05 | -Murmur2C | 3655.60 | 42.68 | poor (91% bias, collisions, distrib) -Murmur2 | 2789.89 | 38.37 | poor (1.7% bias, 81x coll, 1.7% distrib) -Murmur2A | 2789.85 | 43.38 | poor (12.7% bias) -Crap8 | 2743.80 | 32.50 | poor (2.42% bias, collisions, 2% distrib) -hasshe2 | 2587.39 | 81.23 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE2) -Murmur3A | 2232.00 | 48.16 | -PMurHash32 | 2232.26 | 53.18 | -NOP_OAAT_read64 | 1991.31 | 30.46 | poor (100% bias, 2.17x collisions) -superfast | 1860.25 | 47.45 | poor (91% bias, 5273.01x collisions, 37% distrib) -lookup3 | 1817.11 | 39.30 | poor (28% bias, collisions, 30% distrib) -SipHash | 962.35 | 147.36 | -GoodOAAT | 930.18 | 68.24 | -bernstein | 921.43 | 55.17 | poor (100% bias, collisions, distrib) -MicroOAAT | 826.32 | 62.06 | poor (100% 
bias, distrib) -FNV64 | 697.67 | 72.70 | poor (100% bias, collisions, distrib) -sdbm | 695.29 | 71.76 | poor (100% bias, collisions, distrib) -FNV1a | 684.17 | 72.75 | poor (zeros, 100% bias, collisions, distrib) -_BadHash_ | 558.14 | 87.87 | not a hash (just for reference) -JenkinsOOAT | 558.13 | 116.75 | poor (53.5% bias, collisions, distrib) -JenkinsOOAT_perl | 558.14 | 95.26 | poor (1.5-11.5% bias, 7.2x collisions) -x17 | 551.99 | 89.24 | poor (99.98% bias, collisions, distrib) -sha1_32a | 531.44 | 1222.44 | -MurmurOAAT | 465.12 | 107.61 | poor (collisions, 99.99% distrib) -md5_32a | 433.03 | 508.98 | -crc32 | 342.27 | 142.06 | poor (insecure, 8589.93x collisions, distrib) +``` + +The `test` tool support a set of command line options to selecting functions and size of keys for benchmarking. +For more info please run `./test --help`. + +### The `--hash-stdin-strings` option +One noteable option is `--hash-stdin-strings`, it intended to estimate hash collisions on your custom data. +With this option `test` tool will hash each line from standard input and print its hash to standard output. + +For instance, you could count collisions for lines from some `words.list` file by bash's command: +``` + ./t1ha/test --hash-stdin-strings < words.list | sort | uniq -c -d | wc -l +``` + +More complex example - count `xxhash()` collisions for lines from `words.list` and 0...10000 numbers, +with distinction only in 32 bit of hash values: +``` + (cat words.list && seq 0 10000) | \ + ./t1ha/test --xxhash --hash-stdin-strings | \ + cut --bytes=-8 | sort | uniq -c -d | wc -l +``` + + +### SMHasher +[_SMHasher_](https://github.com/aappleby/smhasher/wiki) is a wellknown +test suite designed to test the distribution, collision, +and performance properties of non-cryptographic hash functions. + +_Reini Urban_ provides [extended version/fork of SMHasher](https://github.com/rurban/smhasher) +which integrates a lot of modern hash functions, including _t1ha_. 
+ +So, **the quality and speed of _t1ha_ can be easily checked with the following scenario:** + +``` +git clone https://github.com/rurban/smhasher +cd smhasher +cmake . +make +./SMHasher City64 +./SMHasher metrohash64_1 +./SMHasher xxHash64 +... +./SMHasher t1ha +``` + +For properly performance please use at least GCC 5.5, Clang 6.0 or Visual Studio 2017. + +### Scores + +Please take in account that the results is significantly depend on actual CPU, compiler version and CFLAGS. +The results below were obtained in **2016** with: + - CPU: `Intel(R) Core(TM) i7-6700K CPU`; + - Compiler: `gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.4)`; + - CFLAGS: `-march=native -O3 -fPIC`; + + +#### The _SMALL KEYS_ case +Order by average Cycles per Hash for 1..31 bytes (less is better). + +| Function | MiB/Second | Cycles/Hash | Notes (quality, portability) | +| :-------------------- | ------------: | -------: | :--------------------------- | +_donothing_ | 15747227.36 | 6.00 | not a hash (just for reference) +_sumhash32_ | 43317.86 | 16.69 | not a hash (just for reference) +FNV1a_YoshimitsuTRIAD | 13000.49 | 24.96 | poor (100% bias, collisions, distrib) +crc64_hw | 7308.06 | 28.37 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) +crc32_hw | 5577.64 | 29.10 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) +NOP_OAAT_read64 | 1991.31 | 30.46 | poor (100% bias, 2.17x collisions) +Crap8 | 2743.80 | 32.50 | poor (2.42% bias, collisions, 2% distrib) +**t1ha_aes** | **34636.42** | **33.03** | non-portable (AES-NI) +**t1ha** | **12228.80** | **35.55** | +MUM | 10246.20 | 37.25 | non-portable (different result, machine specific) +Murmur2 | 2789.89 | 38.37 | poor (1.7% bias, 81x coll, 1.7% distrib) +t1ha_32le | 5958.54 | 38.54 | alien (designed for 32-bit CPU) +t1ha_64be | 9321.23 | 38.29 | alien (designed for big-endian CPU) +lookup3 | 1817.11 | 39.30 | poor (28% bias, collisions, 30% distrib) +t1ha_32be | 5873.45 | 39.81 | alien 
(designed for 32-bit big-endian CPU) +Murmur2C | 3655.60 | 42.68 | poor (91% bias, collisions, distrib) +fasthash64 | 5578.06 | 43.42 | +Murmur2A | 2789.85 | 43.38 | poor (12.7% bias) +xxHash32 | 5513.55 | 43.72 | +Murmur2B | 5578.21 | 44.13 | weak (1.8% bias, collisions, distrib) +fasthash32 | 5381.46 | 45.50 | +cmetrohash64_1_optshort | 11808.92 | 46.33 | _seems weak_ (likely cyclic collisions) +metrohash64_2 | 12113.12 | 46.88 | _seems weak_ (likely cyclic collisions) +cmetrohash64_1 | 12081.32 | 47.28 | _seems weak_ (likely cyclic collisions) +metrohash64_1 | 12024.68 | 47.21 | _seems weak_ (likely cyclic collisions) +Murmur3F | 5473.62 | 47.37 | +superfast | 1860.25 | 47.45 | poor (91% bias, 5273.01x collisions, 37% distrib) +cmetrohash64_2 | 12052.58 | 48.66 | +Murmur3A | 2232.00 | 48.16 | +City32 | 5014.33 | 51.13 | far to perfect (2 minor collisions) +City64 | 11041.72 | 51.77 | +metrohash64crc_2 | 20582.76 | 51.39 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) +_sumhash_ | 9668.13 | 51.31 | not a hash (just for reference) +metrohash64crc_1 | 21319.23 | 52.36 | weak (cyclic collisions), non-portable (SSE4.2) +PMurHash32 | 2232.26 | 53.18 | +Murmur3C | 3719.22 | 54.05 | +bernstein | 921.43 | 55.17 | poor (100% bias, collisions, distrib) +xxHash64 | 11123.15 | 56.17 | +Spooky32 | 11464.20 | 59.45 | +City128 | 12551.54 | 60.93 | +FarmHash64 | 12145.36 | 60.12 | non-portable (SSE4.2) +Spooky128 | 11735.99 | 60.45 | weak (collisions with 4bit diff) +Spooky64 | 11820.20 | 60.39 | +CityCrc128 | 14821.82 | 62.38 | non-portable (SSE4.2) +MicroOAAT | 826.32 | 62.06 | poor (100% bias, distrib) +metrohash128_1 | 11063.78 | 66.58 | _seems weak_ (likely cyclic collisions) +metrohash128_2 | 11465.18 | 66.72 | weak (cyclic collisions) +GoodOAAT | 930.18 | 68.24 | +metrohash128crc_1 | 21322.80 | 70.33 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) +metrohash128crc_2 | 20990.70 | 70.40 | _seems weak_ (likely cyclic collisions), 
non-portable (SSE4.2) +farmhash64_c | 12033.13 | 71.30 | non-portable (SSE4.2) +sdbm | 695.29 | 71.76 | poor (100% bias, collisions, distrib) +FNV1a | 684.17 | 72.75 | poor (zeros, 100% bias, collisions, distrib) +FNV64 | 697.67 | 72.70 | poor (100% bias, collisions, distrib) +FarmHash128 | 12515.98 | 77.43 | non-portable (SSE4.2) +hasshe2 | 2587.39 | 81.23 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE2) +_BadHash_ | 558.14 | 87.87 | not a hash (just for reference) +x17 | 551.99 | 89.24 | poor (99.98% bias, collisions, distrib) +JenkinsOOAT_perl | 558.14 | 95.26 | poor (1.5-11.5% bias, 7.2x collisions) +farmhash128_c | 12709.06 | 96.42 | non-portable (SSE4.1) +MurmurOAAT | 465.12 | 107.61 | poor (collisions, 99.99% distrib) +JenkinsOOAT | 558.13 | 116.75 | poor (53.5% bias, collisions, distrib) +falkhash | 8909.54 | 124.48 | non-portable (AES-NI) +crc32 | 342.27 | 142.06 | poor (insecure, 8589.93x collisions, distrib) +SipHash | 962.35 | 147.36 | +md5_32a | 433.03 | 508.98 | +sha1_32a | 531.44 | 1222.44 | + + +#### The _LARGE KEYS_ case +Order by hashing speed in Mi-bytes (2^20 = 1048576) per second for 262144-byte block (more is better). 
+ +| Function | MiB/Second | Cycles/Hash | Notes (quality, portability) | +| :-------------------- | ------------: | -------: | :--------------------------- | +_donothing_ | 15747227.36 | 6.00 | not a hash (just for reference) +_sumhash32_ | 43317.86 | 16.69 | not a hash (just for reference) +**t1ha_aes** | **34636.42** | **33.03** | non-portable (AES-NI) +metrohash128crc_1 | 21322.80 | 70.33 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) +metrohash64crc_1 | 21319.23 | 52.36 | _seems weak_ (cyclic collisions), non-portable (SSE4.2) +metrohash128crc_2 | 20990.70 | 70.40 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) +metrohash64crc_2 | 20582.76 | 51.39 | _seems weak_ (likely cyclic collisions), non-portable (SSE4.2) +CityCrc128 | 14821.82 | 62.38 | non-portable (SSE4.2) +FNV1a_YoshimitsuTRIAD | 13000.49 | 24.96 | poor (100% bias, collisions, distrib) +farmhash128_c | 12709.06 | 96.42 | non-portable (SSE4.1) +City128 | 12551.54 | 60.93 | +FarmHash128 | 12515.98 | 77.43 | non-portable (SSE4.2) +**t1ha** | **12228.80** | **35.55** | +FarmHash64 | 12145.36 | 60.12 | non-portable (SSE4.2) +metrohash64_2 | 12113.12 | 46.88 | _seems weak_ (likely cyclic collisions) +cmetrohash64_1 | 12081.32 | 47.28 | _seems weak_ (likely cyclic collisions) +cmetrohash64_2 | 12052.58 | 48.66 | _seems weak_ (likely cyclic collisions) +farmhash64_c | 12033.13 | 71.30 | non-portable (SSE4.2) +metrohash64_1 | 12024.68 | 47.21 | _seems weak_ (likely cyclic collisions) +Spooky64 | 11820.20 | 60.39 | +cmetrohash64_1_optshort | 11808.92 | 46.33 | _seems weak_ (likely cyclic collisions) +Spooky128 | 11735.99 | 60.45 | weak (collisions with 4-bit diff) +metrohash128_2 | 11465.18 | 66.72 | weak (cyclic collisions) +Spooky32 | 11464.20 | 59.45 | +xxHash64 | 11123.15 | 56.17 | +metrohash128_1 | 11063.78 | 66.58 | _seems weak_ (likely cyclic collisions) +City64 | 11041.72 | 51.77 | +MUM | 10246.20 | 37.25 | non-portable (different result, machine specific) +_sumhash_ 
| 9668.13 | 51.31 | not a hash (just for reference) +t1ha_64be | 9321.23 | 38.29 | alien (designed for big-endian CPU) +falkhash | 8909.54 | 124.48 | non-portable (AES-NI) +crc64_hw | 7308.06 | 28.37 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) +t1ha_32le | 5958.54 | 38.54 | alien (designed for 32-bit CPU) +t1ha_32be | 5873.45 | 39.81 | alien (designed for 32-bit big-endian CPU) +fasthash64 | 5578.06 | 43.42 | +Murmur2B | 5578.21 | 44.13 | weak (1.8% bias, collisions, distrib) +crc32_hw | 5577.64 | 29.10 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE4.2) +xxHash32 | 5513.55 | 43.72 | +Murmur3F | 5473.62 | 47.37 | +fasthash32 | 5381.46 | 45.50 | +City32 | 5014.33 | 51.13 | far to perfect (2 minor collisions) +Murmur3C | 3719.22 | 54.05 | +Murmur2C | 3655.60 | 42.68 | poor (91% bias, collisions, distrib) +Murmur2 | 2789.89 | 38.37 | poor (1.7% bias, 81x coll, 1.7% distrib) +Murmur2A | 2789.85 | 43.38 | poor (12.7% bias) +Crap8 | 2743.80 | 32.50 | poor (2.42% bias, collisions, 2% distrib) +hasshe2 | 2587.39 | 81.23 | poor (insecure, 100% bias, collisions, distrib), non-portable (SSE2) +Murmur3A | 2232.00 | 48.16 | +PMurHash32 | 2232.26 | 53.18 | +NOP_OAAT_read64 | 1991.31 | 30.46 | poor (100% bias, 2.17x collisions) +superfast | 1860.25 | 47.45 | poor (91% bias, 5273.01x collisions, 37% distrib) +lookup3 | 1817.11 | 39.30 | poor (28% bias, collisions, 30% distrib) +SipHash | 962.35 | 147.36 | +GoodOAAT | 930.18 | 68.24 | +bernstein | 921.43 | 55.17 | poor (100% bias, collisions, distrib) +MicroOAAT | 826.32 | 62.06 | poor (100% bias, distrib) +FNV64 | 697.67 | 72.70 | poor (100% bias, collisions, distrib) +sdbm | 695.29 | 71.76 | poor (100% bias, collisions, distrib) +FNV1a | 684.17 | 72.75 | poor (zeros, 100% bias, collisions, distrib) +_BadHash_ | 558.14 | 87.87 | not a hash (just for reference) +JenkinsOOAT | 558.13 | 116.75 | poor (53.5% bias, collisions, distrib) +JenkinsOOAT_perl | 558.14 | 95.26 | poor 
(1.5-11.5% bias, 7.2x collisions) +x17 | 551.99 | 89.24 | poor (99.98% bias, collisions, distrib) +sha1_32a | 531.44 | 1222.44 | +MurmurOAAT | 465.12 | 107.61 | poor (collisions, 99.99% distrib) +md5_32a | 433.03 | 508.98 | +crc32 | 342.27 | 142.06 | poor (insecure, 8589.93x collisions, distrib) ----- diff --git a/contrib/libs/t1ha/src/t1ha0.c b/contrib/libs/t1ha/src/t1ha0.c index c51d25957d..bde71299cb 100644 --- a/contrib/libs/t1ha/src/t1ha0.c +++ b/contrib/libs/t1ha/src/t1ha0.c @@ -1,462 +1,462 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. 
In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA0_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -static __maybe_unused __always_inline uint32_t tail32_le_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((4 - tail) & 3) << 3; - return fetch32_le_aligned(p) & ((~UINT32_C(0)) >> shift); -#else - uint32_t r = 0; - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch32_le_aligned(p); - case 3: - r = (uint32_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_aligned(p); - case 1: - return p[0]; -#else - case 0: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint32_t -tail32_le_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#ifdef can_read_underside - /* On some systems (e.g. x86) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. 
*/ - const unsigned offset = (4 - tail) & 3; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 4))) { - p -= offset; - return fetch32_le_unaligned(p) >> shift; - } - return fetch32_le_unaligned(p) & ((~UINT32_C(0)) >> shift); -#else - uint32_t r = 0; - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch32_le_unaligned(p); - case 3: - r = (uint32_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_unaligned(p); - case 1: - return p[0]; -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. */ - case 0: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* can_read_underside */ -} - -static __maybe_unused __always_inline uint32_t tail32_be_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((4 - tail) & 3) << 3; - return fetch32_be_aligned(p) >> shift; -#else - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return fetch16_be_aligned(p); - case 3: - return fetch16_be_aligned(p) << 8 | p[2]; - case 0: - return fetch32_be_aligned(p); -#else - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 0: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint32_t -tail32_be_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#ifdef can_read_underside - /* On some systems we can perform a 'oneshot' read, which is little bit - * faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> for the - * reminder. */ - const unsigned offset = (4 - tail) & 3; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 4))) { - p -= offset; - return fetch32_be_unaligned(p) & ((~UINT32_C(0)) >> shift); - } - return fetch32_be_unaligned(p) >> shift; -#else - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 1: - return p[0]; - case 2: - return fetch16_be_unaligned(p); - case 3: - return fetch16_be_unaligned(p) << 8 | p[2]; - case 0: - return fetch32_be_unaligned(p); -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 0: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; -#endif - } -#endif /* can_read_underside */ -} - -/***************************************************************************/ - -#ifndef rot32 -static __maybe_unused __always_inline uint32_t rot32(uint32_t v, unsigned s) { - return (v >> s) | (v << (32 - s)); -} -#endif /* rot32 */ - -static __always_inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, - uint32_t prime) { - uint64_t l = mul_32x32_64(*b + v, prime); - *a ^= (uint32_t)l; - *b += (uint32_t)(l >> 32); -} - -static __always_inline uint64_t final32(uint32_t a, uint32_t b) { - uint64_t l = (b ^ rot32(a, 13)) | (uint64_t)a << 32; - l *= prime_0; - l ^= l >> 41; - l *= prime_4; - l ^= l >> 47; - l *= prime_6; - return l; -} - -/* 32-bit 'magic' primes */ -static const uint32_t prime32_0 = UINT32_C(0x92D78269); -static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); -static const uint32_t prime32_2 = UINT32_C(0xA4ABA1C3); -static const uint32_t prime32_3 = UINT32_C(0xF6499843); -static const uint32_t prime32_4 = UINT32_C(0x86F0FD61); -static const uint32_t prime32_5 = UINT32_C(0xCA2DA6FB); -static const uint32_t prime32_6 = UINT32_C(0xC4BB3575); - -/* TODO: C++ template in the next version */ -#define T1HA0_BODY(ENDIANNES, ALIGNESS) \ - const uint32_t *v = (const uint32_t *)data; \ - if (unlikely(len > 16)) { \ - uint32_t c = ~a; \ - uint32_t d = rot32(b, 5); \ - const uint32_t *detent = \ - (const uint32_t *)((const uint8_t *)data + len - 15); \ - do { \ - const uint32_t w0 = fetch32_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint32_t w1 = fetch32_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint32_t w2 = fetch32_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint32_t w3 = fetch32_##ENDIANNES##_##ALIGNESS(v + 3); \ - v += 4; \ - prefetch(v); \ - \ - const uint32_t d13 
= w1 + rot32(w3 + d, 17); \ - const uint32_t c02 = w0 ^ rot32(w2 + c, 11); \ - d ^= rot32(a + w0, 3); \ - c ^= rot32(b + w1, 7); \ - b = prime32_1 * (c02 + w3); \ - a = prime32_0 * (d13 ^ w2); \ - } while (likely(v < detent)); \ - \ - c += a; \ - d += b; \ - a ^= prime32_6 * (rot32(c, 16) + d); \ - b ^= prime32_5 * (c + rot32(d, 16)); \ - \ - len &= 15; \ - } \ - \ - switch (len) { \ - default: \ - mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_4); \ - /* fall through */ \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup32(&b, &a, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_3); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_2); \ - /* fall through */ \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup32(&b, &a, tail32_##ENDIANNES##_##ALIGNESS(v, len), prime32_1); \ - /* fall through */ \ - case 0: \ - return final32(a, b); \ - } - -uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed) { - uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; - uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA0_BODY(le, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; - if (misaligned) { - T1HA0_BODY(le, unaligned); - } else { - T1HA0_BODY(le, aligned); - } -#endif -} - -uint64_t t1ha0_32be(const void *data, size_t len, uint64_t seed) { - uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; - uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA0_BODY(be, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; - if (misaligned) { - T1HA0_BODY(be, unaligned); - } else { - T1HA0_BODY(be, aligned); - } -#endif -} - -/***************************************************************************/ - -#if 
T1HA0_AESNI_AVAILABLE && defined(__ia32__) -__cold uint64_t t1ha_ia32cpu_features(void) { - uint32_t features = 0; - uint32_t extended = 0; + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA0_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +static __maybe_unused __always_inline uint32_t tail32_le_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((4 - tail) & 3) << 3; + return fetch32_le_aligned(p) & ((~UINT32_C(0)) >> shift); +#else + uint32_t r = 0; + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 0: + return fetch32_le_aligned(p); + case 3: + r = (uint32_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_aligned(p); + case 1: + return p[0]; +#else + case 0: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint32_t +tail32_le_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#ifdef can_read_underside + /* On some systems (e.g. x86) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. 
*/ + const unsigned offset = (4 - tail) & 3; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 4))) { + p -= offset; + return fetch32_le_unaligned(p) >> shift; + } + return fetch32_le_unaligned(p) & ((~UINT32_C(0)) >> shift); +#else + uint32_t r = 0; + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 0: + return fetch32_le_unaligned(p); + case 3: + r = (uint32_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_unaligned(p); + case 1: + return p[0]; +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 0: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* can_read_underside */ +} + +static __maybe_unused __always_inline uint32_t tail32_be_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((4 - tail) & 3) << 3; + return fetch32_be_aligned(p) >> shift; +#else + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return fetch16_be_aligned(p); + case 3: + return fetch16_be_aligned(p) << 8 | p[2]; + case 0: + return fetch32_be_aligned(p); +#else + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 0: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint32_t +tail32_be_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#ifdef can_read_underside + /* On some systems we can perform a 'oneshot' read, which is little bit + * faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> for the + * reminder. */ + const unsigned offset = (4 - tail) & 3; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 4))) { + p -= offset; + return fetch32_be_unaligned(p) & ((~UINT32_C(0)) >> shift); + } + return fetch32_be_unaligned(p) >> shift; +#else + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_unaligned(p); + case 3: + return fetch16_be_unaligned(p) << 8 | p[2]; + case 0: + return fetch32_be_unaligned(p); +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 0: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; +#endif + } +#endif /* can_read_underside */ +} + +/***************************************************************************/ + +#ifndef rot32 +static __maybe_unused __always_inline uint32_t rot32(uint32_t v, unsigned s) { + return (v >> s) | (v << (32 - s)); +} +#endif /* rot32 */ + +static __always_inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, + uint32_t prime) { + uint64_t l = mul_32x32_64(*b + v, prime); + *a ^= (uint32_t)l; + *b += (uint32_t)(l >> 32); +} + +static __always_inline uint64_t final32(uint32_t a, uint32_t b) { + uint64_t l = (b ^ rot32(a, 13)) | (uint64_t)a << 32; + l *= prime_0; + l ^= l >> 41; + l *= prime_4; + l ^= l >> 47; + l *= prime_6; + return l; +} + +/* 32-bit 'magic' primes */ +static const uint32_t prime32_0 = UINT32_C(0x92D78269); +static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); +static const uint32_t prime32_2 = UINT32_C(0xA4ABA1C3); +static const uint32_t prime32_3 = UINT32_C(0xF6499843); +static const uint32_t prime32_4 = UINT32_C(0x86F0FD61); +static const uint32_t prime32_5 = UINT32_C(0xCA2DA6FB); +static const uint32_t prime32_6 = UINT32_C(0xC4BB3575); + +/* TODO: C++ template in the next version */ +#define T1HA0_BODY(ENDIANNES, ALIGNESS) \ + const uint32_t *v = (const uint32_t *)data; \ + if (unlikely(len > 16)) { \ + uint32_t c = ~a; \ + uint32_t d = rot32(b, 5); \ + const uint32_t *detent = \ + (const uint32_t *)((const uint8_t *)data + len - 15); \ + do { \ + const uint32_t w0 = fetch32_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint32_t w1 = fetch32_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint32_t w2 = fetch32_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint32_t w3 = fetch32_##ENDIANNES##_##ALIGNESS(v + 3); \ + v += 4; \ + prefetch(v); \ + \ + const uint32_t d13 
= w1 + rot32(w3 + d, 17); \ + const uint32_t c02 = w0 ^ rot32(w2 + c, 11); \ + d ^= rot32(a + w0, 3); \ + c ^= rot32(b + w1, 7); \ + b = prime32_1 * (c02 + w3); \ + a = prime32_0 * (d13 ^ w2); \ + } while (likely(v < detent)); \ + \ + c += a; \ + d += b; \ + a ^= prime32_6 * (rot32(c, 16) + d); \ + b ^= prime32_5 * (c + rot32(d, 16)); \ + \ + len &= 15; \ + } \ + \ + switch (len) { \ + default: \ + mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_4); \ + /* fall through */ \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup32(&b, &a, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_3); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_2); \ + /* fall through */ \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup32(&b, &a, tail32_##ENDIANNES##_##ALIGNESS(v, len), prime32_1); \ + /* fall through */ \ + case 0: \ + return final32(a, b); \ + } + +uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed) { + uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; + uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA0_BODY(le, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; + if (misaligned) { + T1HA0_BODY(le, unaligned); + } else { + T1HA0_BODY(le, aligned); + } +#endif +} + +uint64_t t1ha0_32be(const void *data, size_t len, uint64_t seed) { + uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; + uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA0_BODY(be, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; + if (misaligned) { + T1HA0_BODY(be, unaligned); + } else { + T1HA0_BODY(be, aligned); + } +#endif +} + +/***************************************************************************/ + +#if 
T1HA0_AESNI_AVAILABLE && defined(__ia32__) +__cold uint64_t t1ha_ia32cpu_features(void) { + uint32_t features = 0; + uint32_t extended = 0; #if defined(__GNUC__) || defined(__clang__) - uint32_t eax, ebx, ecx, edx; - const unsigned cpuid_max = __get_cpuid_max(0, NULL); - if (cpuid_max >= 1) { - __cpuid_count(1, 0, eax, ebx, features, edx); - if (cpuid_max >= 7) - __cpuid_count(7, 0, eax, extended, ecx, edx); - } -#elif defined(_MSC_VER) - int info[4]; - __cpuid(info, 0); - const unsigned cpuid_max = info[0]; - if (cpuid_max >= 1) { - __cpuidex(info, 1, 0); - features = info[2]; - if (cpuid_max >= 7) { - __cpuidex(info, 7, 0); - extended = info[1]; - } - } -#endif - return features | (uint64_t)extended << 32; -} -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ - -#if T1HA0_RUNTIME_SELECT - -__cold t1ha0_function_t t1ha0_resolve(void) { - -#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) - uint64_t features = t1ha_ia32cpu_features(); - if (t1ha_ia32_AESNI_avail(features)) { - if (t1ha_ia32_AVX_avail(features)) - return t1ha_ia32_AVX2_avail(features) ? t1ha0_ia32aes_avx2 - : t1ha0_ia32aes_avx; - return t1ha0_ia32aes_noavx; - } -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#ifndef T1HA1_DISABLED - return t1ha1_be; -#else - return t1ha2_atonce; -#endif /* T1HA1_DISABLED */ -#else - return t1ha0_32be; -#endif -#else /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#ifndef T1HA1_DISABLED - return t1ha1_le; -#else - return t1ha2_atonce; -#endif /* T1HA1_DISABLED */ -#else - return t1ha0_32le; -#endif -#endif /* __BYTE_ORDER__ */ -} - -#if T1HA_USE_INDIRECT_FUNCTIONS -/* Use IFUNC (GNU ELF indirect functions) to choice implementation at runtime. 
- * For more info please see - * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format - * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ + uint32_t eax, ebx, ecx, edx; + const unsigned cpuid_max = __get_cpuid_max(0, NULL); + if (cpuid_max >= 1) { + __cpuid_count(1, 0, eax, ebx, features, edx); + if (cpuid_max >= 7) + __cpuid_count(7, 0, eax, extended, ecx, edx); + } +#elif defined(_MSC_VER) + int info[4]; + __cpuid(info, 0); + const unsigned cpuid_max = info[0]; + if (cpuid_max >= 1) { + __cpuidex(info, 1, 0); + features = info[2]; + if (cpuid_max >= 7) { + __cpuidex(info, 7, 0); + extended = info[1]; + } + } +#endif + return features | (uint64_t)extended << 32; +} +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + +#if T1HA0_RUNTIME_SELECT + +__cold t1ha0_function_t t1ha0_resolve(void) { + +#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) + uint64_t features = t1ha_ia32cpu_features(); + if (t1ha_ia32_AESNI_avail(features)) { + if (t1ha_ia32_AVX_avail(features)) + return t1ha_ia32_AVX2_avail(features) ? t1ha0_ia32aes_avx2 + : t1ha0_ia32aes_avx; + return t1ha0_ia32aes_noavx; + } +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#ifndef T1HA1_DISABLED + return t1ha1_be; +#else + return t1ha2_atonce; +#endif /* T1HA1_DISABLED */ +#else + return t1ha0_32be; +#endif +#else /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#ifndef T1HA1_DISABLED + return t1ha1_le; +#else + return t1ha2_atonce; +#endif /* T1HA1_DISABLED */ +#else + return t1ha0_32le; +#endif +#endif /* __BYTE_ORDER__ */ +} + +#if T1HA_USE_INDIRECT_FUNCTIONS +/* Use IFUNC (GNU ELF indirect functions) to choice implementation at runtime. 
+ * For more info please see + * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format + * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ #if __has_attribute(__ifunc__) -uint64_t t1ha0(const void *data, size_t len, uint64_t seed) +uint64_t t1ha0(const void *data, size_t len, uint64_t seed) __attribute__((__ifunc__("t1ha0_resolve"))); -#else -__asm("\t.globl\tt1ha0\n\t.type\tt1ha0, " - "%gnu_indirect_function\n\t.set\tt1ha0,t1ha0_resolve"); +#else +__asm("\t.globl\tt1ha0\n\t.type\tt1ha0, " + "%gnu_indirect_function\n\t.set\tt1ha0,t1ha0_resolve"); #endif /* __has_attribute(__ifunc__) */ - + #elif __GNUC_PREREQ(4, 0) || __has_attribute(__constructor__) - + uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t); - + static __cold void __attribute__((__constructor__)) t1ha0_init(void) { - t1ha0_funcptr = t1ha0_resolve(); -} - -#else /* T1HA_USE_INDIRECT_FUNCTIONS */ - -static __cold uint64_t t1ha0_proxy(const void *data, size_t len, - uint64_t seed) { - t1ha0_funcptr = t1ha0_resolve(); - return t1ha0_funcptr(data, len, seed); -} - -uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t) = t1ha0_proxy; - -#endif /* !T1HA_USE_INDIRECT_FUNCTIONS */ -#endif /* T1HA0_RUNTIME_SELECT */ - -#endif /* T1HA0_DISABLED */ + t1ha0_funcptr = t1ha0_resolve(); +} + +#else /* T1HA_USE_INDIRECT_FUNCTIONS */ + +static __cold uint64_t t1ha0_proxy(const void *data, size_t len, + uint64_t seed) { + t1ha0_funcptr = t1ha0_resolve(); + return t1ha0_funcptr(data, len, seed); +} + +uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t) = t1ha0_proxy; + +#endif /* !T1HA_USE_INDIRECT_FUNCTIONS */ +#endif /* T1HA0_RUNTIME_SELECT */ + +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h b/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h index fa1a753f34..a2372d5201 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h @@ -1,182 +1,182 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, 
https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -#if T1HA0_AESNI_AVAILABLE - -uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - - if (unlikely(len > 32)) { - __m128i x = _mm_set_epi64x(a, b); - __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); - - const __m128i *__restrict v = (const __m128i *)data; - const __m128i *__restrict const detent = - (const __m128i *)((const uint8_t *)data + len - 127); - - while (v < detent) { - __m128i v0 = _mm_loadu_si128(v + 0); - __m128i v1 = _mm_loadu_si128(v + 1); - __m128i v2 = _mm_loadu_si128(v + 2); - __m128i v3 = _mm_loadu_si128(v + 3); - __m128i v4 = _mm_loadu_si128(v + 4); - __m128i v5 = _mm_loadu_si128(v + 5); - __m128i v6 = _mm_loadu_si128(v + 6); - __m128i v7 = _mm_loadu_si128(v + 7); - - __m128i v0y = _mm_aesenc_si128(v0, y); - __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); - __m128i v45_67 = - _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); - - __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); - __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); - - x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); - y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); - v += 8; - } - - if (len & 64) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - - __m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v2y); - y = _mm_aesdec_si128(y, v3x); - } - - if (len & 32) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } - - if (len & 16) { - y = _mm_add_epi64(x, y); - x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); - } - - x = _mm_add_epi64(_mm_aesdec_si128(x, 
_mm_aesenc_si128(y, x)), y); -#if defined(__x86_64__) || defined(_M_X64) -#if defined(__SSE4_1__) || defined(__AVX__) - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - a = _mm_cvtsi128_si64(x); - b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); -#endif -#else -#if defined(__SSE4_1__) || defined(__AVX__) - a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) - << 32; - b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) - << 32; -#else - a = (uint32_t)_mm_cvtsi128_si32(x); - a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; - x = _mm_unpackhi_epi64(x, x); - b = (uint32_t)_mm_cvtsi128_si32(x); - b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; -#endif -#endif -#ifdef __AVX__ - _mm256_zeroupper(); -#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ - defined(__e2k__)) - _mm_empty(); -#endif - data = v; - len &= 15; - } - - const uint64_t *v = (const uint64_t *)data; - switch (len) { - default: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); - /* fall through */ - case 24: - case 23: - case 22: - case 21: - case 20: - case 19: - case 18: - case 17: - mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); - /* fall through */ - case 16: - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); - /* fall through */ - case 0: - return final64(a, b); - } -} - -#endif /* T1HA0_AESNI_AVAILABLE */ -#undef T1HA_IA32AES_NAME + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +#if T1HA0_AESNI_AVAILABLE + +uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + + if (unlikely(len > 32)) { + __m128i x = _mm_set_epi64x(a, b); + __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); + + const __m128i *__restrict v = (const __m128i *)data; + const __m128i *__restrict const detent = + (const __m128i *)((const uint8_t *)data + len - 127); + + while (v < detent) { + __m128i v0 = _mm_loadu_si128(v + 0); + __m128i v1 = _mm_loadu_si128(v + 1); + __m128i v2 = _mm_loadu_si128(v + 2); + __m128i v3 = _mm_loadu_si128(v + 3); + __m128i v4 = _mm_loadu_si128(v + 4); + __m128i v5 = _mm_loadu_si128(v + 5); + __m128i v6 = _mm_loadu_si128(v + 6); + __m128i v7 = _mm_loadu_si128(v + 7); + + __m128i v0y = _mm_aesenc_si128(v0, y); + __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); + __m128i v45_67 = + _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); + + __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); + __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); + + x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); + y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); + v += 8; + } + + if (len & 64) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + + __m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v2y); + y = _mm_aesdec_si128(y, v3x); + } + + if (len & 32) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } + + if (len & 16) { + y = _mm_add_epi64(x, y); + x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); + } + + x = _mm_add_epi64(_mm_aesdec_si128(x, 
_mm_aesenc_si128(y, x)), y); +#if defined(__x86_64__) || defined(_M_X64) +#if defined(__SSE4_1__) || defined(__AVX__) + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); +#else + a = _mm_cvtsi128_si64(x); + b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); +#endif +#else +#if defined(__SSE4_1__) || defined(__AVX__) + a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) + << 32; + b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) + << 32; +#else + a = (uint32_t)_mm_cvtsi128_si32(x); + a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + x = _mm_unpackhi_epi64(x, x); + b = (uint32_t)_mm_cvtsi128_si32(x); + b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; +#endif +#endif +#ifdef __AVX__ + _mm256_zeroupper(); +#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__e2k__)) + _mm_empty(); +#endif + data = v; + len &= 15; + } + + const uint64_t *v = (const uint64_t *)data; + switch (len) { + default: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); + /* fall through */ + case 24: + case 23: + case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); + /* fall through */ + case 16: + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); + /* fall through */ + case 0: + return final64(a, b); + } +} + +#endif /* T1HA0_AESNI_AVAILABLE */ +#undef T1HA_IA32AES_NAME diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c index a19e7d9b4a..a344bfd98c 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME 
t1ha0_ia32aes_avx -#include "t1ha0_ia32aes_a.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx +#include "t1ha0_ia32aes_a.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c index cd00f29290..a0b3a2d142 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx2 -#include "t1ha0_ia32aes_b.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx2 +#include "t1ha0_ia32aes_b.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h b/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h index 9f63476c77..f8759dde82 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h @@ -1,167 +1,167 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. 
This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. 
In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -#if T1HA0_AESNI_AVAILABLE - -uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - - if (unlikely(len > 32)) { - __m128i x = _mm_set_epi64x(a, b); - __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); - - const __m128i *v = (const __m128i *)data; - const __m128i *const detent = - (const __m128i *)((const uint8_t *)data + (len & ~15ul)); - data = detent; - - if (len & 16) { - x = _mm_add_epi64(x, _mm_loadu_si128(v++)); - y = _mm_aesenc_si128(x, y); - } - len &= 15; - - if (v + 7 < detent) { - __m128i salt = y; - do { - __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - salt = _mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); - t = _mm_aesenc_si128(x, t); - x = _mm_add_epi64(y, x); - y = t; - } while (v + 7 < detent); - } - - while (v < detent) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } - - x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); -#if defined(__x86_64__) || defined(_M_X64) -#if 
defined(__SSE4_1__) || defined(__AVX__) - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - a = _mm_cvtsi128_si64(x); - b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); -#endif -#else -#if defined(__SSE4_1__) || defined(__AVX__) - a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) - << 32; - b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) - << 32; -#else - a = (uint32_t)_mm_cvtsi128_si32(x); - a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; - x = _mm_unpackhi_epi64(x, x); - b = (uint32_t)_mm_cvtsi128_si32(x); - b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; -#endif -#endif -#ifdef __AVX__ - _mm256_zeroupper(); -#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ - defined(__e2k__)) - _mm_empty(); -#endif - } - - const uint64_t *v = (const uint64_t *)data; - switch (len) { - default: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); - /* fall through */ - case 24: - case 23: - case 22: - case 21: - case 20: - case 19: - case 18: - case 17: - mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); - /* fall through */ - case 16: - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); - /* fall through */ - case 0: - return final64(a, b); - } -} - -#endif /* T1HA0_AESNI_AVAILABLE */ -#undef T1HA_IA32AES_NAME + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +#if T1HA0_AESNI_AVAILABLE + +uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + + if (unlikely(len > 32)) { + __m128i x = _mm_set_epi64x(a, b); + __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); + + const __m128i *v = (const __m128i *)data; + const __m128i *const detent = + (const __m128i *)((const uint8_t *)data + (len & ~15ul)); + data = detent; + + if (len & 16) { + x = _mm_add_epi64(x, _mm_loadu_si128(v++)); + y = _mm_aesenc_si128(x, y); + } + len &= 15; + + if (v + 7 < detent) { + __m128i salt = y; + do { + __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + salt = _mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); + t = _mm_aesenc_si128(x, t); + x = _mm_add_epi64(y, x); + y = t; + } while (v + 7 < detent); + } + + while (v < detent) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } + + x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); +#if defined(__x86_64__) || defined(_M_X64) +#if defined(__SSE4_1__) || defined(__AVX__) + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); +#else + a = _mm_cvtsi128_si64(x); + b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); +#endif +#else +#if defined(__SSE4_1__) || defined(__AVX__) + a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) + << 32; + b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) + << 32; +#else + a = 
(uint32_t)_mm_cvtsi128_si32(x); + a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + x = _mm_unpackhi_epi64(x, x); + b = (uint32_t)_mm_cvtsi128_si32(x); + b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; +#endif +#endif +#ifdef __AVX__ + _mm256_zeroupper(); +#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__e2k__)) + _mm_empty(); +#endif + } + + const uint64_t *v = (const uint64_t *)data; + switch (len) { + default: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); + /* fall through */ + case 24: + case 23: + case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); + /* fall through */ + case 16: + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); + /* fall through */ + case 0: + return final64(a, b); + } +} + +#endif /* T1HA0_AESNI_AVAILABLE */ +#undef T1HA_IA32AES_NAME diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c index 250157b37d..fb6489fbff 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME t1ha0_ia32aes_noavx -#include "t1ha0_ia32aes_a.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_noavx +#include "t1ha0_ia32aes_a.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_selfcheck.c b/contrib/libs/t1ha/src/t1ha0_selfcheck.c index 0230300b1f..d3c8e9a3fd 100644 --- a/contrib/libs/t1ha/src/t1ha0_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha0_selfcheck.c @@ -1,204 +1,204 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * 
Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA0_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_32le[81] = { 0, - 0xC92229C10FAEA50E, 0x3DF1354B0DFDC443, 0x968F016D60417BB3, 0x85AAFB50C6DA770F, - 0x66CCE3BB6842C7D6, 0xDDAA39C11537C226, 0x35958D281F0C9C8C, 0x8C5D64B091DE608E, - 0x4094DF680D39786B, 0x1014F4AA2A2EDF4D, 0x39D21891615AA310, 0x7EF51F67C398C7C4, - 0x06163990DDBF319D, 0xE229CAA00C8D6F3F, 0xD2240B4B0D54E0F5, 0xEA2E7E905DDEAF94, - 0x8D4F8A887183A5CE, 0x44337F9A63C5820C, 0x94938D1E86A9B797, 0x96E9CABA5CA210CC, - 0x6EFBB9CC9E8F7708, 0x3D12EA0282FB8BBC, 0x5DA781EE205A2C48, 0xFA4A51A12677FE12, - 0x81D5F04E20660B28, 0x57258D043BCD3841, 0x5C9BEB62059C1ED2, 0x57A02162F9034B33, - 0xBA2A13E457CE19B8, 0xE593263BF9451F3A, 0x0BC1175539606BC5, 0xA3E2929E9C5F289F, - 0x86BDBD06835E35F7, 0xA180950AB48BAADC, 0x7812C994D9924028, 0x308366011415F46B, - 0x77FE9A9991C5F959, 0x925C340B70B0B1E3, 0xCD9C5BA4C41E2E10, 0x7CC4E7758B94CD93, - 0x898B235962EA4625, 0xD7E3E5BF22893286, 0x396F4CDD33056C64, 0x740AB2E32F17CD9F, - 0x60D12FF9CD15B321, 0xBEE3A6C9903A81D8, 0xB47040913B33C35E, 0x19EE8C2ACC013CFF, - 0x5DEC94C5783B55C4, 0x78DC122D562C5F1D, 0x6520F008DA1C181E, 0x77CAF155A36EBF7C, - 0x0A09E02BDB883CA6, 0xFD5D9ADA7E3FB895, 0xC6F5FDD9EEAB83B5, 0x84589BB29F52A92A, - 0x9B2517F13F8E9814, 0x6F752AF6A52E31EC, 0x8E717799E324CE8A, 0x84D90AEF39262D58, - 0x79C27B13FC28944D, 0xE6D6DF6438E0044A, 0x51B603E400D79CA4, 0x6A902B28C588B390, - 0x8D7F8DE9E6CB1D83, 0xCF1A4DC11CA7F044, 0xEF02E43C366786F1, 0x89915BCDBCFBE30F, - 0x5928B306F1A9CC7F, 0xA8B59092996851C5, 0x22050A20427E8B25, 0x6E6D64018941E7EE, - 0x9798C898B81AE846, 0x80EF218CDC30124A, 0xFCE45E60D55B0284, 0x4010E735D3147C35, - 0xEB647D999FD8DC7E, 0xD3544DCAB14FE907, 0xB588B27D8438700C, 0xA49EBFC43E057A4C -}; - -const uint64_t t1ha_refval_32be[81] = { 0, - 0xC92229C10FAEA50E, 0x0FE212630DD87E0F, 0x968F016D60417BB3, 0xE6B12B2C889913AB, - 0xAA3787887A9DA368, 0x06EE7202D53CEF39, 
0x6149AFB2C296664B, 0x86C893210F9A5805, - 0x8379E5DA988AA04C, 0x24763AA7CE411A60, 0x9CF9C64B395A4CF8, 0xFFC192C338DDE904, - 0x094575BAB319E5F5, 0xBBBACFE7728C6511, 0x36B8C3CEBE4EF409, 0xAA0BA8A3397BA4D0, - 0xF9F85CF7124EE653, 0x3ADF4F7DF2A887AE, 0xAA2A0F5964AA9A7A, 0xF18B563F42D36EB8, - 0x034366CEF8334F5C, 0xAE2E85180E330E5F, 0xA5CE9FBFDF5C65B8, 0x5E509F25A9CA9B0B, - 0xE30D1358C2013BD2, 0xBB3A04D5EB8111FE, 0xB04234E82A15A28D, 0x87426A56D0EA0E2F, - 0x095086668E07F9F8, 0xF4CD3A43B6A6AEA5, 0x73F9B9B674D472A6, 0x558344229A1E4DCF, - 0x0AD4C95B2279181A, 0x5E3D19D80821CA6B, 0x652492D25BEBA258, 0xEFA84B02EAB849B1, - 0x81AD2D253059AC2C, 0x1400CCB0DFB2F457, 0x5688DC72A839860E, 0x67CC130E0FD1B0A7, - 0x0A851E3A94E21E69, 0x2EA0000B6A073907, 0xAE9776FF9BF1D02E, 0xC0A96B66B160631C, - 0xA93341DE4ED7C8F0, 0x6FBADD8F5B85E141, 0xB7D295F1C21E0CBA, 0x6D6114591B8E434F, - 0xF5B6939B63D97BE7, 0x3C80D5053F0E5DB4, 0xAC520ACC6B73F62D, 0xD1051F5841CF3966, - 0x62245AEA644AE760, 0x0CD56BE15497C62D, 0x5BB93435C4988FB6, 0x5FADB88EB18DB512, - 0xC897CAE2242475CC, 0xF1A094EF846DC9BB, 0x2B1D8B24924F79B6, 0xC6DF0C0E8456EB53, - 0xE6A40128303A9B9C, 0x64D37AF5EFFA7BD9, 0x90FEB70A5AE2A598, 0xEC3BA5F126D9FF4B, - 0x3121C8EC3AC51B29, 0x3B41C4D422166EC1, 0xB4878DDCBF48ED76, 0x5CB850D77CB762E4, - 0x9A27A43CC1DD171F, 0x2FDFFC6F99CB424A, 0xF54A57E09FDEA7BB, 0x5F78E5EE2CAB7039, - 0xB8BA95883DB31CBA, 0x131C61EB84AF86C3, 0x84B1F64E9C613DA7, 0xE94C1888C0C37C02, - 0xEA08F8BFB2039CDE, 0xCCC6D04D243EC753, 0x8977D105298B0629, 0x7AAA976494A5905E -}; - -#if T1HA0_AESNI_AVAILABLE -const uint64_t t1ha_refval_ia32aes_a[81] = { 0, - 0x772C7311BE32FF42, 0xB231AC660E5B23B5, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 
0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0xBB34C6A4396695D2, 0x7F46E1981C3256AD, - 0x4B25A9B217A6C5B4, 0x7A0A6BCDD2321DA9, 0x0A1F55E690A7B44E, 0x8F451A91D7F05244, - 0x624D5D3C9B9800A7, 0x09DDC2B6409DDC25, 0x3E155765865622B6, 0x96519FAC9511B381, - 0x512E58482FE4FBF0, 0x1AB260EA7D54AE1C, 0x67976F12CC28BBBD, 0x0607B5B2E6250156, - 0x7E700BEA717AD36E, 0x06A058D9D61CABB3, 0x57DA5324A824972F, 0x1193BA74DBEBF7E7, - 0xC18DC3140E7002D4, 0x9F7CCC11DFA0EF17, 0xC487D6C20666A13A, 0xB67190E4B50EF0C8, - 0xA53DAA608DF0B9A5, 0x7E13101DE87F9ED3, 0x7F8955AE2F05088B, 0x2DF7E5A097AD383F, - 0xF027683A21EA14B5, 0x9BB8AEC3E3360942, 0x92BE39B54967E7FE, 0x978C6D332E7AFD27, - 0xED512FE96A4FAE81, 0x9E1099B8140D7BA3, 0xDFD5A5BE1E6FE9A6, 0x1D82600E23B66DD4, - 0x3FA3C3B7EE7B52CE, 0xEE84F7D2A655EF4C, 0x2A4361EC769E3BEB, 0x22E4B38916636702, - 0x0063096F5D39A115, 0x6C51B24DAAFA5434, 0xBAFB1DB1B411E344, 0xFF529F161AE0C4B0, - 0x1290EAE3AC0A686F, 0xA7B0D4585447D1BE, 0xAED3D18CB6CCAD53, 0xFC73D46F8B41BEC6 -}; - -const uint64_t t1ha_refval_ia32aes_b[81] = { 0, - 0x772C7311BE32FF42, 0x4398F62A8CB6F72A, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 
0x66D7A73C6AEF50B3, 0xE810F88E85CEA11A, 0x4814F8F3B83E4394, - 0x9CABA22D10A2F690, 0x0D10032511F58111, 0xE9A36EF5EEA3CD58, 0xC79242DE194D9D7C, - 0xC3871AA0435EE5C8, 0x52890BED43CCF4CD, 0x07A1D0861ACCD373, 0x227B816FF0FEE9ED, - 0x59FFBF73AACFC0C4, 0x09AB564F2BEDAD0C, 0xC05F744F2EE38318, 0x7B50B621D547C661, - 0x0C1F71CB4E68E5D1, 0x0E33A47881D4DBAA, 0xF5C3BF198E9A7C2E, 0x16328FD8C0F68A91, - 0xA3E399C9AB3E9A59, 0x163AE71CBCBB18B8, 0x18F17E4A8C79F7AB, 0x9250E2EA37014B45, - 0x7BBBB111D60B03E4, 0x3DAA4A3071A0BD88, 0xA28828D790A2D6DC, 0xBC70FC88F64BE3F1, - 0xA3E48008BA4333C7, 0x739E435ACAFC79F7, 0x42BBB360BE007CC6, 0x4FFB6FD2AF74EC92, - 0x2A799A2994673146, 0xBE0A045B69D48E9F, 0x549432F54FC6A278, 0x371D3C60369FC702, - 0xDB4557D415B08CA7, 0xE8692F0A83850B37, 0x022E46AEB36E9AAB, 0x117AC9B814E4652D, - 0xA361041267AE9048, 0x277CB51C961C3DDA, 0xAFFC96F377CB8A8D, 0x83CC79FA01DD1BA7, - 0xA494842ACF4B802C, 0xFC6D9CDDE2C34A3F, 0x4ED6863CE455F7A7, 0x630914D0DB7AAE98 -}; -#endif /* T1HA0_AESNI_AVAILABLE */ - -/* *INDENT-ON* */ -/* clang-format on */ - -__cold int t1ha_selfcheck__t1ha0_32le(void) { - return t1ha_selfcheck(t1ha0_32le, t1ha_refval_32le); -} - -__cold int t1ha_selfcheck__t1ha0_32be(void) { - return t1ha_selfcheck(t1ha0_32be, t1ha_refval_32be); -} - -#if T1HA0_AESNI_AVAILABLE -__cold int t1ha_selfcheck__t1ha0_ia32aes_noavx(void) { - return t1ha_selfcheck(t1ha0_ia32aes_noavx, t1ha_refval_ia32aes_a); -} - -__cold int t1ha_selfcheck__t1ha0_ia32aes_avx(void) { - return t1ha_selfcheck(t1ha0_ia32aes_avx, t1ha_refval_ia32aes_a); -} - -#ifndef __e2k__ -__cold int t1ha_selfcheck__t1ha0_ia32aes_avx2(void) { - return t1ha_selfcheck(t1ha0_ia32aes_avx2, t1ha_refval_ia32aes_b); -} -#endif /* ! 
__e2k__ */ -#endif /* if T1HA0_AESNI_AVAILABLE */ - -__cold int t1ha_selfcheck__t1ha0(void) { - int rc = t1ha_selfcheck__t1ha0_32le() | t1ha_selfcheck__t1ha0_32be(); - -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) - rc |= t1ha_selfcheck__t1ha2(); -#else - rc |= t1ha_selfcheck__t1ha1(); -#endif /* T1HA1_DISABLED */ -#endif /* 32/64 */ - -#if T1HA0_AESNI_AVAILABLE -#ifdef __e2k__ - rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); -#else - uint64_t features = t1ha_ia32cpu_features(); - if (t1ha_ia32_AESNI_avail(features)) { - rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); - if (t1ha_ia32_AVX_avail(features)) { - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); - if (t1ha_ia32_AVX2_avail(features)) - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx2(); - } - } -#endif /* __e2k__ */ -#endif /* T1HA0_AESNI_AVAILABLE */ - - return rc; -} - -#endif /* T1HA0_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#ifndef T1HA0_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_32le[81] = { 0, + 0xC92229C10FAEA50E, 0x3DF1354B0DFDC443, 0x968F016D60417BB3, 0x85AAFB50C6DA770F, + 0x66CCE3BB6842C7D6, 0xDDAA39C11537C226, 0x35958D281F0C9C8C, 0x8C5D64B091DE608E, + 0x4094DF680D39786B, 0x1014F4AA2A2EDF4D, 0x39D21891615AA310, 0x7EF51F67C398C7C4, + 0x06163990DDBF319D, 0xE229CAA00C8D6F3F, 0xD2240B4B0D54E0F5, 0xEA2E7E905DDEAF94, + 0x8D4F8A887183A5CE, 0x44337F9A63C5820C, 0x94938D1E86A9B797, 0x96E9CABA5CA210CC, + 0x6EFBB9CC9E8F7708, 0x3D12EA0282FB8BBC, 0x5DA781EE205A2C48, 0xFA4A51A12677FE12, + 0x81D5F04E20660B28, 0x57258D043BCD3841, 0x5C9BEB62059C1ED2, 0x57A02162F9034B33, + 0xBA2A13E457CE19B8, 0xE593263BF9451F3A, 0x0BC1175539606BC5, 0xA3E2929E9C5F289F, + 0x86BDBD06835E35F7, 0xA180950AB48BAADC, 0x7812C994D9924028, 0x308366011415F46B, + 0x77FE9A9991C5F959, 0x925C340B70B0B1E3, 0xCD9C5BA4C41E2E10, 0x7CC4E7758B94CD93, + 0x898B235962EA4625, 0xD7E3E5BF22893286, 0x396F4CDD33056C64, 0x740AB2E32F17CD9F, + 0x60D12FF9CD15B321, 0xBEE3A6C9903A81D8, 0xB47040913B33C35E, 0x19EE8C2ACC013CFF, + 0x5DEC94C5783B55C4, 0x78DC122D562C5F1D, 0x6520F008DA1C181E, 0x77CAF155A36EBF7C, + 0x0A09E02BDB883CA6, 0xFD5D9ADA7E3FB895, 0xC6F5FDD9EEAB83B5, 0x84589BB29F52A92A, + 0x9B2517F13F8E9814, 0x6F752AF6A52E31EC, 0x8E717799E324CE8A, 0x84D90AEF39262D58, + 0x79C27B13FC28944D, 0xE6D6DF6438E0044A, 0x51B603E400D79CA4, 0x6A902B28C588B390, + 0x8D7F8DE9E6CB1D83, 0xCF1A4DC11CA7F044, 0xEF02E43C366786F1, 0x89915BCDBCFBE30F, + 0x5928B306F1A9CC7F, 0xA8B59092996851C5, 0x22050A20427E8B25, 0x6E6D64018941E7EE, + 0x9798C898B81AE846, 0x80EF218CDC30124A, 0xFCE45E60D55B0284, 0x4010E735D3147C35, + 0xEB647D999FD8DC7E, 0xD3544DCAB14FE907, 0xB588B27D8438700C, 0xA49EBFC43E057A4C +}; + +const uint64_t t1ha_refval_32be[81] = { 0, + 0xC92229C10FAEA50E, 0x0FE212630DD87E0F, 0x968F016D60417BB3, 0xE6B12B2C889913AB, + 0xAA3787887A9DA368, 0x06EE7202D53CEF39, 
0x6149AFB2C296664B, 0x86C893210F9A5805, + 0x8379E5DA988AA04C, 0x24763AA7CE411A60, 0x9CF9C64B395A4CF8, 0xFFC192C338DDE904, + 0x094575BAB319E5F5, 0xBBBACFE7728C6511, 0x36B8C3CEBE4EF409, 0xAA0BA8A3397BA4D0, + 0xF9F85CF7124EE653, 0x3ADF4F7DF2A887AE, 0xAA2A0F5964AA9A7A, 0xF18B563F42D36EB8, + 0x034366CEF8334F5C, 0xAE2E85180E330E5F, 0xA5CE9FBFDF5C65B8, 0x5E509F25A9CA9B0B, + 0xE30D1358C2013BD2, 0xBB3A04D5EB8111FE, 0xB04234E82A15A28D, 0x87426A56D0EA0E2F, + 0x095086668E07F9F8, 0xF4CD3A43B6A6AEA5, 0x73F9B9B674D472A6, 0x558344229A1E4DCF, + 0x0AD4C95B2279181A, 0x5E3D19D80821CA6B, 0x652492D25BEBA258, 0xEFA84B02EAB849B1, + 0x81AD2D253059AC2C, 0x1400CCB0DFB2F457, 0x5688DC72A839860E, 0x67CC130E0FD1B0A7, + 0x0A851E3A94E21E69, 0x2EA0000B6A073907, 0xAE9776FF9BF1D02E, 0xC0A96B66B160631C, + 0xA93341DE4ED7C8F0, 0x6FBADD8F5B85E141, 0xB7D295F1C21E0CBA, 0x6D6114591B8E434F, + 0xF5B6939B63D97BE7, 0x3C80D5053F0E5DB4, 0xAC520ACC6B73F62D, 0xD1051F5841CF3966, + 0x62245AEA644AE760, 0x0CD56BE15497C62D, 0x5BB93435C4988FB6, 0x5FADB88EB18DB512, + 0xC897CAE2242475CC, 0xF1A094EF846DC9BB, 0x2B1D8B24924F79B6, 0xC6DF0C0E8456EB53, + 0xE6A40128303A9B9C, 0x64D37AF5EFFA7BD9, 0x90FEB70A5AE2A598, 0xEC3BA5F126D9FF4B, + 0x3121C8EC3AC51B29, 0x3B41C4D422166EC1, 0xB4878DDCBF48ED76, 0x5CB850D77CB762E4, + 0x9A27A43CC1DD171F, 0x2FDFFC6F99CB424A, 0xF54A57E09FDEA7BB, 0x5F78E5EE2CAB7039, + 0xB8BA95883DB31CBA, 0x131C61EB84AF86C3, 0x84B1F64E9C613DA7, 0xE94C1888C0C37C02, + 0xEA08F8BFB2039CDE, 0xCCC6D04D243EC753, 0x8977D105298B0629, 0x7AAA976494A5905E +}; + +#if T1HA0_AESNI_AVAILABLE +const uint64_t t1ha_refval_ia32aes_a[81] = { 0, + 0x772C7311BE32FF42, 0xB231AC660E5B23B5, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 
0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0xBB34C6A4396695D2, 0x7F46E1981C3256AD, + 0x4B25A9B217A6C5B4, 0x7A0A6BCDD2321DA9, 0x0A1F55E690A7B44E, 0x8F451A91D7F05244, + 0x624D5D3C9B9800A7, 0x09DDC2B6409DDC25, 0x3E155765865622B6, 0x96519FAC9511B381, + 0x512E58482FE4FBF0, 0x1AB260EA7D54AE1C, 0x67976F12CC28BBBD, 0x0607B5B2E6250156, + 0x7E700BEA717AD36E, 0x06A058D9D61CABB3, 0x57DA5324A824972F, 0x1193BA74DBEBF7E7, + 0xC18DC3140E7002D4, 0x9F7CCC11DFA0EF17, 0xC487D6C20666A13A, 0xB67190E4B50EF0C8, + 0xA53DAA608DF0B9A5, 0x7E13101DE87F9ED3, 0x7F8955AE2F05088B, 0x2DF7E5A097AD383F, + 0xF027683A21EA14B5, 0x9BB8AEC3E3360942, 0x92BE39B54967E7FE, 0x978C6D332E7AFD27, + 0xED512FE96A4FAE81, 0x9E1099B8140D7BA3, 0xDFD5A5BE1E6FE9A6, 0x1D82600E23B66DD4, + 0x3FA3C3B7EE7B52CE, 0xEE84F7D2A655EF4C, 0x2A4361EC769E3BEB, 0x22E4B38916636702, + 0x0063096F5D39A115, 0x6C51B24DAAFA5434, 0xBAFB1DB1B411E344, 0xFF529F161AE0C4B0, + 0x1290EAE3AC0A686F, 0xA7B0D4585447D1BE, 0xAED3D18CB6CCAD53, 0xFC73D46F8B41BEC6 +}; + +const uint64_t t1ha_refval_ia32aes_b[81] = { 0, + 0x772C7311BE32FF42, 0x4398F62A8CB6F72A, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 
0x66D7A73C6AEF50B3, 0xE810F88E85CEA11A, 0x4814F8F3B83E4394, + 0x9CABA22D10A2F690, 0x0D10032511F58111, 0xE9A36EF5EEA3CD58, 0xC79242DE194D9D7C, + 0xC3871AA0435EE5C8, 0x52890BED43CCF4CD, 0x07A1D0861ACCD373, 0x227B816FF0FEE9ED, + 0x59FFBF73AACFC0C4, 0x09AB564F2BEDAD0C, 0xC05F744F2EE38318, 0x7B50B621D547C661, + 0x0C1F71CB4E68E5D1, 0x0E33A47881D4DBAA, 0xF5C3BF198E9A7C2E, 0x16328FD8C0F68A91, + 0xA3E399C9AB3E9A59, 0x163AE71CBCBB18B8, 0x18F17E4A8C79F7AB, 0x9250E2EA37014B45, + 0x7BBBB111D60B03E4, 0x3DAA4A3071A0BD88, 0xA28828D790A2D6DC, 0xBC70FC88F64BE3F1, + 0xA3E48008BA4333C7, 0x739E435ACAFC79F7, 0x42BBB360BE007CC6, 0x4FFB6FD2AF74EC92, + 0x2A799A2994673146, 0xBE0A045B69D48E9F, 0x549432F54FC6A278, 0x371D3C60369FC702, + 0xDB4557D415B08CA7, 0xE8692F0A83850B37, 0x022E46AEB36E9AAB, 0x117AC9B814E4652D, + 0xA361041267AE9048, 0x277CB51C961C3DDA, 0xAFFC96F377CB8A8D, 0x83CC79FA01DD1BA7, + 0xA494842ACF4B802C, 0xFC6D9CDDE2C34A3F, 0x4ED6863CE455F7A7, 0x630914D0DB7AAE98 +}; +#endif /* T1HA0_AESNI_AVAILABLE */ + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha0_32le(void) { + return t1ha_selfcheck(t1ha0_32le, t1ha_refval_32le); +} + +__cold int t1ha_selfcheck__t1ha0_32be(void) { + return t1ha_selfcheck(t1ha0_32be, t1ha_refval_32be); +} + +#if T1HA0_AESNI_AVAILABLE +__cold int t1ha_selfcheck__t1ha0_ia32aes_noavx(void) { + return t1ha_selfcheck(t1ha0_ia32aes_noavx, t1ha_refval_ia32aes_a); +} + +__cold int t1ha_selfcheck__t1ha0_ia32aes_avx(void) { + return t1ha_selfcheck(t1ha0_ia32aes_avx, t1ha_refval_ia32aes_a); +} + +#ifndef __e2k__ +__cold int t1ha_selfcheck__t1ha0_ia32aes_avx2(void) { + return t1ha_selfcheck(t1ha0_ia32aes_avx2, t1ha_refval_ia32aes_b); +} +#endif /* ! 
__e2k__ */ +#endif /* if T1HA0_AESNI_AVAILABLE */ + +__cold int t1ha_selfcheck__t1ha0(void) { + int rc = t1ha_selfcheck__t1ha0_32le() | t1ha_selfcheck__t1ha0_32be(); + +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) + rc |= t1ha_selfcheck__t1ha2(); +#else + rc |= t1ha_selfcheck__t1ha1(); +#endif /* T1HA1_DISABLED */ +#endif /* 32/64 */ + +#if T1HA0_AESNI_AVAILABLE +#ifdef __e2k__ + rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); +#else + uint64_t features = t1ha_ia32cpu_features(); + if (t1ha_ia32_AESNI_avail(features)) { + rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); + if (t1ha_ia32_AVX_avail(features)) { + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); + if (t1ha_ia32_AVX2_avail(features)) + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx2(); + } + } +#endif /* __e2k__ */ +#endif /* T1HA0_AESNI_AVAILABLE */ + + return rc; +} + +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha1.c b/contrib/libs/t1ha/src/t1ha1.c index 11275d9f0a..da6899c221 100644 --- a/contrib/libs/t1ha/src/t1ha1.c +++ b/contrib/libs/t1ha/src/t1ha1.c @@ -1,161 +1,161 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. 
If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. 
Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA1_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* xor-mul-xor mixer */ -static __inline uint64_t mix64(uint64_t v, uint64_t p) { - v *= p; - return v ^ rot64(v, 41); -} - -static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { - /* LY: for performance reason on a some not high-end CPUs - * I replaced the second mux64() operation by mix64(). - * Unfortunately this approach fails the "strict avalanche criteria", - * see test results at https://github.com/demerphq/smhasher. 
*/ - return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); -} - -/* TODO: C++ template in the next version */ -#define T1HA1_BODY(ENDIANNES, ALIGNESS) \ - const uint64_t *v = (const uint64_t *)data; \ - if (unlikely(len > 32)) { \ - uint64_t c = rot64(len, 17) + seed; \ - uint64_t d = len ^ rot64(seed, 17); \ - const uint64_t *detent = \ - (const uint64_t *)((const uint8_t *)data + len - 31); \ - do { \ - const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ - v += 4; \ - prefetch(v); \ - \ - const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \ - const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \ - d -= b ^ rot64(w1, 31); \ - c += a ^ rot64(w0, 41); \ - b ^= prime_0 * (c13 + w2); \ - a ^= prime_1 * (d02 + w3); \ - } while (likely(v < detent)); \ - \ - a ^= prime_6 * (rot64(c, 17) + d); \ - b ^= prime_5 * (c + rot64(d, 17)); \ - len &= 31; \ - } \ - \ - switch (len) { \ - default: \ - b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ - case 19: \ - case 18: \ - case 17: \ - a += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - a += mux64(tail64_##ENDIANNES##_##ALIGNESS(v, len), prime_1); \ - /* fall through */ \ - case 0: \ - return final_weak_avalanche(a, b); \ - } - -uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - 
T1HA1_BODY(le, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - T1HA1_BODY(le, unaligned); - } else { - T1HA1_BODY(le, aligned); - } -#endif -} - -uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA1_BODY(be, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - T1HA1_BODY(be, unaligned); - } else { - T1HA1_BODY(be, aligned); - } -#endif -} - -#endif /* T1HA1_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA1_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* xor-mul-xor mixer */ +static __inline uint64_t mix64(uint64_t v, uint64_t p) { + v *= p; + return v ^ rot64(v, 41); +} + +static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { + /* LY: for performance reason on a some not high-end CPUs + * I replaced the second mux64() operation by mix64(). + * Unfortunately this approach fails the "strict avalanche criteria", + * see test results at https://github.com/demerphq/smhasher. 
*/ + return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); +} + +/* TODO: C++ template in the next version */ +#define T1HA1_BODY(ENDIANNES, ALIGNESS) \ + const uint64_t *v = (const uint64_t *)data; \ + if (unlikely(len > 32)) { \ + uint64_t c = rot64(len, 17) + seed; \ + uint64_t d = len ^ rot64(seed, 17); \ + const uint64_t *detent = \ + (const uint64_t *)((const uint8_t *)data + len - 31); \ + do { \ + const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ + v += 4; \ + prefetch(v); \ + \ + const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \ + const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \ + d -= b ^ rot64(w1, 31); \ + c += a ^ rot64(w0, 41); \ + b ^= prime_0 * (c13 + w2); \ + a ^= prime_1 * (d02 + w3); \ + } while (likely(v < detent)); \ + \ + a ^= prime_6 * (rot64(c, 17) + d); \ + b ^= prime_5 * (c + rot64(d, 17)); \ + len &= 31; \ + } \ + \ + switch (len) { \ + default: \ + b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + a += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + a += mux64(tail64_##ENDIANNES##_##ALIGNESS(v, len), prime_1); \ + /* fall through */ \ + case 0: \ + return final_weak_avalanche(a, b); \ + } + +uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + 
T1HA1_BODY(le, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + T1HA1_BODY(le, unaligned); + } else { + T1HA1_BODY(le, aligned); + } +#endif +} + +uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA1_BODY(be, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + T1HA1_BODY(be, unaligned); + } else { + T1HA1_BODY(be, aligned); + } +#endif +} + +#endif /* T1HA1_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha1_selfcheck.c b/contrib/libs/t1ha/src/t1ha1_selfcheck.c index 9f9a19ae26..5cf49632ed 100644 --- a/contrib/libs/t1ha/src/t1ha1_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha1_selfcheck.c @@ -1,112 +1,112 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. 
+ * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA1_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_64le[81] = { 0, - 0x6A580668D6048674, 0xA2FE904AFF0D0879, 0xE3AB9C06FAF4D023, 0x6AF1C60874C95442, - 0xB3557E561A6C5D82, 0x0AE73C696F3D37C0, 0x5EF25F7062324941, 0x9B784F3B4CE6AF33, - 0x6993BB206A74F070, 0xF1E95DF109076C4C, 0x4E1EB70C58E48540, 0x5FDD7649D8EC44E4, - 0x559122C706343421, 0x380133D58665E93D, 0x9CE74296C8C55AE4, 0x3556F9A5757AB6D0, - 0xF62751F7F25C469E, 0x851EEC67F6516D94, 0xED463EE3848A8695, 0xDC8791FEFF8ED3AC, - 0x2569C744E1A282CF, 0xF90EB7C1D70A80B9, 0x68DFA6A1B8050A4C, 0x94CCA5E8210D2134, - 0xF5CC0BEABC259F52, 0x40DBC1F51618FDA7, 0x0807945BF0FB52C6, 0xE5EF7E09DE70848D, - 0x63E1DF35FEBE994A, 0x2025E73769720D5A, 0xAD6120B2B8A152E1, 0x2A71D9F13959F2B7, - 0x8A20849A27C32548, 0x0BCBC9FE3B57884E, 0x0E028D255667AEAD, 0xBE66DAD3043AB694, - 0xB00E4C1238F9E2D4, 0x5C54BDE5AE280E82, 0x0E22B86754BC3BC4, 0x016707EBF858B84D, - 0x990015FBC9E095EE, 0x8B9AF0A3E71F042F, 0x6AA56E88BD380564, 0xAACE57113E681A0F, - 0x19F81514AFA9A22D, 0x80DABA3D62BEAC79, 0x715210412CABBF46, 0xD8FA0B9E9D6AA93F, - 0x6C2FC5A4109FD3A2, 0x5B3E60EEB51DDCD8, 0x0A7C717017756FE7, 0xA73773805CA31934, - 0x4DBD6BB7A31E85FD, 0x24F619D3D5BC2DB4, 0x3E4AF35A1678D636, 0x84A1A8DF8D609239, - 0x359C862CD3BE4FCD, 0xCF3A39F5C27DC125, 0xC0FF62F8FD5F4C77, 0x5E9F2493DDAA166C, - 0x17424152BE1CA266, 0xA78AFA5AB4BBE0CD, 0x7BFB2E2CEF118346, 0x647C3E0FF3E3D241, - 0x0352E4055C13242E, 0x6F42FC70EB660E38, 0x0BEBAD4FABF523BA, 0x9269F4214414D61D, - 0x1CA8760277E6006C, 0x7BAD25A859D87B5D, 0xAD645ADCF7414F1D, 0xB07F517E88D7AFB3, - 0xB321C06FB5FFAB5C, 0xD50F162A1EFDD844, 0x1DFD3D1924FBE319, 0xDFAEAB2F09EF7E78, - 0xA7603B5AF07A0B1E, 
0x41CD044C0E5A4EE3, 0xF64D2F86E813BF33, 0xFF9FDB99305EB06A -}; - -const uint64_t t1ha_refval_64be[81] = { 0, - 0x6A580668D6048674, 0xDECC975A0E3B8177, 0xE3AB9C06FAF4D023, 0xE401FA8F1B6AF969, - 0x67DB1DAE56FB94E3, 0x1106266A09B7A073, 0x550339B1EF2C7BBB, 0x290A2BAF590045BB, - 0xA182C1258C09F54A, 0x137D53C34BE7143A, 0xF6D2B69C6F42BEDC, 0x39643EAF2CA2E4B4, - 0x22A81F139A2C9559, 0x5B3D6AEF0AF33807, 0x56E3F80A68643C08, 0x9E423BE502378780, - 0xCDB0986F9A5B2FD5, 0xD5B3C84E7933293F, 0xE5FB8C90399E9742, 0x5D393C1F77B2CF3D, - 0xC8C82F5B2FF09266, 0xACA0230CA6F7B593, 0xCB5805E2960D1655, 0x7E2AD5B704D77C95, - 0xC5E903CDB8B9EB5D, 0x4CC7D0D21CC03511, 0x8385DF382CFB3E93, 0xF17699D0564D348A, - 0xF77EE7F8274A4C8D, 0xB9D8CEE48903BABE, 0xFE0EBD2A82B9CFE9, 0xB49FB6397270F565, - 0x173735C8C342108E, 0xA37C7FBBEEC0A2EA, 0xC13F66F462BB0B6E, 0x0C04F3C2B551467E, - 0x76A9CB156810C96E, 0x2038850919B0B151, 0xCEA19F2B6EED647B, 0x6746656D2FA109A4, - 0xF05137F221007F37, 0x892FA9E13A3B4948, 0x4D57B70D37548A32, 0x1A7CFB3D566580E6, - 0x7CB30272A45E3FAC, 0x137CCFFD9D51423F, 0xB87D96F3B82DF266, 0x33349AEE7472ED37, - 0x5CC0D3C99555BC07, 0x4A8F4FA196D964EF, 0xE82A0D64F281FBFA, 0x38A1BAC2C36823E1, - 0x77D197C239FD737E, 0xFB07746B4E07DF26, 0xC8A2198E967672BD, 0x5F1A146D143FA05A, - 0x26B877A1201AB7AC, 0x74E5B145214723F8, 0xE9CE10E3C70254BC, 0x299393A0C05B79E8, - 0xFD2D2B9822A5E7E2, 0x85424FEA50C8E50A, 0xE6839E714B1FFFE5, 0x27971CCB46F9112A, - 0xC98695A2E0715AA9, 0x338E1CBB4F858226, 0xFC6B5C5CF7A8D806, 0x8973CAADDE8DA50C, - 0x9C6D47AE32EBAE72, 0x1EBF1F9F21D26D78, 0x80A9704B8E153859, 0x6AFD20A939F141FB, - 0xC35F6C2B3B553EEF, 0x59529E8B0DC94C1A, 0x1569DF036EBC4FA1, 0xDA32B88593C118F9, - 0xF01E4155FF5A5660, 0x765A2522DCE2B185, 0xCEE95554128073EF, 0x60F072A5CA51DE2F -}; - -/* *INDENT-ON* */ -/* clang-format on */ - -__cold int t1ha_selfcheck__t1ha1_le(void) { - return t1ha_selfcheck(t1ha1_le, t1ha_refval_64le); -} - -__cold int t1ha_selfcheck__t1ha1_be(void) { - return t1ha_selfcheck(t1ha1_be, t1ha_refval_64be); 
-} - -__cold int t1ha_selfcheck__t1ha1(void) { - return t1ha_selfcheck__t1ha1_le() | t1ha_selfcheck__t1ha1_be(); -} - -#endif /* T1HA1_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA1_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_64le[81] = { 0, + 0x6A580668D6048674, 0xA2FE904AFF0D0879, 0xE3AB9C06FAF4D023, 0x6AF1C60874C95442, + 0xB3557E561A6C5D82, 0x0AE73C696F3D37C0, 0x5EF25F7062324941, 0x9B784F3B4CE6AF33, + 0x6993BB206A74F070, 0xF1E95DF109076C4C, 0x4E1EB70C58E48540, 0x5FDD7649D8EC44E4, + 0x559122C706343421, 0x380133D58665E93D, 0x9CE74296C8C55AE4, 0x3556F9A5757AB6D0, + 0xF62751F7F25C469E, 0x851EEC67F6516D94, 0xED463EE3848A8695, 0xDC8791FEFF8ED3AC, + 0x2569C744E1A282CF, 0xF90EB7C1D70A80B9, 0x68DFA6A1B8050A4C, 0x94CCA5E8210D2134, + 0xF5CC0BEABC259F52, 0x40DBC1F51618FDA7, 0x0807945BF0FB52C6, 0xE5EF7E09DE70848D, + 0x63E1DF35FEBE994A, 0x2025E73769720D5A, 0xAD6120B2B8A152E1, 0x2A71D9F13959F2B7, + 0x8A20849A27C32548, 0x0BCBC9FE3B57884E, 0x0E028D255667AEAD, 0xBE66DAD3043AB694, + 0xB00E4C1238F9E2D4, 0x5C54BDE5AE280E82, 0x0E22B86754BC3BC4, 0x016707EBF858B84D, + 0x990015FBC9E095EE, 0x8B9AF0A3E71F042F, 0x6AA56E88BD380564, 0xAACE57113E681A0F, + 0x19F81514AFA9A22D, 0x80DABA3D62BEAC79, 0x715210412CABBF46, 0xD8FA0B9E9D6AA93F, + 0x6C2FC5A4109FD3A2, 0x5B3E60EEB51DDCD8, 0x0A7C717017756FE7, 0xA73773805CA31934, + 0x4DBD6BB7A31E85FD, 0x24F619D3D5BC2DB4, 0x3E4AF35A1678D636, 0x84A1A8DF8D609239, + 0x359C862CD3BE4FCD, 0xCF3A39F5C27DC125, 0xC0FF62F8FD5F4C77, 0x5E9F2493DDAA166C, + 0x17424152BE1CA266, 0xA78AFA5AB4BBE0CD, 0x7BFB2E2CEF118346, 0x647C3E0FF3E3D241, + 0x0352E4055C13242E, 0x6F42FC70EB660E38, 0x0BEBAD4FABF523BA, 0x9269F4214414D61D, + 0x1CA8760277E6006C, 0x7BAD25A859D87B5D, 0xAD645ADCF7414F1D, 0xB07F517E88D7AFB3, + 0xB321C06FB5FFAB5C, 
0xD50F162A1EFDD844, 0x1DFD3D1924FBE319, 0xDFAEAB2F09EF7E78, + 0xA7603B5AF07A0B1E, 0x41CD044C0E5A4EE3, 0xF64D2F86E813BF33, 0xFF9FDB99305EB06A +}; + +const uint64_t t1ha_refval_64be[81] = { 0, + 0x6A580668D6048674, 0xDECC975A0E3B8177, 0xE3AB9C06FAF4D023, 0xE401FA8F1B6AF969, + 0x67DB1DAE56FB94E3, 0x1106266A09B7A073, 0x550339B1EF2C7BBB, 0x290A2BAF590045BB, + 0xA182C1258C09F54A, 0x137D53C34BE7143A, 0xF6D2B69C6F42BEDC, 0x39643EAF2CA2E4B4, + 0x22A81F139A2C9559, 0x5B3D6AEF0AF33807, 0x56E3F80A68643C08, 0x9E423BE502378780, + 0xCDB0986F9A5B2FD5, 0xD5B3C84E7933293F, 0xE5FB8C90399E9742, 0x5D393C1F77B2CF3D, + 0xC8C82F5B2FF09266, 0xACA0230CA6F7B593, 0xCB5805E2960D1655, 0x7E2AD5B704D77C95, + 0xC5E903CDB8B9EB5D, 0x4CC7D0D21CC03511, 0x8385DF382CFB3E93, 0xF17699D0564D348A, + 0xF77EE7F8274A4C8D, 0xB9D8CEE48903BABE, 0xFE0EBD2A82B9CFE9, 0xB49FB6397270F565, + 0x173735C8C342108E, 0xA37C7FBBEEC0A2EA, 0xC13F66F462BB0B6E, 0x0C04F3C2B551467E, + 0x76A9CB156810C96E, 0x2038850919B0B151, 0xCEA19F2B6EED647B, 0x6746656D2FA109A4, + 0xF05137F221007F37, 0x892FA9E13A3B4948, 0x4D57B70D37548A32, 0x1A7CFB3D566580E6, + 0x7CB30272A45E3FAC, 0x137CCFFD9D51423F, 0xB87D96F3B82DF266, 0x33349AEE7472ED37, + 0x5CC0D3C99555BC07, 0x4A8F4FA196D964EF, 0xE82A0D64F281FBFA, 0x38A1BAC2C36823E1, + 0x77D197C239FD737E, 0xFB07746B4E07DF26, 0xC8A2198E967672BD, 0x5F1A146D143FA05A, + 0x26B877A1201AB7AC, 0x74E5B145214723F8, 0xE9CE10E3C70254BC, 0x299393A0C05B79E8, + 0xFD2D2B9822A5E7E2, 0x85424FEA50C8E50A, 0xE6839E714B1FFFE5, 0x27971CCB46F9112A, + 0xC98695A2E0715AA9, 0x338E1CBB4F858226, 0xFC6B5C5CF7A8D806, 0x8973CAADDE8DA50C, + 0x9C6D47AE32EBAE72, 0x1EBF1F9F21D26D78, 0x80A9704B8E153859, 0x6AFD20A939F141FB, + 0xC35F6C2B3B553EEF, 0x59529E8B0DC94C1A, 0x1569DF036EBC4FA1, 0xDA32B88593C118F9, + 0xF01E4155FF5A5660, 0x765A2522DCE2B185, 0xCEE95554128073EF, 0x60F072A5CA51DE2F +}; + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha1_le(void) { + return t1ha_selfcheck(t1ha1_le, t1ha_refval_64le); +} + +__cold int 
t1ha_selfcheck__t1ha1_be(void) { + return t1ha_selfcheck(t1ha1_be, t1ha_refval_64be); +} + +__cold int t1ha_selfcheck__t1ha1(void) { + return t1ha_selfcheck__t1ha1_le() | t1ha_selfcheck__t1ha1_be(); +} + +#endif /* T1HA1_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha2.c b/contrib/libs/t1ha/src/t1ha2.c index 68a1ea4eb9..009f922751 100644 --- a/contrib/libs/t1ha/src/t1ha2.c +++ b/contrib/libs/t1ha/src/t1ha2.c @@ -1,383 +1,383 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. 
In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA2_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, - uint64_t y) { - s->n.a = x; - s->n.b = y; -} - -static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, - uint64_t y) { - s->n.c = rot64(y, 23) + ~x; - s->n.d = ~y + rot64(x, 19); -} - -/* TODO: C++ template in the next version */ -#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ - \ - const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \ - const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \ - s->n.d ^= s->n.b + rot64(w1, 38); \ - s->n.c ^= s->n.a + rot64(w0, 57); \ - s->n.b ^= prime_6 * (c13 + w2); \ - s->n.a ^= prime_5 * (d02 + w3); \ - } while (0) - -static __always_inline void squash(t1ha_state256_t *s) { - s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); - s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); -} - -/* TODO: C++ template in the next version */ -#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \ - do { \ - const void *detent = (const uint8_t *)data + len - 31; \ - do { \ - const uint64_t *v = (const uint64_t *)data; \ - data = (const uint64_t *)data + 4; \ - prefetch(data); \ - T1HA2_UPDATE(le, ALIGNESS, state, v); \ - } while (likely(data < detent)); \ - } while (0) - -/* TODO: C++ template in the next version */ -#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t *v = (const uint64_t *)data; \ - switch (len) { \ - default: \ - mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ 
- case 19: \ - case 18: \ - case 17: \ - mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ - prime_1); \ - /* fall through */ \ - case 0: \ - return final64(s->n.a, s->n.b); \ - } \ - } while (0) - -/* TODO: C++ template in the next version */ -#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t *v = (const uint64_t *)data; \ - switch (len) { \ - default: \ - mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ - case 19: \ - case 18: \ - case 17: \ - mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ - prime_1); \ - /* fall through */ \ - case 0: \ - return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \ - } \ - } while (0) - -static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, - uint64_t d, uint64_t *h) { - mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); - mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); - mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); - mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); - *h = c + 
d; - return a ^ b; -} - -//------------------------------------------------------------------------------ - -uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { - t1ha_state256_t state; - init_ab(&state, seed, length); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA2_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.a = x; + s->n.b = y; +} + +static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.c = rot64(y, 23) + ~x; + s->n.d = ~y + rot64(x, 19); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ + \ + const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \ + const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \ + s->n.d ^= s->n.b + rot64(w1, 38); \ + s->n.c ^= s->n.a + rot64(w0, 57); \ + s->n.b ^= prime_6 * (c13 + w2); \ + s->n.a ^= prime_5 * (d02 + w3); \ + } while (0) + +static __always_inline void squash(t1ha_state256_t *s) { + s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); + s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + const void *detent = (const uint8_t *)data + len - 31; \ + do { \ + const uint64_t *v = (const uint64_t *)data; \ + data = (const 
uint64_t *)data + 4; \ + prefetch(data); \ + T1HA2_UPDATE(le, ALIGNESS, state, v); \ + } while (likely(data < detent)); \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final64(s->n.a, s->n.b); \ + } \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + 
case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \ + } \ + } while (0) + +static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, + uint64_t d, uint64_t *h) { + mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); + mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); + mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); + mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); + *h = c + d; + return a ^ b; +} + +//------------------------------------------------------------------------------ + +uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, unaligned, &state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики 
оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, unaligned, &state, data, length); - } else { - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, aligned, &state, data, length); - } -#endif -} - -uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, - const void *__restrict data, size_t length, - uint64_t seed) { - t1ha_state256_t state; - init_ab(&state, seed, length); - init_cd(&state, seed, length); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - if (unlikely(length > 32)) { + T1HA2_LOOP(le, aligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, aligned, &state, data, length); + } +#endif +} + +uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, + const void *__restrict data, size_t length, + uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + init_cd(&state, seed, length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ 
#pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); - } else { - if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, aligned, &state, data, length); - } -#endif -} - -//------------------------------------------------------------------------------ - -void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { - init_ab(&ctx->state, seed_x, seed_y); - init_cd(&ctx->state, seed_x, seed_y); - ctx->partial = 0; - ctx->total = 0; -} - -void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, - size_t length) { - ctx->total += 
length; - - if (ctx->partial) { - const size_t left = 32 - ctx->partial; - const size_t chunk = (length >= left) ? left : length; - memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); - ctx->partial += chunk; - if (ctx->partial < 32) { - assert(left >= length); - return; - } - ctx->partial = 0; - data = (const uint8_t *)data + chunk; - length -= chunk; - T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64); - } - - if (length >= 32) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA2_LOOP(le, aligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, aligned, &state, data, length); + } +#endif +} + +//------------------------------------------------------------------------------ + +void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { + init_ab(&ctx->state, seed_x, seed_y); + init_cd(&ctx->state, seed_x, seed_y); + ctx->partial = 0; + ctx->total = 0; +} + +void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, + size_t length) { + ctx->total += length; + + if (ctx->partial) { + const size_t left = 32 - ctx->partial; + const size_t chunk = (length >= left) ? 
left : length; + memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); + ctx->partial += chunk; + if (ctx->partial < 32) { + assert(left >= length); + return; + } + ctx->partial = 0; + data = (const uint8_t *)data + chunk; + length -= chunk; + T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64); + } + + if (length >= 32) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &ctx->state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &ctx->state, data, length); - } else { + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); + } else { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &ctx->state, data, length); - } -#endif - length &= 31; - } - - if (length) - memcpy(ctx->buffer.bytes, data, ctx->partial = length); -} - -uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, - uint64_t *__restrict extra_result) { - uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 
63); -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - bits = bswap64(bits); -#endif - t1ha2_update(ctx, &bits, 8); - - if (likely(!extra_result)) { - squash(&ctx->state); - T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); - } - - T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); -} - -#endif /* T1HA2_DISABLED */ + T1HA2_LOOP(le, aligned, &ctx->state, data, length); + } +#endif + length &= 31; + } + + if (length) + memcpy(ctx->buffer.bytes, data, ctx->partial = length); +} + +uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, + uint64_t *__restrict extra_result) { + uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + bits = bswap64(bits); +#endif + t1ha2_update(ctx, &bits, 8); + + if (likely(!extra_result)) { + squash(&ctx->state); + T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); + } + + T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); +} + +#endif /* T1HA2_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha2_selfcheck.c b/contrib/libs/t1ha/src/t1ha2_selfcheck.c index 275422fa64..1a01f99512 100644 --- a/contrib/libs/t1ha/src/t1ha2_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha2_selfcheck.c @@ -1,187 +1,187 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. 
The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA2_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_2atonce[81] = { 0, - 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA, - 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E, - 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6, - 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA, - 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 
0xB6C330AEAC4578AD, - 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57, - 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2, - 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56, - 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F, - 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD, - 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59, - 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD -}; - -const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, - 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9, - 0x79D8AFB550CEA471, 0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2, - 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897, - 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B, - 0x71F6130A2D08F788, 0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3, - 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267, - 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB, - 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24, - 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48, - 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3, - 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51, - 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54, - 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12, - 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9, - 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8, - 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954, - 0x869A75E04A976A3F, 
0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6, - 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42, - 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0, - 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4 -}; - -const uint64_t t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, - 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B, - 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E, - 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0, - 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F, - 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 0x286A321AF9D5D9FA, - 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635, - 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B, - 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF, - 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C, - 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF, - 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3, - 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529, - 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F, - 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337, - 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1, - 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D, - 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A, - 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42, - 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB, - 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 
0x03294A249EBE6B26 -}; - -const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, - 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA, - 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419, - 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC, - 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128, - 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3, - 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 0x81D2F34888D333A1, - 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D, - 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752, - 0xD4F5F6631934C988, 0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898, - 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B, - 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D, - 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113, - 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D, - 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A, - 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3, - 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9, - 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4, - 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77, - 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42, - 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA -}; - -/* *INDENT-ON* */ -/* clang-format on */ - -__cold int t1ha_selfcheck__t1ha2_atonce(void) { - return t1ha_selfcheck(t1ha2_atonce, t1ha_refval_2atonce); -} - -__cold static uint64_t thunk_atonce128(const void *data, size_t len, - uint64_t seed) { - uint64_t 
unused; - return t1ha2_atonce128(&unused, data, len, seed); -} - -__cold int t1ha_selfcheck__t1ha2_atonce128(void) { - return t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128); -} - -__cold static uint64_t thunk_stream(const void *data, size_t len, - uint64_t seed) { - t1ha_context_t ctx; - t1ha2_init(&ctx, seed, seed); - t1ha2_update(&ctx, data, len); - return t1ha2_final(&ctx, NULL); -} - -__cold static uint64_t thunk_stream128(const void *data, size_t len, - uint64_t seed) { - t1ha_context_t ctx; - t1ha2_init(&ctx, seed, seed); - t1ha2_update(&ctx, data, len); - uint64_t unused; - return t1ha2_final(&ctx, &unused); -} - -__cold int t1ha_selfcheck__t1ha2_stream(void) { - return t1ha_selfcheck(thunk_stream, t1ha_refval_2stream) | - t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128); -} - -__cold int t1ha_selfcheck__t1ha2(void) { - return t1ha_selfcheck__t1ha2_atonce() | t1ha_selfcheck__t1ha2_atonce128() | - t1ha_selfcheck__t1ha2_stream(); -} - -#endif /* T1HA2_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#ifndef T1HA2_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_2atonce[81] = { 0, + 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA, + 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E, + 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6, + 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA, + 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 0xB6C330AEAC4578AD, + 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57, + 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2, + 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56, + 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F, + 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD, + 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59, + 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD +}; + +const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, + 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9, + 0x79D8AFB550CEA471, 
0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2, + 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897, + 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B, + 0x71F6130A2D08F788, 0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3, + 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267, + 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB, + 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24, + 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48, + 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3, + 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51, + 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54, + 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12, + 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9, + 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8, + 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954, + 0x869A75E04A976A3F, 0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6, + 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42, + 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0, + 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4 +}; + +const uint64_t t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, + 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B, + 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E, + 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0, + 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F, + 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 
0x286A321AF9D5D9FA, + 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635, + 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B, + 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF, + 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C, + 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF, + 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3, + 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529, + 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F, + 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337, + 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1, + 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D, + 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A, + 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42, + 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB, + 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 0x03294A249EBE6B26 +}; + +const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, + 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA, + 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419, + 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC, + 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128, + 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3, + 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 0x81D2F34888D333A1, + 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D, + 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752, + 0xD4F5F6631934C988, 
0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898, + 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B, + 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D, + 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113, + 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D, + 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A, + 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3, + 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9, + 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4, + 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77, + 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42, + 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA +}; + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha2_atonce(void) { + return t1ha_selfcheck(t1ha2_atonce, t1ha_refval_2atonce); +} + +__cold static uint64_t thunk_atonce128(const void *data, size_t len, + uint64_t seed) { + uint64_t unused; + return t1ha2_atonce128(&unused, data, len, seed); +} + +__cold int t1ha_selfcheck__t1ha2_atonce128(void) { + return t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128); +} + +__cold static uint64_t thunk_stream(const void *data, size_t len, + uint64_t seed) { + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + return t1ha2_final(&ctx, NULL); +} + +__cold static uint64_t thunk_stream128(const void *data, size_t len, + uint64_t seed) { + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + uint64_t unused; + return t1ha2_final(&ctx, &unused); +} + +__cold int t1ha_selfcheck__t1ha2_stream(void) { + return t1ha_selfcheck(thunk_stream, t1ha_refval_2stream) | + 
t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128); +} + +__cold int t1ha_selfcheck__t1ha2(void) { + return t1ha_selfcheck__t1ha2_atonce() | t1ha_selfcheck__t1ha2_atonce128() | + t1ha_selfcheck__t1ha2_stream(); +} + +#endif /* T1HA2_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha_bits.h b/contrib/libs/t1ha/src/t1ha_bits.h index 5cd34a7496..93b6b51a54 100644 --- a/contrib/libs/t1ha/src/t1ha_bits.h +++ b/contrib/libs/t1ha/src/t1ha_bits.h @@ -1,406 +1,406 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. 
In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#pragma once - -#if defined(_MSC_VER) -#pragma warning(disable : 4201) /* nameless struct/union */ -#if _MSC_VER > 1800 -#pragma warning(disable : 4464) /* relative include path contains '..' */ -#endif /* 1800 */ -#endif /* MSVC */ -#include "../t1ha.h" - -#ifndef T1HA_USE_FAST_ONESHOT_READ -/* Define it to 1 for little bit faster code. - * Unfortunately this may triggering a false-positive alarms from Valgrind, - * AddressSanitizer and other similar tool. - * So, define it to 0 for calmness if doubt. */ -#define T1HA_USE_FAST_ONESHOT_READ 1 -#endif /* T1HA_USE_FAST_ONESHOT_READ */ - -/*****************************************************************************/ - -#include <assert.h> /* for assert() */ -#include <stdbool.h> /* for bool */ -#include <string.h> /* for memcpy() */ - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ - __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#error Unsupported byte order. -#endif - -#define T1HA_UNALIGNED_ACCESS__UNABLE 0 -#define T1HA_UNALIGNED_ACCESS__SLOW 1 -#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2 - -#ifndef T1HA_SYS_UNALIGNED_ACCESS -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#elif defined(__ia32__) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#elif defined(__e2k__) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW -#elif defined(__ARM_FEATURE_UNALIGNED) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#else -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE -#endif -#endif /* T1HA_SYS_UNALIGNED_ACCESS */ - -#define ALIGNMENT_16 2 -#define ALIGNMENT_32 4 -#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul -#define ALIGNMENT_64 8 -#else -#define ALIGNMENT_64 4 -#endif - -#ifndef PAGESIZE -#define PAGESIZE 4096 -#endif /* PAGESIZE */ - -/***************************************************************************/ - -#ifndef __has_builtin -#define __has_builtin(x) 
(0) -#endif - -#ifndef __has_warning -#define __has_warning(x) (0) -#endif - -#ifndef __has_feature -#define __has_feature(x) (0) -#endif - -#ifndef __has_extension -#define __has_extension(x) (0) -#endif - -#if __has_feature(address_sanitizer) -#define __SANITIZE_ADDRESS__ 1 -#endif - -#ifndef __optimize + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once + +#if defined(_MSC_VER) +#pragma warning(disable : 4201) /* nameless struct/union */ +#if _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' */ +#endif /* 1800 */ +#endif /* MSVC */ +#include "../t1ha.h" + +#ifndef T1HA_USE_FAST_ONESHOT_READ +/* Define it to 1 for little bit faster code. + * Unfortunately this may triggering a false-positive alarms from Valgrind, + * AddressSanitizer and other similar tool. + * So, define it to 0 for calmness if doubt. */ +#define T1HA_USE_FAST_ONESHOT_READ 1 +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + +/*****************************************************************************/ + +#include <assert.h> /* for assert() */ +#include <stdbool.h> /* for bool */ +#include <string.h> /* for memcpy() */ + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ + __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#error Unsupported byte order. 
+#endif + +#define T1HA_UNALIGNED_ACCESS__UNABLE 0 +#define T1HA_UNALIGNED_ACCESS__SLOW 1 +#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2 + +#ifndef T1HA_SYS_UNALIGNED_ACCESS +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__ia32__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__e2k__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW +#elif defined(__ARM_FEATURE_UNALIGNED) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#else +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE +#endif +#endif /* T1HA_SYS_UNALIGNED_ACCESS */ + +#define ALIGNMENT_16 2 +#define ALIGNMENT_32 4 +#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul +#define ALIGNMENT_64 8 +#else +#define ALIGNMENT_64 4 +#endif + +#ifndef PAGESIZE +#define PAGESIZE 4096 +#endif /* PAGESIZE */ + +/***************************************************************************/ + +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + +#ifndef __has_warning +#define __has_warning(x) (0) +#endif + +#ifndef __has_feature +#define __has_feature(x) (0) +#endif + +#ifndef __has_extension +#define __has_extension(x) (0) +#endif + +#if __has_feature(address_sanitizer) +#define __SANITIZE_ADDRESS__ 1 +#endif + +#ifndef __optimize #if defined(__clang__) && !__has_attribute(__optimize__) -#define __optimize(ops) +#define __optimize(ops) #elif defined(__GNUC__) || __has_attribute(__optimize__) #define __optimize(ops) __attribute__((__optimize__(ops))) -#else -#define __optimize(ops) -#endif -#endif /* __optimize */ - -#ifndef __cold -#if defined(__OPTIMIZE__) -#if defined(__e2k__) +#else +#define __optimize(ops) +#endif +#endif /* __optimize */ + +#ifndef __cold +#if defined(__OPTIMIZE__) +#if defined(__e2k__) #define __cold __optimize(1) __attribute__((__cold__)) #elif defined(__clang__) && !__has_attribute(__cold__) && \ 
__has_attribute(__section__) -/* just put infrequently used functions in separate section */ +/* just put infrequently used functions in separate section */ #define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os") #elif defined(__GNUC__) || __has_attribute(__cold__) #define __cold __attribute__((__cold__)) __optimize("Os") -#else -#define __cold __optimize("Os") -#endif -#else -#define __cold -#endif -#endif /* __cold */ - -#if __GNUC_PREREQ(4, 4) || defined(__clang__) - -#if defined(__ia32__) || defined(__e2k__) -#include <x86intrin.h> -#endif - -#if defined(__ia32__) && !defined(__cpuid_count) -#include <cpuid.h> -#endif - -#if defined(__e2k__) +#else +#define __cold __optimize("Os") +#endif +#else +#define __cold +#endif +#endif /* __cold */ + +#if __GNUC_PREREQ(4, 4) || defined(__clang__) + +#if defined(__ia32__) || defined(__e2k__) +#include <x86intrin.h> +#endif + +#if defined(__ia32__) && !defined(__cpuid_count) +#include <cpuid.h> +#endif + +#if defined(__e2k__) #include <e2kbuiltin.h> -#endif - -#ifndef likely -#define likely(cond) __builtin_expect(!!(cond), 1) -#endif - -#ifndef unlikely -#define unlikely(cond) __builtin_expect(!!(cond), 0) -#endif - -#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable) -#define unreachable() __builtin_unreachable() -#endif - -#define bswap64(v) __builtin_bswap64(v) -#define bswap32(v) __builtin_bswap32(v) -#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) -#define bswap16(v) __builtin_bswap16(v) -#endif - +#endif + +#ifndef likely +#define likely(cond) __builtin_expect(!!(cond), 1) +#endif + +#ifndef unlikely +#define unlikely(cond) __builtin_expect(!!(cond), 0) +#endif + +#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable) +#define unreachable() __builtin_unreachable() +#endif + +#define bswap64(v) __builtin_bswap64(v) +#define bswap32(v) __builtin_bswap32(v) +#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) +#define bswap16(v) __builtin_bswap16(v) +#endif 
+ #if !defined(__maybe_unused) && \ (__GNUC_PREREQ(4, 3) || __has_attribute(__unused__)) #define __maybe_unused __attribute__((__unused__)) -#endif - -#if !defined(__always_inline) && \ +#endif + +#if !defined(__always_inline) && \ (__GNUC_PREREQ(3, 2) || __has_attribute(__always_inline__)) #define __always_inline __inline __attribute__((__always_inline__)) -#endif - -#if defined(__e2k__) - -#if __iset__ >= 3 -#define mul_64x64_high(a, b) __builtin_e2k_umulhd(a, b) -#endif /* __iset__ >= 3 */ - -#if __iset__ >= 5 -static __maybe_unused __always_inline unsigned -e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { - *sum = base + addend; - return (unsigned)__builtin_e2k_addcd_c(base, addend, 0); -} -#define add64carry_first(base, addend, sum) \ - e2k_add64carry_first(base, addend, sum) - -static __maybe_unused __always_inline unsigned -e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend, - uint64_t *sum) { - *sum = __builtin_e2k_addcd(base, addend, carry); - return (unsigned)__builtin_e2k_addcd_c(base, addend, carry); -} -#define add64carry_next(carry, base, addend, sum) \ - e2k_add64carry_next(carry, base, addend, sum) - -static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry, - uint64_t base, - uint64_t addend, - uint64_t *sum) { - *sum = __builtin_e2k_addcd(base, addend, carry); -} -#define add64carry_last(carry, base, addend, sum) \ - e2k_add64carry_last(carry, base, addend, sum) -#endif /* __iset__ >= 5 */ - -#define fetch64_be_aligned(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr)) -#define fetch32_be_aligned(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr)) - -#endif /* __e2k__ Elbrus */ - -#elif defined(_MSC_VER) - -#if _MSC_FULL_VER < 190024234 && defined(_M_IX86) -#pragma message( \ - "For AES-NI at least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required.") -#endif -#if _MSC_FULL_VER < 191526730 -#pragma message( \ - "It is recommended to use \"Microsoft C/C++ 
Compiler\" version 19.15.26730 (Visual Studio 2017 15.8) or newer.") -#endif -#if _MSC_FULL_VER < 180040629 -#error At least "Microsoft C/C++ Compiler" version 18.00.40629 (Visual Studio 2013 Update 5) is required. -#endif - -#pragma warning(push, 1) - -#include <intrin.h> -#include <stdlib.h> -#define likely(cond) (cond) -#define unlikely(cond) (cond) -#define unreachable() __assume(0) -#define bswap64(v) _byteswap_uint64(v) -#define bswap32(v) _byteswap_ulong(v) -#define bswap16(v) _byteswap_ushort(v) -#define rot64(v, s) _rotr64(v, s) -#define rot32(v, s) _rotr(v, s) -#define __always_inline __forceinline - -#if defined(_M_X64) || defined(_M_IA64) -#pragma intrinsic(_umul128) -#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) -#pragma intrinsic(_addcarry_u64) -#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum) -#define add64carry_next(carry, base, addend, sum) \ - _addcarry_u64(carry, base, addend, sum) -#define add64carry_last(carry, base, addend, sum) \ - (void)_addcarry_u64(carry, base, addend, sum) -#endif - -#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) -#pragma intrinsic(__umulh) -#define mul_64x64_high(a, b) __umulh(a, b) -#endif - -#if defined(_M_IX86) -#pragma intrinsic(__emulu) -#define mul_32x32_64(a, b) __emulu(a, b) - -#if _MSC_VER >= 1915 /* LY: workaround for SSA-optimizer bug */ -#pragma intrinsic(_addcarry_u32) -#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum) -#define add32carry_next(carry, base, addend, sum) \ - _addcarry_u32(carry, base, addend, sum) -#define add32carry_last(carry, base, addend, sum) \ - (void)_addcarry_u32(carry, base, addend, sum) - -static __forceinline char -msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = 
(uint32_t)(addend >> 32); - return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), - base_32h, addend_32h, sum32 + 1); -} -#define add64carry_first(base, addend, sum) \ - msvc32_add64carry_first(base, addend, sum) - -static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, - uint64_t addend, - uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = (uint32_t)(addend >> 32); - return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), - base_32h, addend_32h, sum32 + 1); -} -#define add64carry_next(carry, base, addend, sum) \ - msvc32_add64carry_next(carry, base, addend, sum) - -static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, - uint64_t addend, - uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = (uint32_t)(addend >> 32); - add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, - addend_32h, sum32 + 1); -} -#define add64carry_last(carry, base, addend, sum) \ - msvc32_add64carry_last(carry, base, addend, sum) -#endif /* _MSC_FULL_VER >= 190024231 */ - -#elif defined(_M_ARM) -#define mul_32x32_64(a, b) _arm_umull(a, b) -#endif - -#pragma warning(pop) -#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ - has been removed */ -#pragma warning(disable : 4710) /* 'xyz': function not inlined */ -#pragma warning(disable : 4711) /* function 'xyz' selected for \ - automatic inline expansion */ -#pragma warning(disable : 4127) /* conditional expression is constant */ -#pragma warning(disable : 4702) /* unreachable code */ -#endif /* Compiler */ - -#ifndef likely -#define likely(cond) (cond) -#endif 
-#ifndef unlikely -#define unlikely(cond) (cond) -#endif -#ifndef __maybe_unused -#define __maybe_unused -#endif -#ifndef __always_inline -#define __always_inline __inline -#endif -#ifndef unreachable -#define unreachable() \ - do { \ - } while (1) -#endif - -#ifndef bswap64 -#if defined(bswap_64) -#define bswap64 bswap_64 -#elif defined(__bswap_64) -#define bswap64 __bswap_64 -#else -static __always_inline uint64_t bswap64(uint64_t v) { - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & UINT64_C(0x000000000000ff00)); -} -#endif -#endif /* bswap64 */ - -#ifndef bswap32 -#if defined(bswap_32) -#define bswap32 bswap_32 -#elif defined(__bswap_32) -#define bswap32 __bswap_32 -#else -static __always_inline uint32_t bswap32(uint32_t v) { - return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | - ((v >> 8) & UINT32_C(0x0000ff00)); -} -#endif -#endif /* bswap32 */ - -#ifndef bswap16 -#if defined(bswap_16) -#define bswap16 bswap_16 -#elif defined(__bswap_16) -#define bswap16 __bswap_16 -#else -static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } -#endif -#endif /* bswap16 */ - +#endif + +#if defined(__e2k__) + +#if __iset__ >= 3 +#define mul_64x64_high(a, b) __builtin_e2k_umulhd(a, b) +#endif /* __iset__ >= 3 */ + +#if __iset__ >= 5 +static __maybe_unused __always_inline unsigned +e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { + *sum = base + addend; + return (unsigned)__builtin_e2k_addcd_c(base, addend, 0); +} +#define add64carry_first(base, addend, sum) \ + e2k_add64carry_first(base, addend, sum) + +static __maybe_unused __always_inline unsigned +e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend, + uint64_t *sum) { + *sum = __builtin_e2k_addcd(base, addend, carry); + return 
(unsigned)__builtin_e2k_addcd_c(base, addend, carry); +} +#define add64carry_next(carry, base, addend, sum) \ + e2k_add64carry_next(carry, base, addend, sum) + +static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry, + uint64_t base, + uint64_t addend, + uint64_t *sum) { + *sum = __builtin_e2k_addcd(base, addend, carry); +} +#define add64carry_last(carry, base, addend, sum) \ + e2k_add64carry_last(carry, base, addend, sum) +#endif /* __iset__ >= 5 */ + +#define fetch64_be_aligned(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr)) +#define fetch32_be_aligned(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr)) + +#endif /* __e2k__ Elbrus */ + +#elif defined(_MSC_VER) + +#if _MSC_FULL_VER < 190024234 && defined(_M_IX86) +#pragma message( \ + "For AES-NI at least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required.") +#endif +#if _MSC_FULL_VER < 191526730 +#pragma message( \ + "It is recommended to use \"Microsoft C/C++ Compiler\" version 19.15.26730 (Visual Studio 2017 15.8) or newer.") +#endif +#if _MSC_FULL_VER < 180040629 +#error At least "Microsoft C/C++ Compiler" version 18.00.40629 (Visual Studio 2013 Update 5) is required. 
+#endif + +#pragma warning(push, 1) + +#include <intrin.h> +#include <stdlib.h> +#define likely(cond) (cond) +#define unlikely(cond) (cond) +#define unreachable() __assume(0) +#define bswap64(v) _byteswap_uint64(v) +#define bswap32(v) _byteswap_ulong(v) +#define bswap16(v) _byteswap_ushort(v) +#define rot64(v, s) _rotr64(v, s) +#define rot32(v, s) _rotr(v, s) +#define __always_inline __forceinline + +#if defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(_umul128) +#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) +#pragma intrinsic(_addcarry_u64) +#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum) +#define add64carry_next(carry, base, addend, sum) \ + _addcarry_u64(carry, base, addend, sum) +#define add64carry_last(carry, base, addend, sum) \ + (void)_addcarry_u64(carry, base, addend, sum) +#endif + +#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(__umulh) +#define mul_64x64_high(a, b) __umulh(a, b) +#endif + +#if defined(_M_IX86) +#pragma intrinsic(__emulu) +#define mul_32x32_64(a, b) __emulu(a, b) + +#if _MSC_VER >= 1915 /* LY: workaround for SSA-optimizer bug */ +#pragma intrinsic(_addcarry_u32) +#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum) +#define add32carry_next(carry, base, addend, sum) \ + _addcarry_u32(carry, base, addend, sum) +#define add32carry_last(carry, base, addend, sum) \ + (void)_addcarry_u32(carry, base, addend, sum) + +static __forceinline char +msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), + base_32h, addend_32h, sum32 + 1); +} +#define add64carry_first(base, addend, sum) \ + msvc32_add64carry_first(base, 
addend, sum) + +static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, + uint64_t addend, + uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), + base_32h, addend_32h, sum32 + 1); +} +#define add64carry_next(carry, base, addend, sum) \ + msvc32_add64carry_next(carry, base, addend, sum) + +static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, + uint64_t addend, + uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, + addend_32h, sum32 + 1); +} +#define add64carry_last(carry, base, addend, sum) \ + msvc32_add64carry_last(carry, base, addend, sum) +#endif /* _MSC_FULL_VER >= 190024231 */ + +#elif defined(_M_ARM) +#define mul_32x32_64(a, b) _arm_umull(a, b) +#endif + +#pragma warning(pop) +#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ + has been removed */ +#pragma warning(disable : 4710) /* 'xyz': function not inlined */ +#pragma warning(disable : 4711) /* function 'xyz' selected for \ + automatic inline expansion */ +#pragma warning(disable : 4127) /* conditional expression is constant */ +#pragma warning(disable : 4702) /* unreachable code */ +#endif /* Compiler */ + +#ifndef likely +#define likely(cond) (cond) +#endif +#ifndef unlikely +#define unlikely(cond) (cond) +#endif +#ifndef __maybe_unused +#define __maybe_unused +#endif +#ifndef __always_inline +#define __always_inline __inline +#endif +#ifndef unreachable +#define 
unreachable() \ + do { \ + } while (1) +#endif + +#ifndef bswap64 +#if defined(bswap_64) +#define bswap64 bswap_64 +#elif defined(__bswap_64) +#define bswap64 __bswap_64 +#else +static __always_inline uint64_t bswap64(uint64_t v) { + return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | + ((v << 24) & UINT64_C(0x0000ff0000000000)) | + ((v << 8) & UINT64_C(0x000000ff00000000)) | + ((v >> 8) & UINT64_C(0x00000000ff000000)) | + ((v >> 24) & UINT64_C(0x0000000000ff0000)) | + ((v >> 40) & UINT64_C(0x000000000000ff00)); +} +#endif +#endif /* bswap64 */ + +#ifndef bswap32 +#if defined(bswap_32) +#define bswap32 bswap_32 +#elif defined(__bswap_32) +#define bswap32 __bswap_32 +#else +static __always_inline uint32_t bswap32(uint32_t v) { + return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | + ((v >> 8) & UINT32_C(0x0000ff00)); +} +#endif +#endif /* bswap32 */ + +#ifndef bswap16 +#if defined(bswap_16) +#define bswap16 bswap_16 +#elif defined(__bswap_16) +#define bswap16 __bswap_16 +#else +static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } +#endif +#endif /* bswap16 */ + #if defined(__ia32__) || \ T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT /* The __builtin_assume_aligned() leads gcc/clang to load values into the @@ -411,844 +411,844 @@ static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } #define read_aligned(ptr, bits) (*(const uint##bits##_t *__restrict)(ptr)) #endif /* __ia32__ */ -#ifndef read_unaligned +#ifndef read_unaligned #if defined(__GNUC__) || __has_attribute(__packed__) -typedef struct { - uint8_t unaligned_8; - uint16_t unaligned_16; - uint32_t unaligned_32; - uint64_t unaligned_64; +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; } __attribute__((__packed__)) t1ha_unaligned_proxy; -#define read_unaligned(ptr, bits) \ - (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ - 
t1ha_unaligned_proxy, unaligned_##bits))) \ - ->unaligned_##bits) -#elif defined(_MSC_VER) -#pragma warning( \ - disable : 4235) /* nonstandard extension used: '__unaligned' \ - * keyword not supported on this architecture */ -#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr)) -#else -#pragma pack(push, 1) -typedef struct { - uint8_t unaligned_8; - uint16_t unaligned_16; - uint32_t unaligned_32; - uint64_t unaligned_64; -} t1ha_unaligned_proxy; -#pragma pack(pop) -#define read_unaligned(ptr, bits) \ - (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ - t1ha_unaligned_proxy, unaligned_##bits))) \ - ->unaligned_##bits) -#endif -#endif /* read_unaligned */ - -#ifndef read_aligned -#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) -#define read_aligned(ptr, bits) \ - (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#elif defined(_MSC_VER) +#pragma warning( \ + disable : 4235) /* nonstandard extension used: '__unaligned' \ + * keyword not supported on this architecture */ +#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr)) +#else +#pragma pack(push, 1) +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; +} t1ha_unaligned_proxy; +#pragma pack(pop) +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#endif +#endif /* read_unaligned */ + +#ifndef read_aligned +#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) +#define read_aligned(ptr, bits) \ + (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) #elif (__GNUC_PREREQ(3, 3) || __has_attribute(__aligned__)) && \ 
!defined(__clang__) -#define read_aligned(ptr, bits) \ +#define read_aligned(ptr, bits) \ (*(const uint##bits##_t \ __attribute__((__aligned__(ALIGNMENT_##bits))) *)(ptr)) #elif __has_attribute(__assume_aligned__) - -static __always_inline const + +static __always_inline const uint16_t *__attribute__((__assume_aligned__(ALIGNMENT_16))) - cast_aligned_16(const void *ptr) { - return (const uint16_t *)ptr; -} -static __always_inline const + cast_aligned_16(const void *ptr) { + return (const uint16_t *)ptr; +} +static __always_inline const uint32_t *__attribute__((__assume_aligned__(ALIGNMENT_32))) - cast_aligned_32(const void *ptr) { - return (const uint32_t *)ptr; -} -static __always_inline const + cast_aligned_32(const void *ptr) { + return (const uint32_t *)ptr; +} +static __always_inline const uint64_t *__attribute__((__assume_aligned__(ALIGNMENT_64))) - cast_aligned_64(const void *ptr) { - return (const uint64_t *)ptr; -} - -#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr)) - -#elif defined(_MSC_VER) -#define read_aligned(ptr, bits) \ - (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) -#else -#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) -#endif -#endif /* read_aligned */ - -#ifndef prefetch -#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \ - !defined(__ia32__) -#define prefetch(ptr) __builtin_prefetch(ptr) -#elif defined(_M_ARM64) || defined(_M_ARM) -#define prefetch(ptr) __prefetch(ptr) -#else -#define prefetch(ptr) \ - do { \ - (void)(ptr); \ - } while (0) -#endif -#endif /* prefetch */ - -#if __has_warning("-Wconstant-logical-operand") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wconstant-logical-operand" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wconstant-logical-operand" -#else -#pragma warning disable "constant-logical-operand" -#endif -#endif /* -Wconstant-logical-operand */ - -#if __has_warning("-Wtautological-pointer-compare") -#if defined(__clang__) 
-#pragma clang diagnostic ignored "-Wtautological-pointer-compare" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wtautological-pointer-compare" -#else -#pragma warning disable "tautological-pointer-compare" -#endif -#endif /* -Wtautological-pointer-compare */ - -/***************************************************************************/ - -#if __GNUC_PREREQ(4, 0) -#pragma GCC visibility push(hidden) -#endif /* __GNUC_PREREQ(4,0) */ - -/*---------------------------------------------------------- Little Endian */ - -#ifndef fetch16_le_aligned + cast_aligned_64(const void *ptr) { + return (const uint64_t *)ptr; +} + +#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr)) + +#elif defined(_MSC_VER) +#define read_aligned(ptr, bits) \ + (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) +#else +#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) +#endif +#endif /* read_aligned */ + +#ifndef prefetch +#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \ + !defined(__ia32__) +#define prefetch(ptr) __builtin_prefetch(ptr) +#elif defined(_M_ARM64) || defined(_M_ARM) +#define prefetch(ptr) __prefetch(ptr) +#else +#define prefetch(ptr) \ + do { \ + (void)(ptr); \ + } while (0) +#endif +#endif /* prefetch */ + +#if __has_warning("-Wconstant-logical-operand") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wconstant-logical-operand" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wconstant-logical-operand" +#else +#pragma warning disable "constant-logical-operand" +#endif +#endif /* -Wconstant-logical-operand */ + +#if __has_warning("-Wtautological-pointer-compare") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wtautological-pointer-compare" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wtautological-pointer-compare" +#else +#pragma warning disable "tautological-pointer-compare" +#endif +#endif /* -Wtautological-pointer-compare */ + 
+/***************************************************************************/ + +#if __GNUC_PREREQ(4, 0) +#pragma GCC visibility push(hidden) +#endif /* __GNUC_PREREQ(4,0) */ + +/*---------------------------------------------------------- Little Endian */ + +#ifndef fetch16_le_aligned static __maybe_unused __always_inline uint16_t fetch16_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_16 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 16); -#else - return bswap16(read_aligned(v, 16)); -#endif -} -#endif /* fetch16_le_aligned */ - -#ifndef fetch16_le_unaligned + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_le_aligned */ + +#ifndef fetch16_le_unaligned static __maybe_unused __always_inline uint16_t fetch16_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - const uint8_t *p = (const uint8_t *)v; - return p[0] | (uint16_t)p[1] << 8; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 16); -#else - return bswap16(read_unaligned(v, 16)); -#endif -} -#endif /* fetch16_le_unaligned */ - -#ifndef fetch32_le_aligned +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return p[0] | (uint16_t)p[1] << 8; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_le_unaligned */ + +#ifndef fetch32_le_aligned static __maybe_unused __always_inline uint32_t fetch32_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_32 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 32); -#else - return bswap32(read_aligned(v, 32)); -#endif -} -#endif /* fetch32_le_aligned */ - -#ifndef fetch32_le_unaligned + assert(((uintptr_t)v) % 
ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_le_aligned */ + +#ifndef fetch32_le_unaligned static __maybe_unused __always_inline uint32_t fetch32_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return fetch16_le_unaligned(v) | - (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 32); -#else - return bswap32(read_unaligned(v, 32)); -#endif -} -#endif /* fetch32_le_unaligned */ - -#ifndef fetch64_le_aligned +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch16_le_unaligned(v) | + (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_le_unaligned */ + +#ifndef fetch64_le_aligned static __maybe_unused __always_inline uint64_t fetch64_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_64 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 64); -#else - return bswap64(read_aligned(v, 64)); -#endif -} -#endif /* fetch64_le_aligned */ - -#ifndef fetch64_le_unaligned + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_le_aligned */ + +#ifndef fetch64_le_unaligned static __maybe_unused __always_inline uint64_t fetch64_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return fetch32_le_unaligned(v) | - (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 64); -#else - return bswap64(read_unaligned(v, 64)); -#endif 
-} -#endif /* fetch64_le_unaligned */ - +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch32_le_unaligned(v) | + (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_le_unaligned */ + static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((8 - tail) & 7) << 3; - return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift); -#else - uint64_t r = 0; - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed byte reordering. */ - case 0: - return fetch64_le_aligned(p); - case 7: - r = (uint64_t)p[6] << 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 32; - /* fall through */ - case 4: - return r + fetch32_le_aligned(p); - case 3: - r = (uint64_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_aligned(p); - case 1: - return p[0]; -#else - case 0: - r = p[7] << 8; - /* fall through */ - case 7: - r += p[6]; - r <<= 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 8; - /* fall through */ - case 4: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -#if T1HA_USE_FAST_ONESHOT_READ && \ - T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ - defined(PAGESIZE) && PAGESIZE > 42 && 
!defined(__SANITIZE_ADDRESS__) -#define can_read_underside(ptr, size) \ - (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) -#endif /* T1HA_USE_FAST_ONESHOT_READ */ - + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed byte reordering. */ + case 0: + return fetch64_le_aligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_aligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_aligned(p); + case 1: + return p[0]; +#else + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +#if T1HA_USE_FAST_ONESHOT_READ && \ + T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ + defined(PAGESIZE) && PAGESIZE > 42 && !defined(__SANITIZE_ADDRESS__) +#define can_read_underside(ptr, size) \ + (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + static __maybe_unused __always_inline uint64_t tail64_le_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#if 
defined(can_read_underside) && \ - (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) - /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. */ - const unsigned offset = (8 - tail) & 7; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 8))) { - p -= offset; - return fetch64_le_unaligned(p) >> shift; - } - return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); -#else - uint64_t r = 0; - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch64_le_unaligned(p); - case 7: - r = (uint64_t)p[6] << 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 32; - /* fall through */ - case 4: - return r + fetch32_le_unaligned(p); - case 3: - r = (uint64_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_unaligned(p); - case 1: - return p[0]; -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. 
*/ - case 0: - r = p[7] << 8; - /* fall through */ - case 7: - r += p[6]; - r <<= 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 8; - /* fall through */ - case 4: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* can_read_underside */ -} - -/*------------------------------------------------------------- Big Endian */ - -#ifndef fetch16_be_aligned -static __maybe_unused __always_inline uint16_t -fetch16_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_16 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 16); -#else - return bswap16(read_aligned(v, 16)); -#endif -} -#endif /* fetch16_be_aligned */ - -#ifndef fetch16_be_unaligned -static __maybe_unused __always_inline uint16_t -fetch16_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - const uint8_t *p = (const uint8_t *)v; - return (uint16_t)p[0] << 8 | p[1]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 16); -#else - return bswap16(read_unaligned(v, 16)); -#endif -} -#endif /* fetch16_be_unaligned */ - -#ifndef fetch32_be_aligned -static __maybe_unused __always_inline uint32_t -fetch32_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_32 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 32); -#else - return bswap32(read_aligned(v, 32)); -#endif -} -#endif /* fetch32_be_aligned */ - -#ifndef fetch32_be_unaligned -static __maybe_unused __always_inline uint32_t -fetch32_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return (uint32_t)fetch16_be_unaligned(v) << 16 | - fetch16_be_unaligned((const uint8_t *)v + 2); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 32); -#else - return 
bswap32(read_unaligned(v, 32)); -#endif -} -#endif /* fetch32_be_unaligned */ - -#ifndef fetch64_be_aligned -static __maybe_unused __always_inline uint64_t -fetch64_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_64 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 64); -#else - return bswap64(read_aligned(v, 64)); -#endif -} -#endif /* fetch64_be_aligned */ - -#ifndef fetch64_be_unaligned -static __maybe_unused __always_inline uint64_t -fetch64_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return (uint64_t)fetch32_be_unaligned(v) << 32 | - fetch32_be_unaligned((const uint8_t *)v + 4); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 64); -#else - return bswap64(read_unaligned(v, 64)); -#endif -} -#endif /* fetch64_be_unaligned */ - -static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((8 - tail) & 7) << 3; - return fetch64_be_aligned(p) >> shift; -#else - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return fetch16_be_aligned(p); - case 3: - return (uint32_t)fetch16_be_aligned(p) << 8 | p[2]; - case 4: - return fetch32_be_aligned(p); - case 5: - return (uint64_t)fetch32_be_aligned(p) << 8 | p[4]; - case 6: - return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4); - case 7: - return (uint64_t)fetch32_be_aligned(p) << 24 | - (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6]; - case 0: - return fetch64_be_aligned(p); -#else - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 4: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; - case 5: - return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | - (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; - case 6: - return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | - (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; - case 7: - return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | - (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | - (uint64_t)p[0] << 48; - case 0: - return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | - (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | - (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint64_t -tail64_be_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#if defined(can_read_underside) && \ - (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) - /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. 
*/ - const unsigned offset = (8 - tail) & 7; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 8))) { - p -= offset; - return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); - } - return fetch64_be_unaligned(p) >> shift; -#else - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 1: - return p[0]; - case 2: - return fetch16_be_unaligned(p); - case 3: - return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2]; - case 4: - return fetch32_be(p); - case 5: - return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4]; - case 6: - return (uint64_t)fetch32_be_unaligned(p) << 16 | - fetch16_be_unaligned(p + 4); - case 7: - return (uint64_t)fetch32_be_unaligned(p) << 24 | - (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6]; - case 0: - return fetch64_be_unaligned(p); -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 4: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; - case 5: - return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | - (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; - case 6: - return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | - (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; - case 7: - return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | - (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | - (uint64_t)p[0] << 48; - case 0: - return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | - (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | - (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; -#endif - } -#endif /* can_read_underside */ -} - -/***************************************************************************/ - -#ifndef rot64 + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. */ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_le_unaligned(p) >> shift; + } + return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. 
*/ + case 0: + return fetch64_le_unaligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_unaligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_unaligned(p); + case 1: + return p[0]; +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* can_read_underside */ +} + +/*------------------------------------------------------------- Big Endian */ + +#ifndef fetch16_be_aligned +static __maybe_unused __always_inline uint16_t +fetch16_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_be_aligned */ + +#ifndef fetch16_be_unaligned +static __maybe_unused __always_inline uint16_t +fetch16_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return (uint16_t)p[0] << 8 | p[1]; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_be_unaligned */ + +#ifndef fetch32_be_aligned +static __maybe_unused __always_inline uint32_t +fetch32_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == 
__ORDER_BIG_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_be_aligned */ + +#ifndef fetch32_be_unaligned +static __maybe_unused __always_inline uint32_t +fetch32_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint32_t)fetch16_be_unaligned(v) << 16 | + fetch16_be_unaligned((const uint8_t *)v + 2); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_be_unaligned */ + +#ifndef fetch64_be_aligned +static __maybe_unused __always_inline uint64_t +fetch64_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_be_aligned */ + +#ifndef fetch64_be_unaligned +static __maybe_unused __always_inline uint64_t +fetch64_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint64_t)fetch32_be_unaligned(v) << 32 | + fetch32_be_unaligned((const uint8_t *)v + 4); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_be_unaligned */ + +static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_be_aligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return fetch16_be_aligned(p); + case 3: + return (uint32_t)fetch16_be_aligned(p) << 8 | p[2]; + case 4: + return fetch32_be_aligned(p); + case 5: + return (uint64_t)fetch32_be_aligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4); + case 7: + return (uint64_t)fetch32_be_aligned(p) << 24 | + (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_aligned(p); +#else + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint64_t +tail64_be_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. 
*/ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); + } + return fetch64_be_unaligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_unaligned(p); + case 3: + return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2]; + case 4: + return fetch32_be(p); + case 5: + return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_unaligned(p) << 16 | + fetch16_be_unaligned(p + 4); + case 7: + return (uint64_t)fetch32_be_unaligned(p) << 24 | + (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_unaligned(p); +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* can_read_underside */ +} + +/***************************************************************************/ + +#ifndef rot64 static __maybe_unused __always_inline uint64_t rot64(uint64_t v, unsigned s) { - return (v >> s) | (v << (64 - s)); -} -#endif /* rot64 */ - -#ifndef mul_32x32_64 + return (v >> s) | (v << (64 - s)); +} +#endif /* rot64 */ + +#ifndef mul_32x32_64 static __maybe_unused __always_inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) { - return a * (uint64_t)b; -} -#endif /* mul_32x32_64 */ - -#ifndef add64carry_first -static __maybe_unused __always_inline unsigned -add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { -#if __has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, 0, &carryout); - return (unsigned)carryout; -#else - *sum = base + addend; - return *sum < addend; -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_fist */ - -#ifndef add64carry_next -static __maybe_unused __always_inline unsigned -add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { -#if 
__has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, carry, &carryout); - return (unsigned)carryout; -#else - *sum = base + addend + carry; - return *sum < addend || (carry && *sum == addend); -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_next */ - -#ifndef add64carry_last -static __maybe_unused __always_inline void -add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { -#if __has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, carry, &carryout); - (void)carryout; -#else - *sum = base + addend + carry; -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_last */ - -#ifndef mul_64x64_128 -static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, - uint64_t b, - uint64_t *h) { + return a * (uint64_t)b; +} +#endif /* mul_32x32_64 */ + +#ifndef add64carry_first +static __maybe_unused __always_inline unsigned +add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, 0, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend; + return *sum < addend; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_fist */ + +#ifndef add64carry_next +static __maybe_unused __always_inline unsigned +add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend + carry; + return *sum < addend || (carry && *sum == addend); +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_next */ + +#ifndef add64carry_last +static __maybe_unused __always_inline void +add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if 
__has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + (void)carryout; +#else + *sum = base + addend + carry; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_last */ + +#ifndef mul_64x64_128 +static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, + uint64_t b, + uint64_t *h) { #if (defined(__SIZEOF_INT128__) || \ (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) && \ (!defined(__LCC__) || __LCC__ != 124) - __uint128_t r = (__uint128_t)a * (__uint128_t)b; - /* modern GCC could nicely optimize this */ - *h = (uint64_t)(r >> 64); - return (uint64_t)r; -#elif defined(mul_64x64_high) - *h = mul_64x64_high(a, b); - return a * b; -#else - /* performs 64x64 to 128 bit multiplication */ - const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); - const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); - const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); - const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); - - /* Few simplification are possible here for 32-bit architectures, - * but thus we would lost compatibility with the original 64-bit - * version. Think is very bad idea, because then 32-bit t1ha will - * still (relatively) very slowly and well yet not compatible. 
*/ - uint64_t l; - add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); - add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); - return l; -#endif -} -#endif /* mul_64x64_128() */ - -#ifndef mul_64x64_high -static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, - uint64_t b) { - uint64_t h; - mul_64x64_128(a, b, &h); - return h; -} -#endif /* mul_64x64_high */ - -/***************************************************************************/ - -/* 'magic' primes */ -static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); -static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); -static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); -static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); -static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); -static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); -static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); - -/* xor high and low parts of full 128-bit product */ -static __maybe_unused __always_inline uint64_t mux64(uint64_t v, - uint64_t prime) { - uint64_t l, h; - l = mul_64x64_128(v, prime, &h); - return l ^ h; -} - + __uint128_t r = (__uint128_t)a * (__uint128_t)b; + /* modern GCC could nicely optimize this */ + *h = (uint64_t)(r >> 64); + return (uint64_t)r; +#elif defined(mul_64x64_high) + *h = mul_64x64_high(a, b); + return a * b; +#else + /* performs 64x64 to 128 bit multiplication */ + const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); + const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); + const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); + const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); + + /* Few simplification are possible here for 32-bit architectures, + * but thus we would lost compatibility with the original 64-bit + * version. Think is very bad idea, because then 32-bit t1ha will + * still (relatively) very slowly and well yet not compatible. 
*/ + uint64_t l; + add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); + add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); + return l; +#endif +} +#endif /* mul_64x64_128() */ + +#ifndef mul_64x64_high +static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, + uint64_t b) { + uint64_t h; + mul_64x64_128(a, b, &h); + return h; +} +#endif /* mul_64x64_high */ + +/***************************************************************************/ + +/* 'magic' primes */ +static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); +static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); +static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); +static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); +static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); +static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); +static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); + +/* xor high and low parts of full 128-bit product */ +static __maybe_unused __always_inline uint64_t mux64(uint64_t v, + uint64_t prime) { + uint64_t l, h; + l = mul_64x64_128(v, prime, &h); + return l ^ h; +} + static __maybe_unused __always_inline uint64_t final64(uint64_t a, uint64_t b) { - uint64_t x = (a + rot64(b, 41)) * prime_0; - uint64_t y = (rot64(a, 23) + b) * prime_6; - return mux64(x ^ y, prime_5); -} - + uint64_t x = (a + rot64(b, 41)) * prime_0; + uint64_t y = (rot64(a, 23) + b) * prime_6; + return mux64(x ^ y, prime_5); +} + static __maybe_unused __always_inline void mixup64(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t v, uint64_t prime) { - uint64_t h; - *a ^= mul_64x64_128(*b + v, prime, &h); - *b += h; -} - -/***************************************************************************/ - -typedef union t1ha_uint128 { -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - __uint128_t v; -#endif - struct { -#if __BYTE_ORDER__ == 
__ORDER_LITTLE_ENDIAN__ - uint64_t l, h; -#else - uint64_t h, l; -#endif - }; -} t1ha_uint128_t; - + uint64_t h; + *a ^= mul_64x64_128(*b + v, prime, &h); + *b += h; +} + +/***************************************************************************/ + +typedef union t1ha_uint128 { +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + __uint128_t v; +#endif + struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint64_t l, h; +#else + uint64_t h, l; +#endif + }; +} t1ha_uint128_t; + static __maybe_unused __always_inline t1ha_uint128_t not128(const t1ha_uint128_t v) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = ~v.v; -#else - r.l = ~v.l; - r.h = ~v.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = ~v.v; +#else + r.l = ~v.l; + r.h = ~v.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t left128(const t1ha_uint128_t v, unsigned s) { - t1ha_uint128_t r; - assert(s < 128); -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = v.v << s; -#else - r.l = (s < 64) ? v.l << s : 0; - r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64); -#endif - return r; -} - + t1ha_uint128_t r; + assert(s < 128); +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = v.v << s; +#else + r.l = (s < 64) ? v.l << s : 0; + r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64); +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t right128(const t1ha_uint128_t v, unsigned s) { - t1ha_uint128_t r; - assert(s < 128); -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = v.v >> s; -#else - r.l = (s < 64) ? (s ? 
v.h << (64 - s) : 0) | (v.l >> s) : v.h >> (s - 64); - r.h = (s < 64) ? v.h >> s : 0; -#endif - return r; -} - + t1ha_uint128_t r; + assert(s < 128); +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = v.v >> s; +#else + r.l = (s < 64) ? (s ? v.h << (64 - s) : 0) | (v.l >> s) : v.h >> (s - 64); + r.h = (s < 64) ? v.h >> s : 0; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t or128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v | y.v; -#else - r.l = x.l | y.l; - r.h = x.h | y.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v | y.v; +#else + r.l = x.l | y.l; + r.h = x.h | y.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v ^ y.v; -#else - r.l = x.l ^ y.l; - r.h = x.h ^ y.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v ^ y.v; +#else + r.l = x.l ^ y.l; + r.h = x.h ^ y.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v, unsigned s) { - s &= 127; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - v.v = (v.v << (128 - s)) | (v.v >> s); - return v; -#else - return s ? or128(left128(v, 128 - s), right128(v, s)) : v; -#endif -} - + s &= 127; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + v.v = (v.v << (128 - s)) | (v.v >> s); + return v; +#else + return s ? 
or128(left128(v, 128 - s), right128(v, s)) : v; +#endif +} + static __maybe_unused __always_inline t1ha_uint128_t add128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v + y.v; -#else - add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v + y.v; +#else + add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v * y.v; -#else - r.l = mul_64x64_128(x.l, y.l, &r.h); - r.h += x.l * y.h + y.l * x.h; -#endif - return r; -} - -/***************************************************************************/ - -#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) -uint64_t t1ha_ia32cpu_features(void); - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v * y.v; +#else + r.l = mul_64x64_128(x.l, y.l, &r.h); + r.h += x.l * y.h + y.l * x.h; +#endif + return r; +} + +/***************************************************************************/ + +#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) +uint64_t t1ha_ia32cpu_features(void); + static __maybe_unused __always_inline bool t1ha_ia32_AESNI_avail(uint64_t ia32cpu_features) { - /* check for AES-NI */ - return (ia32cpu_features & UINT32_C(0x02000000)) != 0; -} - + /* check for AES-NI */ + return (ia32cpu_features & UINT32_C(0x02000000)) != 0; +} + static __maybe_unused __always_inline bool t1ha_ia32_AVX_avail(uint64_t ia32cpu_features) { - /* check for any AVX */ - return (ia32cpu_features & UINT32_C(0x1A000000)) == 
UINT32_C(0x1A000000); -} - + /* check for any AVX */ + return (ia32cpu_features & UINT32_C(0x1A000000)) == UINT32_C(0x1A000000); +} + static __maybe_unused __always_inline bool t1ha_ia32_AVX2_avail(uint64_t ia32cpu_features) { - /* check for 'Advanced Vector Extensions 2' */ - return ((ia32cpu_features >> 32) & 32) != 0; -} - -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + /* check for 'Advanced Vector Extensions 2' */ + return ((ia32cpu_features >> 32) & 32) != 0; +} + +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck.c b/contrib/libs/t1ha/src/t1ha_selfcheck.c index b92eb948a3..ee9394bf3b 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha_selfcheck.c @@ -1,98 +1,98 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. 
- */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. 
+ * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_selfcheck.h" -#include "t1ha_bits.h" - -const uint8_t t1ha_test_pattern[64] = { - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, - 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, - 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, - 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'}; - -static __inline bool probe(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t reference, const void *data, - unsigned len, uint64_t seed) { - const uint64_t actual = hash(data, len, seed); - assert(actual == reference); - return actual != reference; -} - -__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t *reference_values) { - bool failed = false; - - const uint64_t zero = 0; - failed |= probe(hash, /* empty-zero */ *reference_values++, NULL, 0, zero); - failed |= probe(hash, /* empty-all1 */ *reference_values++, NULL, 0, ~zero); - failed |= probe(hash, /* bin64-zero */ *reference_values++, t1ha_test_pattern, - 64, zero); - - uint64_t seed = 1; - for (int i = 1; i < 64; i++) { - /* bin%i-1p%i */ - failed |= probe(hash, *reference_values++, t1ha_test_pattern, i, seed); - seed <<= 1; - } - - seed = ~zero; - for (int i = 1; i <= 7; i++) { - seed <<= 1; - /* align%i_F%i */; - failed |= - probe(hash, *reference_values++, t1ha_test_pattern + i, 64 - i, seed); - } - - uint8_t pattern_long[512]; - for (size_t i = 0; i < sizeof(pattern_long); ++i) - pattern_long[i] = (uint8_t)i; - for (int i = 0; i <= 7; i++) { - /* long-%05i */ - failed |= - probe(hash, *reference_values++, pattern_long + i, 128 + i * 17, seed); - } - - return failed ? 
-1 : 0; -} + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#include "t1ha_selfcheck.h" +#include "t1ha_bits.h" + +const uint8_t t1ha_test_pattern[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, + 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, + 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, + 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'}; + +static __inline bool probe(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t reference, const void *data, + unsigned len, uint64_t seed) { + const uint64_t actual = hash(data, len, seed); + assert(actual == reference); + return actual != reference; +} + +__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values) { + bool failed = false; + + const uint64_t zero = 0; + failed |= probe(hash, /* empty-zero */ *reference_values++, NULL, 0, zero); + failed |= probe(hash, /* empty-all1 */ *reference_values++, NULL, 0, ~zero); + failed |= probe(hash, /* bin64-zero */ *reference_values++, t1ha_test_pattern, + 64, zero); + + uint64_t seed = 1; + for (int i = 1; i < 64; i++) { + /* bin%i-1p%i */ + failed |= probe(hash, *reference_values++, t1ha_test_pattern, i, seed); + seed <<= 1; + } + + seed = ~zero; + for (int i = 1; i <= 7; i++) { + seed <<= 1; + /* align%i_F%i */; + failed |= + probe(hash, *reference_values++, t1ha_test_pattern + i, 64 - i, seed); + } + + uint8_t pattern_long[512]; + for (size_t i = 0; i < sizeof(pattern_long); ++i) + pattern_long[i] = (uint8_t)i; + for (int i = 0; i <= 7; i++) { + /* long-%05i */ + failed |= + probe(hash, *reference_values++, pattern_long + i, 128 + i * 17, seed); + } + + return failed ? 
-1 : 0; +} diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck.h b/contrib/libs/t1ha/src/t1ha_selfcheck.h index 043f5e6a2d..e83cd2417d 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck.h +++ b/contrib/libs/t1ha/src/t1ha_selfcheck.h @@ -1,76 +1,76 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). 
+ * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#pragma once -#if defined(_MSC_VER) && _MSC_VER > 1800 -#pragma warning(disable : 4464) /* relative include path contains '..' 
*/ -#endif /* MSVC */ -#include "../t1ha.h" - -/***************************************************************************/ -/* Self-checking */ - -extern const uint8_t t1ha_test_pattern[64]; -int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t *reference_values); - -#ifndef T1HA2_DISABLED -extern const uint64_t t1ha_refval_2atonce[81]; -extern const uint64_t t1ha_refval_2atonce128[81]; -extern const uint64_t t1ha_refval_2stream[81]; -extern const uint64_t t1ha_refval_2stream128[81]; -#endif /* T1HA2_DISABLED */ - -#ifndef T1HA1_DISABLED -extern const uint64_t t1ha_refval_64le[81]; -extern const uint64_t t1ha_refval_64be[81]; -#endif /* T1HA1_DISABLED */ - -#ifndef T1HA0_DISABLED -extern const uint64_t t1ha_refval_32le[81]; -extern const uint64_t t1ha_refval_32be[81]; -#if T1HA0_AESNI_AVAILABLE -extern const uint64_t t1ha_refval_ia32aes_a[81]; -extern const uint64_t t1ha_refval_ia32aes_b[81]; -#endif /* T1HA0_AESNI_AVAILABLE */ -#endif /* T1HA0_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once +#if defined(_MSC_VER) && _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' 
*/ +#endif /* MSVC */ +#include "../t1ha.h" + +/***************************************************************************/ +/* Self-checking */ + +extern const uint8_t t1ha_test_pattern[64]; +int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values); + +#ifndef T1HA2_DISABLED +extern const uint64_t t1ha_refval_2atonce[81]; +extern const uint64_t t1ha_refval_2atonce128[81]; +extern const uint64_t t1ha_refval_2stream[81]; +extern const uint64_t t1ha_refval_2stream128[81]; +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED +extern const uint64_t t1ha_refval_64le[81]; +extern const uint64_t t1ha_refval_64be[81]; +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED +extern const uint64_t t1ha_refval_32le[81]; +extern const uint64_t t1ha_refval_32be[81]; +#if T1HA0_AESNI_AVAILABLE +extern const uint64_t t1ha_refval_ia32aes_a[81]; +extern const uint64_t t1ha_refval_ia32aes_b[81]; +#endif /* T1HA0_AESNI_AVAILABLE */ +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck_all.c b/contrib/libs/t1ha/src/t1ha_selfcheck_all.c index ea0a46b068..f916cef716 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck_all.c +++ b/contrib/libs/t1ha/src/t1ha_selfcheck_all.c @@ -1,63 +1,63 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. 
The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -__cold int t1ha_selfcheck__all_enabled(void) { - int rc = 0; - -#ifndef T1HA2_DISABLED - rc |= t1ha_selfcheck__t1ha2(); -#endif /* T1HA2_DISABLED */ - -#ifndef T1HA1_DISABLED - rc |= t1ha_selfcheck__t1ha1(); -#endif /* T1HA1_DISABLED */ - -#ifndef T1HA0_DISABLED - rc |= t1ha_selfcheck__t1ha0(); -#endif /* T1HA0_DISABLED */ - - return rc; -} + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +__cold int t1ha_selfcheck__all_enabled(void) { + int rc = 0; + +#ifndef T1HA2_DISABLED + rc |= t1ha_selfcheck__t1ha2(); +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED + rc |= t1ha_selfcheck__t1ha1(); +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED + rc |= t1ha_selfcheck__t1ha0(); +#endif /* T1HA0_DISABLED */ + + return rc; +} diff --git a/contrib/libs/t1ha/t1ha.h b/contrib/libs/t1ha/t1ha.h index 0f52ccc5a3..9bb8d74496 100644 --- a/contrib/libs/t1ha/t1ha.h +++ b/contrib/libs/t1ha/t1ha.h @@ -1,388 +1,388 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. 
Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. 
- * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#pragma once - -/***************************************************************************** - * - * PLEASE PAY ATTENTION TO THE FOLLOWING NOTES - * about macros definitions which controls t1ha behaviour and/or performance. - * - * - * 1) T1HA_SYS_UNALIGNED_ACCESS = Defines the system/platform/CPU/architecture - * abilities for unaligned data access. - * - * By default, when the T1HA_SYS_UNALIGNED_ACCESS not defined, - * it will defined on the basis hardcoded knowledge about of capabilities - * of most common CPU architectures. But you could override this - * default behavior when build t1ha library itself: - * - * // To disable unaligned access at all. - * #define T1HA_SYS_UNALIGNED_ACCESS 0 - * + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once + +/***************************************************************************** + * + * PLEASE PAY ATTENTION TO THE FOLLOWING NOTES + * about macros definitions which controls t1ha behaviour and/or performance. + * + * + * 1) T1HA_SYS_UNALIGNED_ACCESS = Defines the system/platform/CPU/architecture + * abilities for unaligned data access. + * + * By default, when the T1HA_SYS_UNALIGNED_ACCESS not defined, + * it will defined on the basis hardcoded knowledge about of capabilities + * of most common CPU architectures. But you could override this + * default behavior when build t1ha library itself: + * + * // To disable unaligned access at all. + * #define T1HA_SYS_UNALIGNED_ACCESS 0 + * * // To enable unaligned access, but indicate that it significantly slow. - * #define T1HA_SYS_UNALIGNED_ACCESS 1 - * - * // To enable unaligned access, and indicate that it effecient. 
- * #define T1HA_SYS_UNALIGNED_ACCESS 2 - * - * - * 2) T1HA_USE_FAST_ONESHOT_READ = Controls the data reads at the end of buffer. - * - * When defined to non-zero, t1ha will use 'one shot' method for reading - * up to 8 bytes at the end of data. In this case just the one 64-bit read - * will be performed even when the available less than 8 bytes. - * - * This is little bit faster that switching by length of data tail. - * Unfortunately this will triggering a false-positive alarms from Valgrind, - * AddressSanitizer and other similar tool. - * - * By default, t1ha defines it to 1, but you could override this - * default behavior when build t1ha library itself: - * - * // For little bit faster and small code. - * #define T1HA_USE_FAST_ONESHOT_READ 1 - * - * // For calmness if doubt. - * #define T1HA_USE_FAST_ONESHOT_READ 0 - * - * - * 3) T1HA0_RUNTIME_SELECT = Controls choice fastest function in runtime. - * - * t1ha library offers the t1ha0() function as the fastest for current CPU. - * But actual CPU's features/capabilities and may be significantly different, - * especially on x86 platform. Therefore, internally, t1ha0() may require - * dynamic dispatching for choice best implementation. - * - * By default, t1ha enables such runtime choice and (may be) corresponding - * indirect calls if it reasonable, but you could override this default - * behavior when build t1ha library itself: - * - * // To enable runtime choice of fastest implementation. - * #define T1HA0_RUNTIME_SELECT 1 - * - * // To disable runtime choice of fastest implementation. - * #define T1HA0_RUNTIME_SELECT 0 - * - * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function could - * be used to get actual t1ha0() implementation address at runtime. This is - * useful for two cases: - * - calling by local pointer-to-function usually is little - * bit faster (less overhead) than via a PLT thru the DSO boundary. 
- * - GNU Indirect functions (see below) don't supported by environment - * and calling by t1ha0_funcptr is not available and/or expensive. - * - * 4) T1HA_USE_INDIRECT_FUNCTIONS = Controls usage of GNU Indirect functions. - * - * In continue of T1HA0_RUNTIME_SELECT the T1HA_USE_INDIRECT_FUNCTIONS - * controls usage of ELF indirect functions feature. In general, when - * available, this reduces overhead of indirect function's calls though - * a DSO-bundary (https://sourceware.org/glibc/wiki/GNU_IFUNC). - * - * By default, t1ha engage GNU Indirect functions when it available - * and useful, but you could override this default behavior when build - * t1ha library itself: - * - * // To enable use of GNU ELF Indirect functions. - * #define T1HA_USE_INDIRECT_FUNCTIONS 1 - * - * // To disable use of GNU ELF Indirect functions. This may be useful - * // if the actual toolchain or the system's loader don't support ones. - * #define T1HA_USE_INDIRECT_FUNCTIONS 0 - * - * 5) T1HA0_AESNI_AVAILABLE = Controls AES-NI detection and dispatching on x86. - * - * In continue of T1HA0_RUNTIME_SELECT the T1HA0_AESNI_AVAILABLE controls - * detection and usage of AES-NI CPU's feature. On the other hand, this - * requires compiling parts of t1ha library with certain properly options, - * and could be difficult or inconvenient in some cases. - * - * By default, t1ha engade AES-NI for t1ha0() on the x86 platform, but - * you could override this default behavior when build t1ha library itself: - * - * // To disable detection and usage of AES-NI instructions for t1ha0(). - * // This may be useful when you unable to build t1ha library properly - * // or known that AES-NI will be unavailable at the deploy. - * #define T1HA0_AESNI_AVAILABLE 0 - * - * // To force detection and usage of AES-NI instructions for t1ha0(), - * // but I don't known reasons to anybody would need this. 
- * #define T1HA0_AESNI_AVAILABLE 1 - * - * 6) T1HA0_DISABLED, T1HA1_DISABLED, T1HA2_DISABLED = Controls availability of - * t1ha functions. - * - * In some cases could be useful to import/use only few of t1ha functions - * or just the one. So, this definitions allows disable corresponding parts - * of t1ha library. - * - * // To disable t1ha0(), t1ha0_32le(), t1ha0_32be() and all AES-NI. - * #define T1HA0_DISABLED - * - * // To disable t1ha1_le() and t1ha1_be(). - * #define T1HA1_DISABLED - * - * // To disable t1ha2_atonce(), t1ha2_atonce128() and so on. - * #define T1HA2_DISABLED - * - *****************************************************************************/ - -#define T1HA_VERSION_MAJOR 2 -#define T1HA_VERSION_MINOR 1 -#define T1HA_VERSION_RELEASE 1 - -#ifndef __has_attribute -#define __has_attribute(x) (0) -#endif - -#ifndef __has_include -#define __has_include(x) (0) -#endif - -#ifndef __GNUC_PREREQ -#if defined(__GNUC__) && defined(__GNUC_MINOR__) -#define __GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) -#else -#define __GNUC_PREREQ(maj, min) 0 -#endif -#endif /* __GNUC_PREREQ */ - -#ifndef __CLANG_PREREQ -#ifdef __clang__ -#define __CLANG_PREREQ(maj, min) \ - ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min)) -#else -#define __CLANG_PREREQ(maj, min) (0) -#endif -#endif /* __CLANG_PREREQ */ - -#ifndef __LCC_PREREQ -#ifdef __LCC__ -#define __LCC_PREREQ(maj, min) \ - ((__LCC__ << 16) + __LCC_MINOR__ >= ((maj) << 16) + (min)) -#else -#define __LCC_PREREQ(maj, min) (0) -#endif -#endif /* __LCC_PREREQ */ - -/*****************************************************************************/ - -#ifdef _MSC_VER -/* Avoid '16' bytes padding added after data member 't1ha_context::total' - * and other warnings from std-headers if warning-level > 3. 
*/ -#pragma warning(push, 3) -#endif - -#if defined(__cplusplus) && __cplusplus >= 201103L -#include <climits> -#include <cstddef> -#include <cstdint> -#else -#include <limits.h> -#include <stddef.h> -#include <stdint.h> -#endif - -/*****************************************************************************/ - -#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \ - defined(i486) || defined(__i486) || defined(__i486__) || \ - defined(i586) | defined(__i586) || defined(__i586__) || defined(i686) || \ - defined(__i686) || defined(__i686__) || defined(_M_IX86) || \ - defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \ - defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \ - defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \ - defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) -#ifndef __ia32__ -/* LY: define neutral __ia32__ for x86 and x86-64 archs */ -#define __ia32__ 1 -#endif /* __ia32__ */ -#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \ - defined(__amd64) || defined(_M_X64)) -/* LY: define trusty __amd64__ for all AMD64/x86-64 arch */ -#define __amd64__ 1 -#endif /* __amd64__ */ -#endif /* all x86 */ - -#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ - !defined(__ORDER_BIG_ENDIAN__) - -/* *INDENT-OFF* */ -/* clang-format off */ - -#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || defined(__ANDROID__) || \ - defined(HAVE_ENDIAN_H) || __has_include(<endian.h>) -#include <endian.h> -#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || \ - defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>) -#include <machine/endian.h> -#elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>) -#include <sys/isa_defs.h> -#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \ - (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>)) -#include <sys/endian.h> -#include <sys/types.h> 
-#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ - defined(__NETBSD__) || defined(__NetBSD__) || \ - defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>) -#include <sys/param.h> -#endif /* OS */ - -/* *INDENT-ON* */ -/* clang-format on */ - -#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) -#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN -#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN -#define __BYTE_ORDER__ __BYTE_ORDER -#elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) -#define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN -#define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN -#define __BYTE_ORDER__ _BYTE_ORDER -#else -#define __ORDER_LITTLE_ENDIAN__ 1234 -#define __ORDER_BIG_ENDIAN__ 4321 - -#if defined(__LITTLE_ENDIAN__) || \ - (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ - defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ - defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ - defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \ - defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \ - defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \ - defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \ - defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ - defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \ - defined(__WINDOWS__) -#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ - -#elif defined(__BIG_ENDIAN__) || \ - (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ - defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ - defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ - defined(__m68k__) || defined(M68000) || defined(__hppa__) || \ - defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \ - defined(__sparc) || defined(__370__) || defined(__THW_370__) || \ - defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__) 
-#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ - -#else -#error __BYTE_ORDER__ should be defined. -#endif /* Arch */ - -#endif -#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */ - -/*****************************************************************************/ - -#ifndef __dll_export -#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) -#if defined(__GNUC__) || __has_attribute(dllexport) -#define __dll_export __attribute__((dllexport)) -#else -#define __dll_export __declspec(dllexport) -#endif + * #define T1HA_SYS_UNALIGNED_ACCESS 1 + * + * // To enable unaligned access, and indicate that it effecient. + * #define T1HA_SYS_UNALIGNED_ACCESS 2 + * + * + * 2) T1HA_USE_FAST_ONESHOT_READ = Controls the data reads at the end of buffer. + * + * When defined to non-zero, t1ha will use 'one shot' method for reading + * up to 8 bytes at the end of data. In this case just the one 64-bit read + * will be performed even when the available less than 8 bytes. + * + * This is little bit faster that switching by length of data tail. + * Unfortunately this will triggering a false-positive alarms from Valgrind, + * AddressSanitizer and other similar tool. + * + * By default, t1ha defines it to 1, but you could override this + * default behavior when build t1ha library itself: + * + * // For little bit faster and small code. + * #define T1HA_USE_FAST_ONESHOT_READ 1 + * + * // For calmness if doubt. + * #define T1HA_USE_FAST_ONESHOT_READ 0 + * + * + * 3) T1HA0_RUNTIME_SELECT = Controls choice fastest function in runtime. + * + * t1ha library offers the t1ha0() function as the fastest for current CPU. + * But actual CPU's features/capabilities and may be significantly different, + * especially on x86 platform. Therefore, internally, t1ha0() may require + * dynamic dispatching for choice best implementation. 
+ * + * By default, t1ha enables such runtime choice and (may be) corresponding + * indirect calls if it reasonable, but you could override this default + * behavior when build t1ha library itself: + * + * // To enable runtime choice of fastest implementation. + * #define T1HA0_RUNTIME_SELECT 1 + * + * // To disable runtime choice of fastest implementation. + * #define T1HA0_RUNTIME_SELECT 0 + * + * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function could + * be used to get actual t1ha0() implementation address at runtime. This is + * useful for two cases: + * - calling by local pointer-to-function usually is little + * bit faster (less overhead) than via a PLT thru the DSO boundary. + * - GNU Indirect functions (see below) don't supported by environment + * and calling by t1ha0_funcptr is not available and/or expensive. + * + * 4) T1HA_USE_INDIRECT_FUNCTIONS = Controls usage of GNU Indirect functions. + * + * In continue of T1HA0_RUNTIME_SELECT the T1HA_USE_INDIRECT_FUNCTIONS + * controls usage of ELF indirect functions feature. In general, when + * available, this reduces overhead of indirect function's calls though + * a DSO-bundary (https://sourceware.org/glibc/wiki/GNU_IFUNC). + * + * By default, t1ha engage GNU Indirect functions when it available + * and useful, but you could override this default behavior when build + * t1ha library itself: + * + * // To enable use of GNU ELF Indirect functions. + * #define T1HA_USE_INDIRECT_FUNCTIONS 1 + * + * // To disable use of GNU ELF Indirect functions. This may be useful + * // if the actual toolchain or the system's loader don't support ones. + * #define T1HA_USE_INDIRECT_FUNCTIONS 0 + * + * 5) T1HA0_AESNI_AVAILABLE = Controls AES-NI detection and dispatching on x86. + * + * In continue of T1HA0_RUNTIME_SELECT the T1HA0_AESNI_AVAILABLE controls + * detection and usage of AES-NI CPU's feature. 
On the other hand, this + * requires compiling parts of t1ha library with certain properly options, + * and could be difficult or inconvenient in some cases. + * + * By default, t1ha engade AES-NI for t1ha0() on the x86 platform, but + * you could override this default behavior when build t1ha library itself: + * + * // To disable detection and usage of AES-NI instructions for t1ha0(). + * // This may be useful when you unable to build t1ha library properly + * // or known that AES-NI will be unavailable at the deploy. + * #define T1HA0_AESNI_AVAILABLE 0 + * + * // To force detection and usage of AES-NI instructions for t1ha0(), + * // but I don't known reasons to anybody would need this. + * #define T1HA0_AESNI_AVAILABLE 1 + * + * 6) T1HA0_DISABLED, T1HA1_DISABLED, T1HA2_DISABLED = Controls availability of + * t1ha functions. + * + * In some cases could be useful to import/use only few of t1ha functions + * or just the one. So, this definitions allows disable corresponding parts + * of t1ha library. + * + * // To disable t1ha0(), t1ha0_32le(), t1ha0_32be() and all AES-NI. + * #define T1HA0_DISABLED + * + * // To disable t1ha1_le() and t1ha1_be(). + * #define T1HA1_DISABLED + * + * // To disable t1ha2_atonce(), t1ha2_atonce128() and so on. 
+ * #define T1HA2_DISABLED + * + *****************************************************************************/ + +#define T1HA_VERSION_MAJOR 2 +#define T1HA_VERSION_MINOR 1 +#define T1HA_VERSION_RELEASE 1 + +#ifndef __has_attribute +#define __has_attribute(x) (0) +#endif + +#ifndef __has_include +#define __has_include(x) (0) +#endif + +#ifndef __GNUC_PREREQ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +#define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +#define __GNUC_PREREQ(maj, min) 0 +#endif +#endif /* __GNUC_PREREQ */ + +#ifndef __CLANG_PREREQ +#ifdef __clang__ +#define __CLANG_PREREQ(maj, min) \ + ((__clang_major__ << 16) + __clang_minor__ >= ((maj) << 16) + (min)) +#else +#define __CLANG_PREREQ(maj, min) (0) +#endif +#endif /* __CLANG_PREREQ */ + +#ifndef __LCC_PREREQ +#ifdef __LCC__ +#define __LCC_PREREQ(maj, min) \ + ((__LCC__ << 16) + __LCC_MINOR__ >= ((maj) << 16) + (min)) +#else +#define __LCC_PREREQ(maj, min) (0) +#endif +#endif /* __LCC_PREREQ */ + +/*****************************************************************************/ + +#ifdef _MSC_VER +/* Avoid '16' bytes padding added after data member 't1ha_context::total' + * and other warnings from std-headers if warning-level > 3. 
*/ +#pragma warning(push, 3) +#endif + +#if defined(__cplusplus) && __cplusplus >= 201103L +#include <climits> +#include <cstddef> +#include <cstdint> +#else +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#endif + +/*****************************************************************************/ + +#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \ + defined(i486) || defined(__i486) || defined(__i486__) || \ + defined(i586) | defined(__i586) || defined(__i586__) || defined(i686) || \ + defined(__i686) || defined(__i686__) || defined(_M_IX86) || \ + defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \ + defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \ + defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \ + defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) +#ifndef __ia32__ +/* LY: define neutral __ia32__ for x86 and x86-64 archs */ +#define __ia32__ 1 +#endif /* __ia32__ */ +#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \ + defined(__amd64) || defined(_M_X64)) +/* LY: define trusty __amd64__ for all AMD64/x86-64 arch */ +#define __amd64__ 1 +#endif /* __amd64__ */ +#endif /* all x86 */ + +#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \ + !defined(__ORDER_BIG_ENDIAN__) + +/* *INDENT-OFF* */ +/* clang-format off */ + +#if defined(__GLIBC__) || defined(__GNU_LIBRARY__) || defined(__ANDROID__) || \ + defined(HAVE_ENDIAN_H) || __has_include(<endian.h>) +#include <endian.h> +#elif defined(__APPLE__) || defined(__MACH__) || defined(__OpenBSD__) || \ + defined(HAVE_MACHINE_ENDIAN_H) || __has_include(<machine/endian.h>) +#include <machine/endian.h> +#elif defined(HAVE_SYS_ISA_DEFS_H) || __has_include(<sys/isa_defs.h>) +#include <sys/isa_defs.h> +#elif (defined(HAVE_SYS_TYPES_H) && defined(HAVE_SYS_ENDIAN_H)) || \ + (__has_include(<sys/types.h>) && __has_include(<sys/endian.h>)) +#include <sys/endian.h> +#include <sys/types.h> 
+#elif defined(__bsdi__) || defined(__DragonFly__) || defined(__FreeBSD__) || \ + defined(__NETBSD__) || defined(__NetBSD__) || \ + defined(HAVE_SYS_PARAM_H) || __has_include(<sys/param.h>) +#include <sys/param.h> +#endif /* OS */ + +/* *INDENT-ON* */ +/* clang-format on */ + +#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN) +#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN +#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN +#define __BYTE_ORDER__ __BYTE_ORDER +#elif defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) +#define __ORDER_LITTLE_ENDIAN__ _LITTLE_ENDIAN +#define __ORDER_BIG_ENDIAN__ _BIG_ENDIAN +#define __BYTE_ORDER__ _BYTE_ORDER +#else +#define __ORDER_LITTLE_ENDIAN__ 1234 +#define __ORDER_BIG_ENDIAN__ 4321 + +#if defined(__LITTLE_ENDIAN__) || \ + (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)) || \ + defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ + defined(__MIPSEL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ + defined(_M_ARM) || defined(_M_ARM64) || defined(__e2k__) || \ + defined(__elbrus_4c__) || defined(__elbrus_8c__) || defined(__bfin__) || \ + defined(__BFIN__) || defined(__ia64__) || defined(_IA64) || \ + defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || \ + defined(__itanium__) || defined(__ia32__) || defined(__CYGWIN__) || \ + defined(_WIN64) || defined(_WIN32) || defined(__TOS_WIN__) || \ + defined(__WINDOWS__) +#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ + +#elif defined(__BIG_ENDIAN__) || \ + (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)) || \ + defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ + defined(__MIPSEB__) || defined(_MIPSEB) || defined(__MIPSEB) || \ + defined(__m68k__) || defined(M68000) || defined(__hppa__) || \ + defined(__hppa) || defined(__HPPA__) || defined(__sparc__) || \ + defined(__sparc) || defined(__370__) || defined(__THW_370__) || \ + defined(__s390__) || defined(__s390x__) || defined(__SYSC_ZARCH__) 
+#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__ + +#else +#error __BYTE_ORDER__ should be defined. +#endif /* Arch */ + +#endif +#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */ + +/*****************************************************************************/ + +#ifndef __dll_export +#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) +#if defined(__GNUC__) || __has_attribute(dllexport) +#define __dll_export __attribute__((dllexport)) +#else +#define __dll_export __declspec(dllexport) +#endif #elif defined(__GNUC__) || __has_attribute(__visibility__) #define __dll_export __attribute__((__visibility__("default"))) -#else -#define __dll_export -#endif -#endif /* __dll_export */ - -#ifndef __dll_import -#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) -#if defined(__GNUC__) || __has_attribute(dllimport) -#define __dll_import __attribute__((dllimport)) -#else -#define __dll_import __declspec(dllimport) -#endif +#else +#define __dll_export +#endif +#endif /* __dll_export */ + +#ifndef __dll_import +#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) +#if defined(__GNUC__) || __has_attribute(dllimport) +#define __dll_import __attribute__((dllimport)) +#else +#define __dll_import __declspec(dllimport) +#endif #elif defined(__GNUC__) || __has_attribute(__visibility__) #define __dll_import __attribute__((__visibility__("default"))) -#else -#define __dll_import -#endif -#endif /* __dll_import */ - -#ifndef __force_inline -#ifdef _MSC_VER -#define __force_inline __forceinline +#else +#define __dll_import +#endif +#endif /* __dll_import */ + +#ifndef __force_inline +#ifdef _MSC_VER +#define __force_inline __forceinline #elif __GNUC_PREREQ(3, 2) || __has_attribute(__always_inline__) #define __force_inline __inline __attribute__((__always_inline__)) -#else -#define __force_inline __inline -#endif -#endif /* __force_inline */ - -#ifndef T1HA_API -#if defined(t1ha_EXPORTS) -#define T1HA_API __dll_export -#elif 
defined(t1ha_IMPORTS) -#define T1HA_API __dll_import -#else -#define T1HA_API -#endif -#endif /* T1HA_API */ - -#if defined(_MSC_VER) && defined(__ia32__) -#define T1HA_ALIGN_PREFIX __declspec(align(32)) /* required only for SIMD */ -#else -#define T1HA_ALIGN_PREFIX -#endif /* _MSC_VER */ - -#if defined(__GNUC__) && defined(__ia32__) -#define T1HA_ALIGN_SUFFIX \ +#else +#define __force_inline __inline +#endif +#endif /* __force_inline */ + +#ifndef T1HA_API +#if defined(t1ha_EXPORTS) +#define T1HA_API __dll_export +#elif defined(t1ha_IMPORTS) +#define T1HA_API __dll_import +#else +#define T1HA_API +#endif +#endif /* T1HA_API */ + +#if defined(_MSC_VER) && defined(__ia32__) +#define T1HA_ALIGN_PREFIX __declspec(align(32)) /* required only for SIMD */ +#else +#define T1HA_ALIGN_PREFIX +#endif /* _MSC_VER */ + +#if defined(__GNUC__) && defined(__ia32__) +#define T1HA_ALIGN_SUFFIX \ __attribute__((__aligned__(32))) /* required only for SIMD */ -#else -#define T1HA_ALIGN_SUFFIX -#endif /* GCC x86 */ - -#include <util/system/compiler.h> - -#ifndef T1HA_USE_INDIRECT_FUNCTIONS -/* GNU ELF indirect functions usage control. For more info please see - * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format - * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ +#else +#define T1HA_ALIGN_SUFFIX +#endif /* GCC x86 */ + +#include <util/system/compiler.h> + +#ifndef T1HA_USE_INDIRECT_FUNCTIONS +/* GNU ELF indirect functions usage control. 
For more info please see + * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format + * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ #if defined(__ELF__) && defined(__amd64__) && \ (__has_attribute(__ifunc__) || \ (!defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && \ @@ -391,331 +391,331 @@ * - ELF AND x86_64 * - attribute(__ifunc__) is available OR * GCC >= 4 WITHOUT -fsanitize=address NOR -fstack-protector-all */ -#define T1HA_USE_INDIRECT_FUNCTIONS 1 -#else -#define T1HA_USE_INDIRECT_FUNCTIONS 0 -#endif -#endif /* T1HA_USE_INDIRECT_FUNCTIONS */ - -#if __GNUC_PREREQ(4, 0) -#pragma GCC visibility push(hidden) -#endif /* __GNUC_PREREQ(4,0) */ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef union T1HA_ALIGN_PREFIX t1ha_state256 { - uint8_t bytes[32]; - uint32_t u32[8]; - uint64_t u64[4]; - struct { - uint64_t a, b, c, d; - } n; -} t1ha_state256_t T1HA_ALIGN_SUFFIX; - -typedef struct t1ha_context { - t1ha_state256_t state; - t1ha_state256_t buffer; - size_t partial; - uint64_t total; -} t1ha_context_t; - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/****************************************************************************** - * - * Self-testing API. - * - * Unfortunately, some compilers (exactly only Microsoft Visual C/C++) has - * a bugs which leads t1ha-functions to produce wrong results. This API allows - * check the correctness of the actual code in runtime. - * - * All check-functions returns 0 on success, or -1 in case the corresponding - * hash-function failed verification. PLEASE, always perform such checking at - * initialization of your code, if you using MSVC or other troubleful compilers. 
- */ - -T1HA_API int t1ha_selfcheck__all_enabled(void); - -#ifndef T1HA2_DISABLED -T1HA_API int t1ha_selfcheck__t1ha2_atonce(void); -T1HA_API int t1ha_selfcheck__t1ha2_atonce128(void); -T1HA_API int t1ha_selfcheck__t1ha2_stream(void); -T1HA_API int t1ha_selfcheck__t1ha2(void); -#endif /* T1HA2_DISABLED */ - -#ifndef T1HA1_DISABLED -T1HA_API int t1ha_selfcheck__t1ha1_le(void); -T1HA_API int t1ha_selfcheck__t1ha1_be(void); -T1HA_API int t1ha_selfcheck__t1ha1(void); -#endif /* T1HA1_DISABLED */ - -#ifndef T1HA0_DISABLED -T1HA_API int t1ha_selfcheck__t1ha0_32le(void); -T1HA_API int t1ha_selfcheck__t1ha0_32be(void); -T1HA_API int t1ha_selfcheck__t1ha0(void); - -/* Define T1HA0_AESNI_AVAILABLE to 0 for disable AES-NI support. */ -#ifndef T1HA0_AESNI_AVAILABLE -#if defined(__e2k__) || \ - (defined(__ia32__) && (!defined(_M_IX86) || _MSC_VER > 1800)) -#define T1HA0_AESNI_AVAILABLE 1 -#else -#define T1HA0_AESNI_AVAILABLE 0 -#endif -#endif /* ifndef T1HA0_AESNI_AVAILABLE */ - -#if T1HA0_AESNI_AVAILABLE -T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_noavx(void); -T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_avx(void); -#ifndef __e2k__ -T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_avx2(void); -#endif -#endif /* if T1HA0_AESNI_AVAILABLE */ -#endif /* T1HA0_DISABLED */ - -/****************************************************************************** - * - * t1ha2 = 64 and 128-bit, SLIGHTLY MORE ATTENTION FOR QUALITY AND STRENGTH. - * - * - The recommended version of "Fast Positive Hash" with good quality - * for checksum, hash tables and fingerprinting. - * - Portable and extremely efficiency on modern 64-bit CPUs. - * Designed for 64-bit little-endian platforms, - * in other cases will runs slowly. - * - Great quality of hashing and still faster than other non-t1ha hashes. - * Provides streaming mode and 128-bit result. - * - * Note: Due performance reason 64- and 128-bit results are completely - * different each other, i.e. 64-bit result is NOT any part of 128-bit. 
- */ -#ifndef T1HA2_DISABLED - -/* The at-once variant with 64-bit result */ -T1HA_API uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed); - -/* The at-once variant with 128-bit result. - * Argument `extra_result` is NOT optional and MUST be valid. - * The high 64-bit part of 128-bit hash will be always unconditionally - * stored to the address given by `extra_result` argument. */ -T1HA_API uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, - const void *__restrict data, size_t length, - uint64_t seed); - -/* The init/update/final trinity for streaming. - * Return 64 or 128-bit result depentently from `extra_result` argument. */ -T1HA_API void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y); -T1HA_API void t1ha2_update(t1ha_context_t *__restrict ctx, - const void *__restrict data, size_t length); - -/* Argument `extra_result` is optional and MAY be NULL. - * - If `extra_result` is NOT NULL then the 128-bit hash will be calculated, - * and high 64-bit part of it will be stored to the address given - * by `extra_result` argument. - * - Otherwise the 64-bit hash will be calculated - * and returned from function directly. - * - * Note: Due performance reason 64- and 128-bit results are completely - * different each other, i.e. 64-bit result is NOT any part of 128-bit. */ -T1HA_API uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, - uint64_t *__restrict extra_result /* optional */); - -#endif /* T1HA2_DISABLED */ - -/****************************************************************************** - * - * t1ha1 = 64-bit, BASELINE FAST PORTABLE HASH: - * - * - Runs faster on 64-bit platforms in other cases may runs slowly. - * - Portable and stable, returns same 64-bit result - * on all architectures and CPUs. - * - Unfortunately it fails the "strict avalanche criteria", - * see test results at https://github.com/demerphq/smhasher. 
- * - * This flaw is insignificant for the t1ha1() purposes and imperceptible - * from a practical point of view. - * However, nowadays this issue has resolved in the next t1ha2(), - * that was initially planned to providing a bit more quality. - */ -#ifndef T1HA1_DISABLED - -/* The little-endian variant. */ -T1HA_API uint64_t t1ha1_le(const void *data, size_t length, uint64_t seed); - -/* The big-endian variant. */ -T1HA_API uint64_t t1ha1_be(const void *data, size_t length, uint64_t seed); - -#endif /* T1HA1_DISABLED */ - -/****************************************************************************** - * - * t1ha0 = 64-bit, JUST ONLY FASTER: - * - * - Provides fast-as-possible hashing for current CPU, including - * 32-bit systems and engaging the available hardware acceleration. - * - It is a facade that selects most quick-and-dirty hash - * for the current processor. For instance, on IA32 (x86) actual function - * will be selected in runtime, depending on current CPU capabilities - * - * BE CAREFUL!!! THIS IS MEANS: - * - * 1. The quality of hash is a subject for tradeoffs with performance. - * So, the quality and strength of t1ha0() may be lower than t1ha1(), - * especially on 32-bit targets, but then much faster. - * However, guaranteed that it passes all SMHasher tests. - * - * 2. No warranty that the hash result will be same for particular - * key on another machine or another version of libt1ha. - * - * Briefly, such hash-results and their derivatives, should be - * used only in runtime, but should not be persist or transferred - * over a network. - * - * - * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function could - * be used to get actual t1ha0() implementation address at runtime. This is - * useful for two cases: - * - calling by local pointer-to-function usually is little - * bit faster (less overhead) than via a PLT thru the DSO boundary. 
- * - GNU Indirect functions (see below) don't supported by environment - * and calling by t1ha0_funcptr is not available and/or expensive. - */ - -#ifndef T1HA0_DISABLED - -/* The little-endian variant for 32-bit CPU. */ -uint64_t t1ha0_32le(const void *data, size_t length, uint64_t seed); -/* The big-endian variant for 32-bit CPU. */ -uint64_t t1ha0_32be(const void *data, size_t length, uint64_t seed); - -/* Define T1HA0_AESNI_AVAILABLE to 0 for disable AES-NI support. */ -#ifndef T1HA0_AESNI_AVAILABLE -#if defined(__e2k__) || \ - (defined(__ia32__) && (!defined(_M_IX86) || _MSC_VER > 1800)) -#define T1HA0_AESNI_AVAILABLE 1 -#else -#define T1HA0_AESNI_AVAILABLE 0 -#endif -#endif /* T1HA0_AESNI_AVAILABLE */ - -/* Define T1HA0_RUNTIME_SELECT to 0 for disable dispatching t1ha0 at runtime. */ -#ifndef T1HA0_RUNTIME_SELECT -#if T1HA0_AESNI_AVAILABLE && !defined(__e2k__) -#define T1HA0_RUNTIME_SELECT 1 -#else -#define T1HA0_RUNTIME_SELECT 0 -#endif -#endif /* T1HA0_RUNTIME_SELECT */ - -#if !T1HA0_RUNTIME_SELECT && !defined(T1HA0_USE_DEFINE) -#if defined(__LCC__) -#define T1HA0_USE_DEFINE 1 -#else -#define T1HA0_USE_DEFINE 0 -#endif -#endif /* T1HA0_USE_DEFINE */ - -#if T1HA0_AESNI_AVAILABLE -uint64_t t1ha0_ia32aes_noavx(const void *data, size_t length, uint64_t seed); -uint64_t t1ha0_ia32aes_avx(const void *data, size_t length, uint64_t seed); -#ifndef __e2k__ -uint64_t t1ha0_ia32aes_avx2(const void *data, size_t length, uint64_t seed); -#endif -#endif /* T1HA0_AESNI_AVAILABLE */ - -#if T1HA0_RUNTIME_SELECT -typedef uint64_t (*t1ha0_function_t)(const void *, size_t, uint64_t); -T1HA_API t1ha0_function_t t1ha0_resolve(void); -#if T1HA_USE_INDIRECT_FUNCTIONS -T1HA_API uint64_t t1ha0(const void *data, size_t length, uint64_t seed); -#else -/* Otherwise function pointer will be used. - * Unfortunately this may cause some overhead calling. 
*/ -T1HA_API extern uint64_t (*t1ha0_funcptr)(const void *data, size_t length, - uint64_t seed); -static __force_inline uint64_t t1ha0(const void *data, size_t length, - uint64_t seed) { - return t1ha0_funcptr(data, length, seed); -} -#endif /* T1HA_USE_INDIRECT_FUNCTIONS */ - -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -#if T1HA0_USE_DEFINE - -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) -#define t1ha0 t1ha2_atonce -#else -#define t1ha0 t1ha1_be -#endif /* T1HA1_DISABLED */ -#else /* 32/64 */ -#define t1ha0 t1ha0_32be -#endif /* 32/64 */ - -#else /* T1HA0_USE_DEFINE */ - -static __force_inline uint64_t t1ha0(const void *data, size_t length, - uint64_t seed) { -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) - return t1ha2_atonce(data, length, seed); -#else - return t1ha1_be(data, length, seed); -#endif /* T1HA1_DISABLED */ -#else /* 32/64 */ - return t1ha0_32be(data, length, seed); -#endif /* 32/64 */ -} - -#endif /* !T1HA0_USE_DEFINE */ - -#else /* !T1HA0_RUNTIME_SELECT && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ - -#if T1HA0_USE_DEFINE - -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) -#define t1ha0 t1ha2_atonce -#else -#define t1ha0 t1ha1_le -#endif /* T1HA1_DISABLED */ -#else /* 32/64 */ -#define t1ha0 t1ha0_32le -#endif /* 32/64 */ - -#else - -static __force_inline uint64_t t1ha0(const void *data, size_t length, - uint64_t seed) { -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) - return t1ha2_atonce(data, length, seed); -#else - return t1ha1_le(data, length, seed); -#endif /* T1HA1_DISABLED */ -#else /* 32/64 */ - return t1ha0_32le(data, 
length, seed); -#endif /* 32/64 */ -} - -#endif /* !T1HA0_USE_DEFINE */ - -#endif /* !T1HA0_RUNTIME_SELECT */ - -#endif /* T1HA0_DISABLED */ - -#ifdef __cplusplus -} -#endif - -#if __GNUC_PREREQ(4, 0) -#pragma GCC visibility pop -#endif /* __GNUC_PREREQ(4,0) */ +#define T1HA_USE_INDIRECT_FUNCTIONS 1 +#else +#define T1HA_USE_INDIRECT_FUNCTIONS 0 +#endif +#endif /* T1HA_USE_INDIRECT_FUNCTIONS */ + +#if __GNUC_PREREQ(4, 0) +#pragma GCC visibility push(hidden) +#endif /* __GNUC_PREREQ(4,0) */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union T1HA_ALIGN_PREFIX t1ha_state256 { + uint8_t bytes[32]; + uint32_t u32[8]; + uint64_t u64[4]; + struct { + uint64_t a, b, c, d; + } n; +} t1ha_state256_t T1HA_ALIGN_SUFFIX; + +typedef struct t1ha_context { + t1ha_state256_t state; + t1ha_state256_t buffer; + size_t partial; + uint64_t total; +} t1ha_context_t; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +/****************************************************************************** + * + * Self-testing API. + * + * Unfortunately, some compilers (exactly only Microsoft Visual C/C++) has + * a bugs which leads t1ha-functions to produce wrong results. This API allows + * check the correctness of the actual code in runtime. + * + * All check-functions returns 0 on success, or -1 in case the corresponding + * hash-function failed verification. PLEASE, always perform such checking at + * initialization of your code, if you using MSVC or other troubleful compilers. 
+ */ + +T1HA_API int t1ha_selfcheck__all_enabled(void); + +#ifndef T1HA2_DISABLED +T1HA_API int t1ha_selfcheck__t1ha2_atonce(void); +T1HA_API int t1ha_selfcheck__t1ha2_atonce128(void); +T1HA_API int t1ha_selfcheck__t1ha2_stream(void); +T1HA_API int t1ha_selfcheck__t1ha2(void); +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED +T1HA_API int t1ha_selfcheck__t1ha1_le(void); +T1HA_API int t1ha_selfcheck__t1ha1_be(void); +T1HA_API int t1ha_selfcheck__t1ha1(void); +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED +T1HA_API int t1ha_selfcheck__t1ha0_32le(void); +T1HA_API int t1ha_selfcheck__t1ha0_32be(void); +T1HA_API int t1ha_selfcheck__t1ha0(void); + +/* Define T1HA0_AESNI_AVAILABLE to 0 for disable AES-NI support. */ +#ifndef T1HA0_AESNI_AVAILABLE +#if defined(__e2k__) || \ + (defined(__ia32__) && (!defined(_M_IX86) || _MSC_VER > 1800)) +#define T1HA0_AESNI_AVAILABLE 1 +#else +#define T1HA0_AESNI_AVAILABLE 0 +#endif +#endif /* ifndef T1HA0_AESNI_AVAILABLE */ + +#if T1HA0_AESNI_AVAILABLE +T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_noavx(void); +T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_avx(void); +#ifndef __e2k__ +T1HA_API int t1ha_selfcheck__t1ha0_ia32aes_avx2(void); +#endif +#endif /* if T1HA0_AESNI_AVAILABLE */ +#endif /* T1HA0_DISABLED */ + +/****************************************************************************** + * + * t1ha2 = 64 and 128-bit, SLIGHTLY MORE ATTENTION FOR QUALITY AND STRENGTH. + * + * - The recommended version of "Fast Positive Hash" with good quality + * for checksum, hash tables and fingerprinting. + * - Portable and extremely efficiency on modern 64-bit CPUs. + * Designed for 64-bit little-endian platforms, + * in other cases will runs slowly. + * - Great quality of hashing and still faster than other non-t1ha hashes. + * Provides streaming mode and 128-bit result. + * + * Note: Due performance reason 64- and 128-bit results are completely + * different each other, i.e. 64-bit result is NOT any part of 128-bit. 
+ */ +#ifndef T1HA2_DISABLED + +/* The at-once variant with 64-bit result */ +T1HA_API uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed); + +/* The at-once variant with 128-bit result. + * Argument `extra_result` is NOT optional and MUST be valid. + * The high 64-bit part of 128-bit hash will be always unconditionally + * stored to the address given by `extra_result` argument. */ +T1HA_API uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, + const void *__restrict data, size_t length, + uint64_t seed); + +/* The init/update/final trinity for streaming. + * Return 64 or 128-bit result depentently from `extra_result` argument. */ +T1HA_API void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y); +T1HA_API void t1ha2_update(t1ha_context_t *__restrict ctx, + const void *__restrict data, size_t length); + +/* Argument `extra_result` is optional and MAY be NULL. + * - If `extra_result` is NOT NULL then the 128-bit hash will be calculated, + * and high 64-bit part of it will be stored to the address given + * by `extra_result` argument. + * - Otherwise the 64-bit hash will be calculated + * and returned from function directly. + * + * Note: Due performance reason 64- and 128-bit results are completely + * different each other, i.e. 64-bit result is NOT any part of 128-bit. */ +T1HA_API uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, + uint64_t *__restrict extra_result /* optional */); + +#endif /* T1HA2_DISABLED */ + +/****************************************************************************** + * + * t1ha1 = 64-bit, BASELINE FAST PORTABLE HASH: + * + * - Runs faster on 64-bit platforms in other cases may runs slowly. + * - Portable and stable, returns same 64-bit result + * on all architectures and CPUs. + * - Unfortunately it fails the "strict avalanche criteria", + * see test results at https://github.com/demerphq/smhasher. 
+ * + * This flaw is insignificant for the t1ha1() purposes and imperceptible + * from a practical point of view. + * However, nowadays this issue has resolved in the next t1ha2(), + * that was initially planned to providing a bit more quality. + */ +#ifndef T1HA1_DISABLED + +/* The little-endian variant. */ +T1HA_API uint64_t t1ha1_le(const void *data, size_t length, uint64_t seed); + +/* The big-endian variant. */ +T1HA_API uint64_t t1ha1_be(const void *data, size_t length, uint64_t seed); + +#endif /* T1HA1_DISABLED */ + +/****************************************************************************** + * + * t1ha0 = 64-bit, JUST ONLY FASTER: + * + * - Provides fast-as-possible hashing for current CPU, including + * 32-bit systems and engaging the available hardware acceleration. + * - It is a facade that selects most quick-and-dirty hash + * for the current processor. For instance, on IA32 (x86) actual function + * will be selected in runtime, depending on current CPU capabilities + * + * BE CAREFUL!!! THIS IS MEANS: + * + * 1. The quality of hash is a subject for tradeoffs with performance. + * So, the quality and strength of t1ha0() may be lower than t1ha1(), + * especially on 32-bit targets, but then much faster. + * However, guaranteed that it passes all SMHasher tests. + * + * 2. No warranty that the hash result will be same for particular + * key on another machine or another version of libt1ha. + * + * Briefly, such hash-results and their derivatives, should be + * used only in runtime, but should not be persist or transferred + * over a network. + * + * + * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function could + * be used to get actual t1ha0() implementation address at runtime. This is + * useful for two cases: + * - calling by local pointer-to-function usually is little + * bit faster (less overhead) than via a PLT thru the DSO boundary. 
+ * - GNU Indirect functions (see below) don't supported by environment + * and calling by t1ha0_funcptr is not available and/or expensive. + */ + +#ifndef T1HA0_DISABLED + +/* The little-endian variant for 32-bit CPU. */ +uint64_t t1ha0_32le(const void *data, size_t length, uint64_t seed); +/* The big-endian variant for 32-bit CPU. */ +uint64_t t1ha0_32be(const void *data, size_t length, uint64_t seed); + +/* Define T1HA0_AESNI_AVAILABLE to 0 for disable AES-NI support. */ +#ifndef T1HA0_AESNI_AVAILABLE +#if defined(__e2k__) || \ + (defined(__ia32__) && (!defined(_M_IX86) || _MSC_VER > 1800)) +#define T1HA0_AESNI_AVAILABLE 1 +#else +#define T1HA0_AESNI_AVAILABLE 0 +#endif +#endif /* T1HA0_AESNI_AVAILABLE */ + +/* Define T1HA0_RUNTIME_SELECT to 0 for disable dispatching t1ha0 at runtime. */ +#ifndef T1HA0_RUNTIME_SELECT +#if T1HA0_AESNI_AVAILABLE && !defined(__e2k__) +#define T1HA0_RUNTIME_SELECT 1 +#else +#define T1HA0_RUNTIME_SELECT 0 +#endif +#endif /* T1HA0_RUNTIME_SELECT */ + +#if !T1HA0_RUNTIME_SELECT && !defined(T1HA0_USE_DEFINE) +#if defined(__LCC__) +#define T1HA0_USE_DEFINE 1 +#else +#define T1HA0_USE_DEFINE 0 +#endif +#endif /* T1HA0_USE_DEFINE */ + +#if T1HA0_AESNI_AVAILABLE +uint64_t t1ha0_ia32aes_noavx(const void *data, size_t length, uint64_t seed); +uint64_t t1ha0_ia32aes_avx(const void *data, size_t length, uint64_t seed); +#ifndef __e2k__ +uint64_t t1ha0_ia32aes_avx2(const void *data, size_t length, uint64_t seed); +#endif +#endif /* T1HA0_AESNI_AVAILABLE */ + +#if T1HA0_RUNTIME_SELECT +typedef uint64_t (*t1ha0_function_t)(const void *, size_t, uint64_t); +T1HA_API t1ha0_function_t t1ha0_resolve(void); +#if T1HA_USE_INDIRECT_FUNCTIONS +T1HA_API uint64_t t1ha0(const void *data, size_t length, uint64_t seed); +#else +/* Otherwise function pointer will be used. + * Unfortunately this may cause some overhead calling. 
*/ +T1HA_API extern uint64_t (*t1ha0_funcptr)(const void *data, size_t length, + uint64_t seed); +static __force_inline uint64_t t1ha0(const void *data, size_t length, + uint64_t seed) { + return t1ha0_funcptr(data, length, seed); +} +#endif /* T1HA_USE_INDIRECT_FUNCTIONS */ + +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +#if T1HA0_USE_DEFINE + +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) +#define t1ha0 t1ha2_atonce +#else +#define t1ha0 t1ha1_be +#endif /* T1HA1_DISABLED */ +#else /* 32/64 */ +#define t1ha0 t1ha0_32be +#endif /* 32/64 */ + +#else /* T1HA0_USE_DEFINE */ + +static __force_inline uint64_t t1ha0(const void *data, size_t length, + uint64_t seed) { +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) + return t1ha2_atonce(data, length, seed); +#else + return t1ha1_be(data, length, seed); +#endif /* T1HA1_DISABLED */ +#else /* 32/64 */ + return t1ha0_32be(data, length, seed); +#endif /* 32/64 */ +} + +#endif /* !T1HA0_USE_DEFINE */ + +#else /* !T1HA0_RUNTIME_SELECT && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ + +#if T1HA0_USE_DEFINE + +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) +#define t1ha0 t1ha2_atonce +#else +#define t1ha0 t1ha1_le +#endif /* T1HA1_DISABLED */ +#else /* 32/64 */ +#define t1ha0 t1ha0_32le +#endif /* 32/64 */ + +#else + +static __force_inline uint64_t t1ha0(const void *data, size_t length, + uint64_t seed) { +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) + return t1ha2_atonce(data, length, seed); +#else + return t1ha1_le(data, length, seed); +#endif /* T1HA1_DISABLED */ +#else /* 32/64 */ + return t1ha0_32le(data, 
length, seed); +#endif /* 32/64 */ +} + +#endif /* !T1HA0_USE_DEFINE */ + +#endif /* !T1HA0_RUNTIME_SELECT */ + +#endif /* T1HA0_DISABLED */ + +#ifdef __cplusplus +} +#endif + +#if __GNUC_PREREQ(4, 0) +#pragma GCC visibility pop +#endif /* __GNUC_PREREQ(4,0) */ diff --git a/contrib/libs/t1ha/ya.make b/contrib/libs/t1ha/ya.make index 016a749f88..6b0c94f9f3 100644 --- a/contrib/libs/t1ha/ya.make +++ b/contrib/libs/t1ha/ya.make @@ -1,42 +1,42 @@ # Generated by devtools/yamaker from nixpkgs 8e778c6df06ab73862b9abc71f40489f9bbf6c40. - + LIBRARY() - + OWNER( va-kuznecov g:cpp-contrib ) - + VERSION(2.1.4) - + ORIGINAL_SOURCE(https://github.com/PositiveTechnologies/t1ha/archive/v2.1.4.tar.gz) LICENSE(Zlib) - + LICENSE_TEXTS(.yandex_meta/licenses.list.txt) NO_COMPILER_WARNINGS() NO_RUNTIME() -SRCS( - src/t1ha0.c - src/t1ha0_ia32aes_avx.c - src/t1ha0_ia32aes_avx2.c - src/t1ha0_ia32aes_noavx.c - src/t1ha0_selfcheck.c - src/t1ha1.c - src/t1ha1_selfcheck.c - src/t1ha2.c - src/t1ha2_selfcheck.c - src/t1ha_selfcheck.c - src/t1ha_selfcheck_all.c -) - +SRCS( + src/t1ha0.c + src/t1ha0_ia32aes_avx.c + src/t1ha0_ia32aes_avx2.c + src/t1ha0_ia32aes_noavx.c + src/t1ha0_selfcheck.c + src/t1ha1.c + src/t1ha1_selfcheck.c + src/t1ha2.c + src/t1ha2_selfcheck.c + src/t1ha_selfcheck.c + src/t1ha_selfcheck_all.c +) + IF (ARCH_X86_64) CFLAGS( -maes ) ENDIF() - -END() + +END() diff --git a/contrib/libs/ya.make b/contrib/libs/ya.make index 36d2c99423..9c4640fdcf 100644 --- a/contrib/libs/ya.make +++ b/contrib/libs/ya.make @@ -50,7 +50,7 @@ RECURSE( cxxsupp/libcxxabi-parts djvulibre dlib - dpdk + dpdk dr_wav dwarflib earcut @@ -241,7 +241,7 @@ RECURSE( nodejs_12 node_nan nsync - numa + numa nvidia odpi onnx @@ -304,7 +304,7 @@ RECURSE( sophos sparrowhawk sparsehash - spdk + spdk speex spirv_cross spirv_headers @@ -321,7 +321,7 @@ RECURSE( svt-hevc svt-vp9 szip - t1ha + t1ha taocrypt tbb tcmalloc @@ -384,7 +384,7 @@ IF (OS_DARWIN) gperftools osxfuse macfuse-headers - uuid + uuid ) ENDIF() @@ -404,7 
+404,7 @@ IF (OS_LINUX) openmpi sdbus-cpp systemd - uuid + uuid virtiofsd ) ENDIF() diff --git a/library/cpp/actors/core/actorsystem.cpp b/library/cpp/actors/core/actorsystem.cpp index df006ba7b8..c58698a206 100644 --- a/library/cpp/actors/core/actorsystem.cpp +++ b/library/cpp/actors/core/actorsystem.cpp @@ -7,7 +7,7 @@ #include "interconnect.h" #include "servicemap.h" #include "scheduler_queue.h" -#include "scheduler_actor.h" +#include "scheduler_actor.h" #include "log.h" #include "probes.h" #include "ask.h" diff --git a/library/cpp/actors/core/executor_thread.cpp b/library/cpp/actors/core/executor_thread.cpp index d4aac6e60e..446b651efd 100644 --- a/library/cpp/actors/core/executor_thread.cpp +++ b/library/cpp/actors/core/executor_thread.cpp @@ -20,7 +20,7 @@ #endif #include <util/system/type_name.h> -#include <util/system/datetime.h> +#include <util/system/datetime.h> LWTRACE_USING(ACTORLIB_PROVIDER) diff --git a/library/cpp/actors/core/probes.h b/library/cpp/actors/core/probes.h index 0a7804c210..4912d6dd26 100644 --- a/library/cpp/actors/core/probes.h +++ b/library/cpp/actors/core/probes.h @@ -71,9 +71,9 @@ PROBE(SlowICDropConfirmed, GROUPS("ActorLibSlowIC"), \ TYPES(ui32, double), \ NAMES("peerId", "icDropConfirmedMs")) \ - PROBE(ActorsystemScheduler, GROUPS("Durations"), \ - TYPES(ui64, ui64, ui32, ui32, ui64, ui64), \ - NAMES("timeUs", "timerfd_expirations", "eventsGottenFromQueues", "eventsSent", \ + PROBE(ActorsystemScheduler, GROUPS("Durations"), \ + TYPES(ui64, ui64, ui32, ui32, ui64, ui64), \ + NAMES("timeUs", "timerfd_expirations", "eventsGottenFromQueues", "eventsSent", \ "eventsInSendQueue", "eventSchedulingErrorUs")) \ PROBE(ForwardEvent, GROUPS("Orbit", "InterconnectSessionTCP"), \ TYPES(ui32, ui32, ui32, LWTYPE_ACTORID, LWTYPE_ACTORID, ui64, ui32), \ diff --git a/library/cpp/actors/core/scheduler_actor.cpp b/library/cpp/actors/core/scheduler_actor.cpp index cccb302c1f..febc5e40dd 100644 --- a/library/cpp/actors/core/scheduler_actor.cpp +++ 
b/library/cpp/actors/core/scheduler_actor.cpp @@ -1,135 +1,135 @@ -#include "actor_bootstrapped.h" -#include "hfunc.h" -#include "probes.h" -#include "scheduler_actor.h" -#include "scheduler_queue.h" - +#include "actor_bootstrapped.h" +#include "hfunc.h" +#include "probes.h" +#include "scheduler_actor.h" +#include "scheduler_queue.h" + #include <library/cpp/actors/interconnect/poller_actor.h> -#include <util/system/hp_timer.h> - -#ifdef __linux__ +#include <util/system/hp_timer.h> + +#ifdef __linux__ #include <sys/timerfd.h> #include <errno.h> - -LWTRACE_USING(ACTORLIB_PROVIDER); - -namespace NActors { + +LWTRACE_USING(ACTORLIB_PROVIDER); + +namespace NActors { class TTimerDescriptor: public TSharedDescriptor { - const int Descriptor; + const int Descriptor; - public: - TTimerDescriptor() + public: + TTimerDescriptor() : Descriptor(timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK)) - { - Y_VERIFY(Descriptor != -1, "timerfd_create() failed with %s", strerror(errno)); - } - + { + Y_VERIFY(Descriptor != -1, "timerfd_create() failed with %s", strerror(errno)); + } + ~TTimerDescriptor() override { - close(Descriptor); - } - + close(Descriptor); + } + int GetDescriptor() override { - return Descriptor; - } - }; - + return Descriptor; + } + }; + class TSchedulerActor: public TActor<TSchedulerActor> { - const TSchedulerConfig Cfg; - TIntrusivePtr<TSharedDescriptor> TimerDescriptor; - - TVector<NSchedulerQueue::TReader*> Readers; - + const TSchedulerConfig Cfg; + TIntrusivePtr<TSharedDescriptor> TimerDescriptor; + + TVector<NSchedulerQueue::TReader*> Readers; + TActorId PollerActor; TPollerToken::TPtr PollerToken; - - ui64 RealTime; + + ui64 RealTime; ui64 MonotonicTime; - - ui64 ActiveTick; - typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues + + ui64 ActiveTick; + typedef TMap<ui64, TAutoPtr<NSchedulerQueue::TQueueType>> TMomentMap; // intrasecond queues typedef THashMap<ui64, TAutoPtr<TMomentMap>> TScheduleMap; // over-second schedule 
- - TScheduleMap ScheduleMap; - - THolder<NThreading::TLegacyFuture<void, false>> MainCycle; - - static const ui64 IntrasecondThreshold = 1048576; // ~second - TAutoPtr<TMomentMap> ActiveSec; + + TScheduleMap ScheduleMap; + + THolder<NThreading::TLegacyFuture<void, false>> MainCycle; + + static const ui64 IntrasecondThreshold = 1048576; // ~second + TAutoPtr<TMomentMap> ActiveSec; volatile ui64* CurrentTimestamp = nullptr; volatile ui64* CurrentMonotonic = nullptr; - TDeque<TAutoPtr<IEventHandle>> EventsToBeSent; - - public: + TDeque<TAutoPtr<IEventHandle>> EventsToBeSent; + + public: static constexpr IActor::EActivityType ActorActivityType() { return IActor::ACTOR_SYSTEM_SCHEDULER_ACTOR; } TSchedulerActor(const TSchedulerConfig& cfg) - : TActor(&TSchedulerActor::StateFunc) - , Cfg(cfg) - , TimerDescriptor(new TTimerDescriptor()) + : TActor(&TSchedulerActor::StateFunc) + , Cfg(cfg) + , TimerDescriptor(new TTimerDescriptor()) , PollerActor(MakePollerActorId()) - { - Y_ASSERT(Cfg.ResolutionMicroseconds != 0); - Y_ASSERT(Cfg.ProgressThreshold != 0); - Become(&TSchedulerActor::StateFunc); - } - + { + Y_ASSERT(Cfg.ResolutionMicroseconds != 0); + Y_ASSERT(Cfg.ProgressThreshold != 0); + Become(&TSchedulerActor::StateFunc); + } + void Handle(TEvSchedulerInitialize::TPtr& ev, const TActorContext& ctx) { const TEvSchedulerInitialize& evInitialize = *ev->Get(); - Y_ASSERT(evInitialize.ScheduleReaders.size() != 0); - Readers.resize(evInitialize.ScheduleReaders.size()); - Copy(evInitialize.ScheduleReaders.begin(), evInitialize.ScheduleReaders.end(), Readers.begin()); - - Y_ASSERT(evInitialize.CurrentTimestamp != nullptr); - CurrentTimestamp = evInitialize.CurrentTimestamp; - + Y_ASSERT(evInitialize.ScheduleReaders.size() != 0); + Readers.resize(evInitialize.ScheduleReaders.size()); + Copy(evInitialize.ScheduleReaders.begin(), evInitialize.ScheduleReaders.end(), Readers.begin()); + + Y_ASSERT(evInitialize.CurrentTimestamp != nullptr); + CurrentTimestamp = 
evInitialize.CurrentTimestamp; + Y_ASSERT(evInitialize.CurrentMonotonic != nullptr); CurrentMonotonic = evInitialize.CurrentMonotonic; - struct itimerspec new_time; - memset(&new_time, 0, sizeof(new_time)); - new_time.it_value.tv_nsec = Cfg.ResolutionMicroseconds * 1000; - new_time.it_interval.tv_nsec = Cfg.ResolutionMicroseconds * 1000; - int ret = timerfd_settime(TimerDescriptor->GetDescriptor(), 0, &new_time, NULL); - Y_VERIFY(ret != -1, "timerfd_settime() failed with %s", strerror(errno)); + struct itimerspec new_time; + memset(&new_time, 0, sizeof(new_time)); + new_time.it_value.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + new_time.it_interval.tv_nsec = Cfg.ResolutionMicroseconds * 1000; + int ret = timerfd_settime(TimerDescriptor->GetDescriptor(), 0, &new_time, NULL); + Y_VERIFY(ret != -1, "timerfd_settime() failed with %s", strerror(errno)); const bool success = ctx.Send(PollerActor, new TEvPollerRegister(TimerDescriptor, SelfId(), {})); Y_VERIFY(success); - + RealTime = RelaxedLoad(CurrentTimestamp); MonotonicTime = RelaxedLoad(CurrentMonotonic); - + ActiveTick = AlignUp<ui64>(MonotonicTime, IntrasecondThreshold); - } - + } + void Handle(TEvPollerRegisterResult::TPtr ev, const TActorContext& ctx) { PollerToken = ev->Get()->PollerToken; HandleSchedule(ctx); } - void UpdateTime() { + void UpdateTime() { RealTime = TInstant::Now().MicroSeconds(); MonotonicTime = Max(MonotonicTime, GetMonotonicMicroSeconds()); AtomicStore(CurrentTimestamp, RealTime); AtomicStore(CurrentMonotonic, MonotonicTime); - } - + } + void TryUpdateTime(NHPTimer::STime* lastTimeUpdate) { - NHPTimer::STime hpnow; + NHPTimer::STime hpnow; GetTimeFast(&hpnow); - const ui64 elapsedCycles = hpnow > *lastTimeUpdate ? hpnow - *lastTimeUpdate : 0; - if (elapsedCycles > Cfg.ResolutionMicroseconds * (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold)) { - UpdateTime(); + const ui64 elapsedCycles = hpnow > *lastTimeUpdate ? 
hpnow - *lastTimeUpdate : 0; + if (elapsedCycles > Cfg.ResolutionMicroseconds * (NHPTimer::GetCyclesPerSecond() / IntrasecondThreshold)) { + UpdateTime(); GetTimeFast(lastTimeUpdate); - } - } - - void HandleSchedule(const TActorContext& ctx) { + } + } + + void HandleSchedule(const TActorContext& ctx) { for (;;) { NHPTimer::STime schedulingStart; GetTimeFast(&schedulingStart); NHPTimer::STime lastTimeUpdate = schedulingStart; - + ui64 expired; ssize_t bytesRead; bytesRead = read(TimerDescriptor->GetDescriptor(), &expired, sizeof(expired)); @@ -143,7 +143,7 @@ namespace NActors { } Y_VERIFY(bytesRead == sizeof(expired), "Error while reading from timerfd, strerror# %s", strerror(errno)); UpdateTime(); - + ui32 eventsGottenFromQueues = 0; // collect everything from queues for (ui32 i = 0; i != Readers.size(); ++i) { @@ -151,9 +151,9 @@ namespace NActors { const ui64 instant = AlignUp<ui64>(x->InstantMicroseconds, Cfg.ResolutionMicroseconds); IEventHandle* const ev = x->Ev; ISchedulerCookie* const cookie = x->Cookie; - + // check is cookie still valid? 
looks like it will hurt performance w/o sagnificant memory save - + if (instant <= ActiveTick) { if (!ActiveSec) ActiveSec.Reset(new TMomentMap()); @@ -173,9 +173,9 @@ namespace NActors { } ++eventsGottenFromQueues; TryUpdateTime(&lastTimeUpdate); - } - } - + } + } + ui64 eventSchedulingErrorUs = 0; // send everything triggered on schedule for (;;) { @@ -197,17 +197,17 @@ namespace NActors { delete ev; } } else { - EventsToBeSent.push_back(ev); - } + EventsToBeSent.push_back(ev); + } TryUpdateTime(&lastTimeUpdate); - } - } + } + } ActiveSec->erase(it); } else { break; - } - } - + } + } + if (ActiveTick <= MonotonicTime) { Y_VERIFY_DEBUG(!ActiveSec || ActiveSec->empty()); ActiveSec.Destroy(); @@ -218,12 +218,12 @@ namespace NActors { ScheduleMap.erase(it); } continue; - } + } // ok, if we are here - then nothing is ready, so send step complete break; - } - + } + // Send all from buffer queue const ui64 eventsToBeSentSize = EventsToBeSent.size(); ui32 sentCount = 0; @@ -237,7 +237,7 @@ namespace NActors { ctx.Send(EventsToBeSent.front()); EventsToBeSent.pop_front(); } - + NHPTimer::STime hpnow; GetTimeFast(&hpnow); const ui64 processingTime = hpnow > schedulingStart ? 
hpnow - schedulingStart : 0; @@ -245,35 +245,35 @@ namespace NActors { LWPROBE(ActorsystemScheduler, elapsedTimeMicroseconds, expired, eventsGottenFromQueues, sentCount, eventsToBeSentSize, eventSchedulingErrorUs); TryUpdateTime(&lastTimeUpdate); - } - } - + } + } + STRICT_STFUNC(StateFunc, HFunc(TEvSchedulerInitialize, Handle) CFunc(TEvPollerReady::EventType, HandleSchedule) CFunc(TEvents::TSystem::PoisonPill, Die) HFunc(TEvPollerRegisterResult, Handle) ) - }; - + }; + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { - if (cfg.UseSchedulerActor) { - return new TSchedulerActor(cfg); - } else { - return nullptr; - } - } - + if (cfg.UseSchedulerActor) { + return new TSchedulerActor(cfg); + } else { + return nullptr; + } + } + } - -#else // linux - -namespace NActors { + +#else // linux + +namespace NActors { IActor* CreateSchedulerActor(const TSchedulerConfig& cfg) { - Y_UNUSED(cfg); - return nullptr; - } - + Y_UNUSED(cfg); + return nullptr; + } + } - -#endif // linux + +#endif // linux diff --git a/library/cpp/actors/core/scheduler_actor.h b/library/cpp/actors/core/scheduler_actor.h index 5aa9f0216d..c2c561b43d 100644 --- a/library/cpp/actors/core/scheduler_actor.h +++ b/library/cpp/actors/core/scheduler_actor.h @@ -1,29 +1,29 @@ -#pragma once - -#include "actor.h" -#include "event_local.h" -#include "events.h" -#include "scheduler_basic.h" - -namespace NActors { - struct TEvSchedulerInitialize : TEventLocal<TEvSchedulerInitialize, TEvents::TSystem::Bootstrap> { - TVector<NSchedulerQueue::TReader*> ScheduleReaders; +#pragma once + +#include "actor.h" +#include "event_local.h" +#include "events.h" +#include "scheduler_basic.h" + +namespace NActors { + struct TEvSchedulerInitialize : TEventLocal<TEvSchedulerInitialize, TEvents::TSystem::Bootstrap> { + TVector<NSchedulerQueue::TReader*> ScheduleReaders; volatile ui64* CurrentTimestamp; volatile ui64* CurrentMonotonic; - + TEvSchedulerInitialize(const TVector<NSchedulerQueue::TReader*>& scheduleReaders, 
volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) - : ScheduleReaders(scheduleReaders) - , CurrentTimestamp(currentTimestamp) + : ScheduleReaders(scheduleReaders) + , CurrentTimestamp(currentTimestamp) , CurrentMonotonic(currentMonotonic) { } - }; - + }; + IActor* CreateSchedulerActor(const TSchedulerConfig& cfg); - + inline TActorId MakeSchedulerActorId() { - char x[12] = {'s', 'c', 'h', 'e', 'd', 'u', 'l', 'e', 'r', 's', 'e', 'r'}; + char x[12] = {'s', 'c', 'h', 'e', 'd', 'u', 'l', 'e', 'r', 's', 'e', 'r'}; return TActorId(0, TStringBuf(x, 12)); - } - + } + } diff --git a/library/cpp/actors/core/scheduler_actor_ut.cpp b/library/cpp/actors/core/scheduler_actor_ut.cpp index 8f5cf6b23f..09b7369d36 100644 --- a/library/cpp/actors/core/scheduler_actor_ut.cpp +++ b/library/cpp/actors/core/scheduler_actor_ut.cpp @@ -1,100 +1,100 @@ -#include "actor_coroutine.h" -#include "actorsystem.h" -#include "executor_pool_basic.h" -#include "scheduler_actor.h" -#include "scheduler_basic.h" -#include "events.h" -#include "event_local.h" -#include "hfunc.h" +#include "actor_coroutine.h" +#include "actorsystem.h" +#include "executor_pool_basic.h" +#include "scheduler_actor.h" +#include "scheduler_basic.h" +#include "events.h" +#include "event_local.h" +#include "hfunc.h" #include <library/cpp/actors/interconnect/poller_actor.h> #include <library/cpp/testing/unittest/registar.h> - -#include <util/system/sanitizers.h> - -using namespace NActors; - + +#include <util/system/sanitizers.h> + +using namespace NActors; + Y_UNIT_TEST_SUITE(SchedulerActor) { - class TTestActor: public TActorBootstrapped<TTestActor> { - TManualEvent& DoneEvent; - TAtomic& EventsProcessed; - TInstant LastWakeup; - const TAtomicBase EventsTotalCount; - const TDuration ScheduleDelta; - - public: - TTestActor(TManualEvent& doneEvent, TAtomic& eventsProcessed, TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) - : DoneEvent(doneEvent) - , EventsProcessed(eventsProcessed) - , 
EventsTotalCount(eventsTotalCount) - , ScheduleDelta(TDuration::MilliSeconds(scheduleDeltaMs)) - { - } - + class TTestActor: public TActorBootstrapped<TTestActor> { + TManualEvent& DoneEvent; + TAtomic& EventsProcessed; + TInstant LastWakeup; + const TAtomicBase EventsTotalCount; + const TDuration ScheduleDelta; + + public: + TTestActor(TManualEvent& doneEvent, TAtomic& eventsProcessed, TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) + : DoneEvent(doneEvent) + , EventsProcessed(eventsProcessed) + , EventsTotalCount(eventsTotalCount) + , ScheduleDelta(TDuration::MilliSeconds(scheduleDeltaMs)) + { + } + void Bootstrap(const TActorContext& ctx) { - LastWakeup = ctx.Now(); - Become(&TThis::StateFunc); - ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); - } - + LastWakeup = ctx.Now(); + Become(&TThis::StateFunc); + ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); + } + void Handle(TEvents::TEvWakeup::TPtr& /*ev*/, const TActorContext& ctx) { - const TInstant now = ctx.Now(); - UNIT_ASSERT(now - LastWakeup >= ScheduleDelta); - LastWakeup = now; - - if (AtomicIncrement(EventsProcessed) == EventsTotalCount) { - DoneEvent.Signal(); - } else { - ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); - } - } - + const TInstant now = ctx.Now(); + UNIT_ASSERT(now - LastWakeup >= ScheduleDelta); + LastWakeup = now; + + if (AtomicIncrement(EventsProcessed) == EventsTotalCount) { + DoneEvent.Signal(); + } else { + ctx.Schedule(ScheduleDelta, new TEvents::TEvWakeup()); + } + } + STRICT_STFUNC(StateFunc, {HFunc(TEvents::TEvWakeup, Handle)}) - }; - - void Test(TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) { + }; + + void Test(TAtomicBase eventsTotalCount, ui32 scheduleDeltaMs) { THolder<TActorSystemSetup> setup = MakeHolder<TActorSystemSetup>(); - setup->NodeId = 0; - setup->ExecutorsCount = 1; + setup->NodeId = 0; + setup->ExecutorsCount = 1; setup->Executors.Reset(new TAutoPtr<IExecutorPool>[setup->ExecutorsCount]); - for (ui32 i = 0; i < setup->ExecutorsCount; 
++i) { - setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic"); - } - // create poller actor (whether platform supports it) + for (ui32 i = 0; i < setup->ExecutorsCount; ++i) { + setup->Executors[i] = new TBasicExecutorPool(i, 5, 10, "basic"); + } + // create poller actor (whether platform supports it) TActorId pollerActorId; if (IActor* poller = CreatePollerActor()) { pollerActorId = MakePollerActorId(); - setup->LocalServices.emplace_back(pollerActorId, TActorSetupCmd(poller, TMailboxType::ReadAsFilled, 0)); - } + setup->LocalServices.emplace_back(pollerActorId, TActorSetupCmd(poller, TMailboxType::ReadAsFilled, 0)); + } TActorId schedulerActorId; if (IActor* schedulerActor = CreateSchedulerActor(TSchedulerConfig())) { schedulerActorId = MakeSchedulerActorId(); - setup->LocalServices.emplace_back(schedulerActorId, TActorSetupCmd(schedulerActor, TMailboxType::ReadAsFilled, 0)); - } - setup->Scheduler = CreateSchedulerThread(TSchedulerConfig()); - - TActorSystem actorSystem(setup); - - actorSystem.Start(); - - TManualEvent doneEvent; - TAtomic eventsProcessed = 0; - actorSystem.Register(new TTestActor(doneEvent, eventsProcessed, eventsTotalCount, scheduleDeltaMs)); - doneEvent.WaitI(); - - UNIT_ASSERT(AtomicGet(eventsProcessed) == eventsTotalCount); - - actorSystem.Stop(); - } - + setup->LocalServices.emplace_back(schedulerActorId, TActorSetupCmd(schedulerActor, TMailboxType::ReadAsFilled, 0)); + } + setup->Scheduler = CreateSchedulerThread(TSchedulerConfig()); + + TActorSystem actorSystem(setup); + + actorSystem.Start(); + + TManualEvent doneEvent; + TAtomic eventsProcessed = 0; + actorSystem.Register(new TTestActor(doneEvent, eventsProcessed, eventsTotalCount, scheduleDeltaMs)); + doneEvent.WaitI(); + + UNIT_ASSERT(AtomicGet(eventsProcessed) == eventsTotalCount); + + actorSystem.Stop(); + } + Y_UNIT_TEST(LongEvents) { - Test(10, 500); - } - + Test(10, 500); + } + Y_UNIT_TEST(MediumEvents) { - Test(100, 50); - } - + Test(100, 50); + } + 
Y_UNIT_TEST(QuickEvents) { - Test(1000, 5); - } -} + Test(1000, 5); + } +} diff --git a/library/cpp/actors/core/scheduler_basic.cpp b/library/cpp/actors/core/scheduler_basic.cpp index ab5919c15f..fba200e16b 100644 --- a/library/cpp/actors/core/scheduler_basic.cpp +++ b/library/cpp/actors/core/scheduler_basic.cpp @@ -42,7 +42,7 @@ namespace NActors { , StopFlag(false) , ScheduleMap(3600) { - Y_VERIFY(!Config.UseSchedulerActor, "Cannot create scheduler thread because Config.UseSchedulerActor# true"); + Y_VERIFY(!Config.UseSchedulerActor, "Cannot create scheduler thread because Config.UseSchedulerActor# true"); } TBasicSchedulerThread::~TBasicSchedulerThread() { @@ -247,28 +247,28 @@ namespace NActors { MainCycle->Get(); MainCycle.Destroy(); } - + } - -#ifdef __linux__ - -namespace NActors { - ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) { - if (config.UseSchedulerActor) { - return new TMockSchedulerThread(); - } else { - return new TBasicSchedulerThread(config); - } - } - + +#ifdef __linux__ + +namespace NActors { + ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) { + if (config.UseSchedulerActor) { + return new TMockSchedulerThread(); + } else { + return new TBasicSchedulerThread(config); + } + } + } - -#else // __linux__ - -namespace NActors { + +#else // __linux__ + +namespace NActors { ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& config) { - return new TBasicSchedulerThread(config); - } + return new TBasicSchedulerThread(config); + } } - -#endif // __linux__ + +#endif // __linux__ diff --git a/library/cpp/actors/core/scheduler_basic.h b/library/cpp/actors/core/scheduler_basic.h index 043cc5257d..2ccde39235 100644 --- a/library/cpp/actors/core/scheduler_basic.h +++ b/library/cpp/actors/core/scheduler_basic.h @@ -49,33 +49,33 @@ namespace NActors { void PrepareStop() override; void Stop() override; }; - + class TMockSchedulerThread: public ISchedulerThread { - public: - virtual 
~TMockSchedulerThread() override { - } - + public: + virtual ~TMockSchedulerThread() override { + } + void Prepare(TActorSystem* actorSystem, volatile ui64* currentTimestamp, volatile ui64* currentMonotonic) override { - Y_UNUSED(actorSystem); + Y_UNUSED(actorSystem); *currentTimestamp = TInstant::Now().MicroSeconds(); *currentMonotonic = GetMonotonicMicroSeconds(); - } - + } + void PrepareSchedules(NSchedulerQueue::TReader** readers, ui32 scheduleReadersCount) override { - Y_UNUSED(readers); - Y_UNUSED(scheduleReadersCount); - } - - void Start() override { - } - - void PrepareStop() override { - } - - void Stop() override { - } - }; - + Y_UNUSED(readers); + Y_UNUSED(scheduleReadersCount); + } + + void Start() override { + } + + void PrepareStop() override { + } + + void Stop() override { + } + }; + ISchedulerThread* CreateSchedulerThread(const TSchedulerConfig& cfg); - + } diff --git a/library/cpp/actors/core/ut/ya.make b/library/cpp/actors/core/ut/ya.make index 50d885b1b3..3ee28d5850 100644 --- a/library/cpp/actors/core/ut/ya.make +++ b/library/cpp/actors/core/ut/ya.make @@ -23,11 +23,11 @@ ELSE() ENDIF() -PEERDIR( +PEERDIR( library/cpp/actors/interconnect library/cpp/actors/testlib -) - +) + SRCS( actor_coroutine_ut.cpp actor_ut.cpp diff --git a/library/cpp/actors/core/ya.make b/library/cpp/actors/core/ya.make index a583363523..880a9d00db 100644 --- a/library/cpp/actors/core/ya.make +++ b/library/cpp/actors/core/ya.make @@ -89,8 +89,8 @@ SRCS( probes.h process_stats.cpp process_stats.h - scheduler_actor.cpp - scheduler_actor.h + scheduler_actor.cpp + scheduler_actor.h scheduler_basic.cpp scheduler_basic.h scheduler_cookie.cpp diff --git a/library/cpp/actors/interconnect/events_local.h b/library/cpp/actors/interconnect/events_local.h index fa1054be14..8a46ffd535 100644 --- a/library/cpp/actors/interconnect/events_local.h +++ b/library/cpp/actors/interconnect/events_local.h @@ -1,27 +1,27 @@ -#pragma once - +#pragma once + #include 
<library/cpp/actors/core/events.h> #include <library/cpp/actors/core/event_local.h> #include <library/cpp/actors/protos/interconnect.pb.h> -#include <util/generic/deque.h> -#include <util/network/address.h> - -#include "interconnect_stream.h" -#include "packet.h" +#include <util/generic/deque.h> +#include <util/network/address.h> + +#include "interconnect_stream.h" +#include "packet.h" #include "types.h" - -namespace NActors { + +namespace NActors { struct TProgramInfo { ui64 PID = 0; ui64 StartTime = 0; ui64 Serial = 0; }; - + enum class ENetwork : ui32 { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // local messages //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - + Start = EventSpaceBegin(TEvents::ES_INTERCONNECT_TCP), SocketReadyRead = Start, @@ -71,11 +71,11 @@ namespace NActors { struct TEvSocketReadyRead: public TEventLocal<TEvSocketReadyRead, ui32(ENetwork::SocketReadyRead)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyRead, "Network: TEvSocketReadyRead") }; - + struct TEvSocketReadyWrite: public TEventLocal<TEvSocketReadyWrite, ui32(ENetwork::SocketReadyWrite)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyWrite, "Network: TEvSocketReadyWrite") }; - + struct TEvSocketError: public TEventLocal<TEvSocketError, ui32(ENetwork::SocketError)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketError, ::strerror(Error)) TString GetReason() const { @@ -83,18 +83,18 @@ namespace NActors { } const int Error; TIntrusivePtr<NInterconnect::TStreamSocket> Socket; - + TEvSocketError(int error, TIntrusivePtr<NInterconnect::TStreamSocket> sock) : Error(error) , Socket(std::move(sock)) { } }; - + struct TEvSocketConnect: public TEventLocal<TEvSocketConnect, ui32(ENetwork::Connect)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketConnect, "Network: TEvSocketConnect") }; - + struct TEvSocketDisconnect: public TEventLocal<TEvSocketDisconnect, ui32(ENetwork::Disconnect)> 
{ DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketDisconnect, "Network: TEvSocketDisconnect") TDisconnectReason Reason; @@ -104,7 +104,7 @@ namespace NActors { { } }; - + struct TEvHandshakeAsk: public TEventLocal<TEvHandshakeAsk, ui32(ENetwork::HandshakeAsk)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAsk, "Network: TEvHandshakeAsk") TEvHandshakeAsk(const TActorId& self, @@ -119,21 +119,21 @@ namespace NActors { const TActorId Peer; const ui64 Counter; }; - + struct TEvHandshakeAck: public TEventLocal<TEvHandshakeAck, ui32(ENetwork::HandshakeAck)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeAck, "Network: TEvHandshakeAck") - + TEvHandshakeAck(const TActorId& self, ui64 nextPacket, TSessionParams params) : Self(self) , NextPacket(nextPacket) , Params(std::move(params)) {} - + const TActorId Self; const ui64 NextPacket; const TSessionParams Params; }; - + struct TEvHandshakeNak : TEventLocal<TEvHandshakeNak, ui32(ENetwork::HandshakeNak)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvSocketReadyRead, "Network: TEvHandshakeNak") }; @@ -143,32 +143,32 @@ namespace NActors { ui32(ENetwork::HandshakeRequest)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeRequest, "Network: TEvHandshakeRequest") - + NActorsInterconnect::THandshakeRequest Record; }; - + struct TEvHandshakeReplyOK : public TEventLocal<TEvHandshakeReplyOK, ui32(ENetwork::HandshakeReplyOK)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyOK, "Network: TEvHandshakeReplyOK") - + NActorsInterconnect::THandshakeReply Record; }; - + struct TEvHandshakeReplyError : public TEventLocal<TEvHandshakeReplyError, ui32(ENetwork::HandshakeReplyError)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeReplyError, "Network: TEvHandshakeReplyError") - + TEvHandshakeReplyError(TString error) { Record.SetErrorExplaination(error); } - + NActorsInterconnect::THandshakeReply Record; }; - + struct TEvIncomingConnection: public TEventLocal<TEvIncomingConnection, ui32(ENetwork::IncomingConnection)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvIncomingConnection, "Network: TEvIncomingConnection") 
TIntrusivePtr<NInterconnect::TStreamSocket> Socket; @@ -179,10 +179,10 @@ namespace NActors { , Address(std::move(address)) {} }; - + struct TEvHandshakeDone: public TEventLocal<TEvHandshakeDone, ui32(ENetwork::HandshakeDone)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeDone, "Network: TEvHandshakeDone") - + TEvHandshakeDone( TIntrusivePtr<NInterconnect::TStreamSocket> socket, const TActorId& peer, @@ -198,7 +198,7 @@ namespace NActors { , Params(std::move(params)) { } - + TIntrusivePtr<NInterconnect::TStreamSocket> Socket; const TActorId Peer; const TActorId Self; @@ -206,10 +206,10 @@ namespace NActors { TAutoPtr<TProgramInfo> ProgramInfo; const TSessionParams Params; }; - + struct TEvHandshakeFail: public TEventLocal<TEvHandshakeFail, ui32(ENetwork::HandshakeFail)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHandshakeFail, "Network: TEvHandshakeFail") - + enum EnumHandshakeFail { HANDSHAKE_FAIL_TRANSIENT, HANDSHAKE_FAIL_PERMANENT, @@ -224,58 +224,58 @@ namespace NActors { const EnumHandshakeFail Temporary; const TString Explanation; - }; - + }; + struct TEvKick: public TEventLocal<TEvKick, ui32(ENetwork::Kick)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvKick, "Network: TEvKick") }; - + struct TEvFlush: public TEventLocal<TEvFlush, ui32(ENetwork::Flush)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvFlush, "Network: TEvFlush") }; - + struct TEvLocalNodeInfo : public TEventLocal<TEvLocalNodeInfo, ui32(ENetwork::NodeInfo)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvLocalNodeInfo, "Network: TEvLocalNodeInfo") - + ui32 NodeId; NAddr::IRemoteAddrPtr Address; }; - + struct TEvBunchOfEventsToDestroy : TEventLocal<TEvBunchOfEventsToDestroy, ui32(ENetwork::BunchOfEventsToDestroy)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvBunchOfEventsToDestroy, "Network: TEvBunchOfEventsToDestroy") - + TEvBunchOfEventsToDestroy(TDeque<TAutoPtr<IEventBase>> events) : Events(std::move(events)) { } - + TDeque<TAutoPtr<IEventBase>> Events; }; - + struct TEvResolveAddress : public TEventLocal<TEvResolveAddress, ui32(ENetwork::ResolveAddress)> { 
DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveAddress, "Network: TEvResolveAddress") - + TString Address; ui16 Port; }; - + struct TEvAddressInfo : public TEventLocal<TEvAddressInfo, ui32(ENetwork::AddressInfo)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvAddressInfo, "Network: TEvAddressInfo") - + NAddr::IRemoteAddrPtr Address; }; - + struct TEvResolveError : public TEventLocal<TEvResolveError, ui32(ENetwork::ResolveError)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvResolveError, "Network: TEvResolveError") - + TString Explain; }; - + struct TEvHTTPStreamStatus : public TEventLocal<TEvHTTPStreamStatus, ui32(ENetwork::HTTPStreamStatus)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPStreamStatus, @@ -285,38 +285,38 @@ namespace NActors { COMPLETE, ERROR, }; - + EStatus Status; TString Error; TString HttpHeaders; - }; - + }; + struct TEvHTTPSendContent : public TEventLocal<TEvHTTPSendContent, ui32(ENetwork::HTTPSendContent)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPSendContent, "Network: TEvHTTPSendContent") - + const char* Data; size_t Len; bool Last; }; - + struct TEvConnectWakeup : public TEventLocal<TEvConnectWakeup, ui32(ENetwork::ConnectProtocolWakeup)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvConnectWakeup, "Protocols: TEvConnectWakeup") }; - + struct TEvHTTPProtocolRetry : public TEventLocal<TEvHTTPProtocolRetry, ui32(ENetwork::HTTPProtocolRetry)> { DEFINE_SIMPLE_LOCAL_EVENT(TEvHTTPProtocolRetry, "Protocols: TEvHTTPProtocolRetry") }; - + struct TEvLoadMessage : TEventPB<TEvLoadMessage, NActorsInterconnect::TEvLoadMessage, static_cast<ui32>(ENetwork::EvLoadMessage)> { TEvLoadMessage() = default; - + template <typename TContainer> TEvLoadMessage(const TContainer& route, const TString& id, const TString* payload) { for (const TActorId& actorId : route) { @@ -329,7 +329,7 @@ namespace NActors { if (payload) { Record.SetPayload(*payload); } - } + } template <typename TContainer> TEvLoadMessage(const TContainer& route, const TString& id, TRope&& payload) { @@ -343,7 +343,7 @@ namespace NActors { 
AddPayload(std::move(payload)); } }; - + struct TEvUpdateFromInputSession : TEventLocal<TEvUpdateFromInputSession, static_cast<ui32>(ENetwork::EvUpdateFromInputSession)> { ui64 ConfirmedByInput; // latest Confirm value from processed input packet ui64 NumDataBytes; diff --git a/library/cpp/actors/interconnect/poller_actor.h b/library/cpp/actors/interconnect/poller_actor.h index dd787518e5..f927b82089 100644 --- a/library/cpp/actors/interconnect/poller_actor.h +++ b/library/cpp/actors/interconnect/poller_actor.h @@ -1,6 +1,6 @@ #pragma once -#include "events_local.h" +#include "events_local.h" #include "poller.h" #include <library/cpp/actors/core/actor.h> @@ -56,8 +56,8 @@ namespace NActors { IActor* CreatePollerActor(); inline TActorId MakePollerActorId() { - char x[12] = {'I', 'C', 'P', 'o', 'l', 'l', 'e', 'r', '\xDE', '\xAD', '\xBE', '\xEF'}; + char x[12] = {'I', 'C', 'P', 'o', 'l', 'l', 'e', 'r', '\xDE', '\xAD', '\xBE', '\xEF'}; return TActorId(0, TStringBuf(std::begin(x), std::end(x))); - } - + } + } diff --git a/library/cpp/actors/interconnect/ya.make b/library/cpp/actors/interconnect/ya.make index d6d9c3d8da..60d29b0fc0 100644 --- a/library/cpp/actors/interconnect/ya.make +++ b/library/cpp/actors/interconnect/ya.make @@ -16,7 +16,7 @@ SRCS( channel_scheduler.h event_filter.h event_holder_pool.h - events_local.h + events_local.h interconnect_address.cpp interconnect_address.h interconnect_channel.cpp diff --git a/library/cpp/actors/testlib/test_runtime.cpp b/library/cpp/actors/testlib/test_runtime.cpp index 2be70b75a5..6fa25b9965 100644 --- a/library/cpp/actors/testlib/test_runtime.cpp +++ b/library/cpp/actors/testlib/test_runtime.cpp @@ -774,7 +774,7 @@ namespace NActors { } void TTestActorRuntimeBase::SetLogBackend(const TAutoPtr<TLogBackend> logBackend) { - Y_VERIFY(!IsInitialized); + Y_VERIFY(!IsInitialized); TGuard<TMutex> guard(Mutex); LogBackend = logBackend; } diff --git a/library/cpp/actors/util/rope.h b/library/cpp/actors/util/rope.h index 
82b407a787..f5595efbaa 100644 --- a/library/cpp/actors/util/rope.h +++ b/library/cpp/actors/util/rope.h @@ -4,8 +4,8 @@ #include <util/generic/string.h> #include <util/generic/hash_set.h> #include <util/stream/str.h> -#include <util/system/sanitizers.h> -#include <util/system/valgrind.h> +#include <util/system/sanitizers.h> +#include <util/system/valgrind.h> // exactly one of them must be included #include "rope_cont_list.h" @@ -1135,27 +1135,27 @@ inline TRope TRope::CopySpaceOptimized(TRope&& origin, size_t worstRatioPer1k, T return res; } - -#if defined(WITH_VALGRIND) || defined(_msan_enabled_) - -inline void CheckRopeIsDefined(TRope::TConstIterator begin, ui64 size) { - while (size) { - ui64 contiguousSize = Min(size, begin.ContiguousSize()); -# if defined(WITH_VALGRIND) - VALGRIND_CHECK_MEM_IS_DEFINED(begin.ContiguousData(), contiguousSize); -# endif -# if defined(_msan_enabled_) - NSan::CheckMemIsInitialized(begin.ContiguousData(), contiguousSize); -# endif - size -= contiguousSize; - begin += contiguousSize; - } -} - -# define CHECK_ROPE_IS_DEFINED(begin, size) CheckRopeIsDefined(begin, size) - -#else - -# define CHECK_ROPE_IS_DEFINED(begin, size) do {} while (false) - -#endif + +#if defined(WITH_VALGRIND) || defined(_msan_enabled_) + +inline void CheckRopeIsDefined(TRope::TConstIterator begin, ui64 size) { + while (size) { + ui64 contiguousSize = Min(size, begin.ContiguousSize()); +# if defined(WITH_VALGRIND) + VALGRIND_CHECK_MEM_IS_DEFINED(begin.ContiguousData(), contiguousSize); +# endif +# if defined(_msan_enabled_) + NSan::CheckMemIsInitialized(begin.ContiguousData(), contiguousSize); +# endif + size -= contiguousSize; + begin += contiguousSize; + } +} + +# define CHECK_ROPE_IS_DEFINED(begin, size) CheckRopeIsDefined(begin, size) + +#else + +# define CHECK_ROPE_IS_DEFINED(begin, size) do {} while (false) + +#endif diff --git a/library/cpp/lwtrace/all.h b/library/cpp/lwtrace/all.h index b412bbfcea..d7aa57c49d 100644 --- a/library/cpp/lwtrace/all.h +++ 
b/library/cpp/lwtrace/all.h @@ -130,8 +130,8 @@ // NOTE: #define MY_PROBE(name, ...) GLOBAL_LWPROBE(MY_PROVIDER, name, ## __VA_ARGS__) #define GLOBAL_LWPROBE(provider, probe, ...) LWPROBE_I(LWTRACE_GET_NAMESPACE(provider)::LWTRACE_GET_NAME(probe), ##__VA_ARGS__) #define LWPROBE(probe, ...) LWPROBE_I(LWTRACE_GET_NAME(probe), ##__VA_ARGS__) -#define GLOBAL_LWPROBE_ENABLED(provider, probe) LWPROBE_ENABLED_I(LWTRACE_GET_NAMESPACE(provider)::LWTRACE_GET_NAME(probe)) -#define LWPROBE_ENABLED(probe) LWPROBE_ENABLED_I(LWTRACE_GET_NAME(probe)) +#define GLOBAL_LWPROBE_ENABLED(provider, probe) LWPROBE_ENABLED_I(LWTRACE_GET_NAMESPACE(provider)::LWTRACE_GET_NAME(probe)) +#define LWPROBE_ENABLED(probe) LWPROBE_ENABLED_I(LWTRACE_GET_NAME(probe)) #define LWPROBE_OBJ(probe, ...) LWPROBE_I(probe, ##__VA_ARGS__) // Calls a probe when scope is beeing left @@ -149,8 +149,8 @@ #else #define GLOBAL_LWPROBE(provider, probe, ...) #define LWPROBE(probe, ...) -#define GLOBAL_LWPROBE_ENABLED(provider, probe) false -#define LWPROBE_ENABLED(probe) false +#define GLOBAL_LWPROBE_ENABLED(provider, probe) false +#define LWPROBE_ENABLED(probe) false #define LWPROBE_OBJ(probe, ...) Y_UNUSED(probe) #define GLOBAL_LWPROBE_DURATION(provider, probe, ...) #define LWPROBE_DURATION(probe, ...) @@ -183,8 +183,8 @@ #define LWTRACE_GET_EVENTS(provider) NULL #define GLOBAL_LWPROBE(provider, probe, ...) #define LWPROBE(probe, ...) -#define GLOBAL_LWPROBE_ENABLED(provider, probe) false -#define LWPROBE_ENABLED(probe) false +#define GLOBAL_LWPROBE_ENABLED(provider, probe) false +#define LWPROBE_ENABLED(probe) false #define GLOBAL_LWPROBE_DURATION(provider, probe, ...) #define LWPROBE_DURATION(probe, ...) #define GLOBAL_LWTRACK(provider, probe, orbit, ...) 
diff --git a/library/cpp/lwtrace/preprocessor.h b/library/cpp/lwtrace/preprocessor.h index ca03f9383f..40865467b2 100644 --- a/library/cpp/lwtrace/preprocessor.h +++ b/library/cpp/lwtrace/preprocessor.h @@ -264,8 +264,8 @@ namespace NLWTrace { } \ } while (false) /**/ -#define LWPROBE_ENABLED_I(probe) ((probe).Probe.GetExecutorsCount() > 0) - +#define LWPROBE_ENABLED_I(probe) ((probe).Probe.GetExecutorsCount() > 0) + #define LWPROBE_DURATION_I(probetype, uniqid, probe, ...) probetype ::TScopedDuration uniqid(probe, 0 /* fake P0 - used for duration */, ##__VA_ARGS__); #define LWTRACK_I(probe, orbit, ...) \ diff --git a/library/cpp/lwtrace/shuttle.h b/library/cpp/lwtrace/shuttle.h index 6d3c8dd400..85c6e4da61 100644 --- a/library/cpp/lwtrace/shuttle.h +++ b/library/cpp/lwtrace/shuttle.h @@ -176,13 +176,13 @@ namespace NLWTrace { private: TShuttlePtr HeadNoLock; public: - TOrbit() = default; - TOrbit(const TOrbit&) = delete; - TOrbit(TOrbit&&) = default; - - TOrbit& operator=(const TOrbit&) = delete; - TOrbit& operator=(TOrbit&&) = default; - + TOrbit() = default; + TOrbit(const TOrbit&) = delete; + TOrbit(TOrbit&&) = default; + + TOrbit& operator=(const TOrbit&) = delete; + TOrbit& operator=(TOrbit&&) = default; + ~TOrbit() { Reset(); } diff --git a/library/cpp/monlib/dynamic_counters/percentile/percentile.h b/library/cpp/monlib/dynamic_counters/percentile/percentile.h index cbe3c7a87d..73c482bce9 100644 --- a/library/cpp/monlib/dynamic_counters/percentile/percentile.h +++ b/library/cpp/monlib/dynamic_counters/percentile/percentile.h @@ -1,6 +1,6 @@ #pragma once -#include "percentile_base.h" +#include "percentile_base.h" namespace NMonitoring { @@ -9,7 +9,7 @@ namespace NMonitoring { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template <size_t BUCKET_SIZE, size_t BUCKET_COUNT, size_t FRAME_COUNT> -struct TPercentileTracker : public TPercentileBase { +struct TPercentileTracker : public 
TPercentileBase { TAtomic Items[BUCKET_COUNT]; TAtomicBase Frame[FRAME_COUNT][BUCKET_COUNT]; size_t CurrentFrame; diff --git a/library/cpp/monlib/dynamic_counters/percentile/percentile_base.h b/library/cpp/monlib/dynamic_counters/percentile/percentile_base.h index e8b1a40899..d3c825c43d 100644 --- a/library/cpp/monlib/dynamic_counters/percentile/percentile_base.h +++ b/library/cpp/monlib/dynamic_counters/percentile/percentile_base.h @@ -1,36 +1,36 @@ -#pragma once - +#pragma once + #include <library/cpp/monlib/dynamic_counters/counters.h> - -#include <util/string/printf.h> - -namespace NMonitoring { - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Percentile tracker for monitoring -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -struct TPercentileBase : public TThrRefBase { - using TPercentile = std::pair<float, NMonitoring::TDynamicCounters::TCounterPtr>; - using TPercentiles = TVector<TPercentile>; - - TPercentiles Percentiles; - + +#include <util/string/printf.h> + +namespace NMonitoring { + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Percentile tracker for monitoring +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TPercentileBase : public TThrRefBase { + using TPercentile = std::pair<float, NMonitoring::TDynamicCounters::TCounterPtr>; + using TPercentiles = TVector<TPercentile>; + + TPercentiles Percentiles; + void Initialize(const TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, const TVector<float> &thresholds, TCountableBase::EVisibility visibility = TCountableBase::EVisibility::Public) { - Percentiles.reserve(thresholds.size()); - for (size_t i = 0; i < thresholds.size(); ++i) { - Percentiles.emplace_back(thresholds[i], + 
Percentiles.reserve(thresholds.size()); + for (size_t i = 0; i < thresholds.size(); ++i) { + Percentiles.emplace_back(thresholds[i], counters->GetNamedCounter("percentile", Sprintf("%.1f", thresholds[i] * 100.f), false, visibility)); - } - } - - void Initialize(const TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, TString group, TString subgroup, + } + } + + void Initialize(const TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, TString group, TString subgroup, TString name, const TVector<float> &thresholds, TCountableBase::EVisibility visibility = TCountableBase::EVisibility::Public) { - auto subCounters = counters->GetSubgroup(group, subgroup)->GetSubgroup("sensor", name); + auto subCounters = counters->GetSubgroup(group, subgroup)->GetSubgroup("sensor", name); Initialize(subCounters, thresholds, visibility); - } -}; - -} // NMonitoring + } +}; + +} // NMonitoring diff --git a/library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h b/library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h index 9f862d959f..0042cd9a6a 100644 --- a/library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h +++ b/library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h @@ -6,8 +6,8 @@ #include <cmath> -#include "percentile_base.h" - +#include "percentile_base.h" + namespace NMonitoring { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -15,14 +15,14 @@ namespace NMonitoring { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template <size_t BASE_BITS, size_t EXP_BITS, size_t FRAME_COUNT> -struct TPercentileTrackerLg : public TPercentileBase { - static constexpr size_t BUCKET_COUNT = size_t(1) << EXP_BITS; - static constexpr size_t BUCKET_SIZE = size_t(1) << BASE_BITS; +struct TPercentileTrackerLg : public TPercentileBase { + static constexpr size_t BUCKET_COUNT = size_t(1) << EXP_BITS; + static 
constexpr size_t BUCKET_SIZE = size_t(1) << BASE_BITS; static constexpr size_t ITEMS_COUNT = BUCKET_COUNT * BUCKET_SIZE; - static constexpr size_t TRACKER_LIMIT = BUCKET_SIZE * ((size_t(1) << BUCKET_COUNT) - 1) - - (size_t(1) << (BUCKET_COUNT - 1)); - static constexpr size_t MAX_GRANULARITY = size_t(1) << (BUCKET_COUNT - 1); - + static constexpr size_t TRACKER_LIMIT = BUCKET_SIZE * ((size_t(1) << BUCKET_COUNT) - 1) + - (size_t(1) << (BUCKET_COUNT - 1)); + static constexpr size_t MAX_GRANULARITY = size_t(1) << (BUCKET_COUNT - 1); + size_t Borders[BUCKET_COUNT]; TAtomic Items[ITEMS_COUNT]; TAtomicBase Frame[FRAME_COUNT][ITEMS_COUNT]; @@ -132,7 +132,7 @@ struct TPercentileTrackerLg : public TPercentileBase { } void Increment(size_t value) { - size_t bucket_idx = BucketIdxMostSignificantBit(value); + size_t bucket_idx = BucketIdxMostSignificantBit(value); size_t inside_bucket_idx = (value - Borders[bucket_idx] + (1 << bucket_idx) - 1) >> bucket_idx; size_t idx = bucket_idx * BUCKET_SIZE + inside_bucket_idx; AtomicIncrement(Items[Min(idx, ITEMS_COUNT - 1)]); diff --git a/library/cpp/monlib/dynamic_counters/percentile/percentile_ut.cpp b/library/cpp/monlib/dynamic_counters/percentile/percentile_ut.cpp index b30cf36328..6c8bb54ec9 100644 --- a/library/cpp/monlib/dynamic_counters/percentile/percentile_ut.cpp +++ b/library/cpp/monlib/dynamic_counters/percentile/percentile_ut.cpp @@ -5,58 +5,58 @@ using namespace NMonitoring; Y_UNIT_TEST_SUITE(PercentileTest) { -template<size_t A, size_t B, size_t B_BEGIN> -void printSizeAndLimit() { - using TPerc = TPercentileTrackerLg<A, B, 15>; - Cout << "TPercentileTrackerLg<" << A << ", " << B << ", 15>" - << "; sizeof# " << LeftPad(HumanReadableSize(sizeof(TPerc), SF_BYTES), 7) - << "; max_granularity# " << LeftPad(HumanReadableSize(TPerc::MAX_GRANULARITY, SF_QUANTITY), 5) - << "; limit# " << LeftPad(HumanReadableSize(TPerc::TRACKER_LIMIT , SF_QUANTITY), 5) << Endl; - if constexpr (B > 1) { - printSizeAndLimit<A, B - 1, B_BEGIN>(); - } 
else if constexpr (A > 1) { - Cout << Endl; - printSizeAndLimit<A - 1, B_BEGIN, B_BEGIN>(); - } -} - - Y_UNIT_TEST(PrintTrackerLgSizeAndLimits) { - printSizeAndLimit<10, 5, 5>(); - } - - Y_UNIT_TEST(TrackerLimitTest) { - { - using TPerc = TPercentileTrackerLg<1, 0, 1>; - TPerc tracker; - tracker.Increment(Max<size_t>()); - UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); - } - { - using TPerc = TPercentileTrackerLg<1, 1, 1>; - TPerc tracker; - tracker.Increment(Max<size_t>()); - UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); - } - { - using TPerc = TPercentileTrackerLg<1, 5, 1>; - TPerc tracker; - tracker.Increment(Max<size_t>()); - UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); - } - { - using TPerc = TPercentileTrackerLg<2, 1, 1>; - TPerc tracker; - tracker.Increment(Max<size_t>()); - UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); - } - { - using TPerc = TPercentileTrackerLg<5, 4, 1>; - TPerc tracker; - tracker.Increment(Max<size_t>()); - UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); - } - } - +template<size_t A, size_t B, size_t B_BEGIN> +void printSizeAndLimit() { + using TPerc = TPercentileTrackerLg<A, B, 15>; + Cout << "TPercentileTrackerLg<" << A << ", " << B << ", 15>" + << "; sizeof# " << LeftPad(HumanReadableSize(sizeof(TPerc), SF_BYTES), 7) + << "; max_granularity# " << LeftPad(HumanReadableSize(TPerc::MAX_GRANULARITY, SF_QUANTITY), 5) + << "; limit# " << LeftPad(HumanReadableSize(TPerc::TRACKER_LIMIT , SF_QUANTITY), 5) << Endl; + if constexpr (B > 1) { + printSizeAndLimit<A, B - 1, B_BEGIN>(); + } else if constexpr (A > 1) { + Cout << Endl; + printSizeAndLimit<A - 1, B_BEGIN, B_BEGIN>(); + } +} + + Y_UNIT_TEST(PrintTrackerLgSizeAndLimits) { + printSizeAndLimit<10, 5, 5>(); + } + + Y_UNIT_TEST(TrackerLimitTest) { + { + using TPerc = TPercentileTrackerLg<1, 0, 1>; + TPerc tracker; + tracker.Increment(Max<size_t>()); + 
UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); + } + { + using TPerc = TPercentileTrackerLg<1, 1, 1>; + TPerc tracker; + tracker.Increment(Max<size_t>()); + UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); + } + { + using TPerc = TPercentileTrackerLg<1, 5, 1>; + TPerc tracker; + tracker.Increment(Max<size_t>()); + UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); + } + { + using TPerc = TPercentileTrackerLg<2, 1, 1>; + TPerc tracker; + tracker.Increment(Max<size_t>()); + UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); + } + { + using TPerc = TPercentileTrackerLg<5, 4, 1>; + TPerc tracker; + tracker.Increment(Max<size_t>()); + UNIT_ASSERT_EQUAL(TPerc::TRACKER_LIMIT, tracker.GetPercentile(1.0)); + } + } + Y_UNIT_TEST(BucketIdxIfvsBucketIdxBinarySearch) { for (size_t var = 0; var < 5; var++) { if (var == 0) { diff --git a/ydb/core/base/appdata.cpp b/ydb/core/base/appdata.cpp index bcd402736f..f9e517fc42 100644 --- a/ydb/core/base/appdata.cpp +++ b/ydb/core/base/appdata.cpp @@ -25,7 +25,7 @@ TAppData::TAppData( , CompilerSchemeCacheTables(Max<ui64>() / 4) , Mon(nullptr) , BusMonPage(nullptr) - , Icb(new TControlBoard()) + , Icb(new TControlBoard()) , InFlightLimiterRegistry(new NGRpcService::TInFlightLimiterRegistry(Icb)) , StaticBlobStorageConfig(new NKikimrBlobStorage::TNodeWardenServiceSet) , KikimrShouldContinue(kikimrShouldContinue) diff --git a/ydb/core/base/appdata.h b/ydb/core/base/appdata.h index 33dc3edd72..c666f7468c 100644 --- a/ydb/core/base/appdata.h +++ b/ydb/core/base/appdata.h @@ -49,11 +49,11 @@ namespace NMiniKQL { } namespace NDataShard { - class IExportFactory; + class IExportFactory; } namespace NSQS { - class IEventsWriterFactory; + class IEventsWriterFactory; class IAuthFactory; } @@ -75,9 +75,9 @@ struct TAppData { const NScheme::TTypeRegistry* TypeRegistry = nullptr; const NMiniKQL::IFunctionRegistry* FunctionRegistry = nullptr; - const NDataShard::IExportFactory 
*DataShardExportFactory = nullptr; + const NDataShard::IExportFactory *DataShardExportFactory = nullptr; const TFormatFactory* FormatFactory = nullptr; - const NSQS::IEventsWriterFactory* SqsEventsWriterFactory = nullptr; + const NSQS::IEventsWriterFactory* SqsEventsWriterFactory = nullptr; NSQS::IAuthFactory* SqsAuthFactory = nullptr; @@ -85,7 +85,7 @@ struct TAppData { const NPQ::IPersQueueMirrorReaderFactory* PersQueueMirrorReaderFactory = nullptr; NYdb::TDriver* YdbDriver = nullptr; - const NPDisk::IIoContextFactory* IoContextFactory = nullptr; + const NPDisk::IIoContextFactory* IoContextFactory = nullptr; struct TDefaultTabletTypes { TTabletTypes::EType SchemeShard; @@ -124,7 +124,7 @@ struct TAppData { NActors::TMon* Mon; NMonitoring::TDynamicCounterPtr Counters; NMonitoring::TBusNgMonPage* BusMonPage; - TIntrusivePtr<NKikimr::TControlBoard> Icb; + TIntrusivePtr<NKikimr::TControlBoard> Icb; TIntrusivePtr<NGRpcService::TInFlightLimiterRegistry> InFlightLimiterRegistry; TIntrusivePtr<NInterconnect::TPollerThreads> PollerThreads; @@ -140,7 +140,7 @@ struct TAppData { NKikimrConfig::TSqsConfig SqsConfig; NKikimrProto::TAuthConfig AuthConfig; NKikimrProto::TKeyConfig KeyConfig; - NKikimrProto::TKeyConfig PDiskKeyConfig; + NKikimrProto::TKeyConfig PDiskKeyConfig; TFeatureFlags FeatureFlags; NKikimrConfig::THiveConfig HiveConfig; NKikimrConfig::TDataShardConfig DataShardConfig; diff --git a/ydb/core/base/blobstorage.cpp b/ydb/core/base/blobstorage.cpp index 0c8bea320f..60bef7f4df 100644 --- a/ydb/core/base/blobstorage.cpp +++ b/ydb/core/base/blobstorage.cpp @@ -1,49 +1,49 @@ -#include "blobstorage.h" - -namespace NKikimr { - -NKikimrBlobStorage::EPDiskType PDiskTypeToPDiskType(const TPDiskCategory::EDeviceType type) { - switch (type) { - case TPDiskCategory::DEVICE_TYPE_ROT: - return NKikimrBlobStorage::EPDiskType::ROT; - case TPDiskCategory::DEVICE_TYPE_SSD: - return NKikimrBlobStorage::EPDiskType::SSD; - case TPDiskCategory::DEVICE_TYPE_NVME: - return 
NKikimrBlobStorage::EPDiskType::NVME; - case TPDiskCategory::DEVICE_TYPE_UNKNOWN: - return NKikimrBlobStorage::EPDiskType::UNKNOWN_TYPE; - default: - Y_FAIL("Device type is unknown; type# %" PRIu64, (ui64)type); - } -} - -TPDiskCategory::EDeviceType PDiskTypeToPDiskType(const NKikimrBlobStorage::EPDiskType type) { - switch (type) { - case NKikimrBlobStorage::EPDiskType::ROT: - return TPDiskCategory::DEVICE_TYPE_ROT; - case NKikimrBlobStorage::EPDiskType::SSD: - return TPDiskCategory::DEVICE_TYPE_SSD; - case NKikimrBlobStorage::EPDiskType::NVME: - return TPDiskCategory::DEVICE_TYPE_NVME; - case NKikimrBlobStorage::EPDiskType::UNKNOWN_TYPE: - return TPDiskCategory::DEVICE_TYPE_UNKNOWN; - default: - Y_FAIL("Device type is unknown; type# %" PRIu64, (ui64)type); - } -} - -bool operator==(const TPDiskCategory x, const TPDiskCategory y) { - return x.Kind() == y.Kind() && x.Type() == y.Type(); -} - -bool operator!=(const TPDiskCategory x, const TPDiskCategory y) { - return !(x == y); -} - -bool operator<(const TPDiskCategory x, const TPDiskCategory y) { - return std::make_tuple(x.Type(), x.Kind()) < std::make_tuple(y.Type(), y.Kind()); -} - +#include "blobstorage.h" + +namespace NKikimr { + +NKikimrBlobStorage::EPDiskType PDiskTypeToPDiskType(const TPDiskCategory::EDeviceType type) { + switch (type) { + case TPDiskCategory::DEVICE_TYPE_ROT: + return NKikimrBlobStorage::EPDiskType::ROT; + case TPDiskCategory::DEVICE_TYPE_SSD: + return NKikimrBlobStorage::EPDiskType::SSD; + case TPDiskCategory::DEVICE_TYPE_NVME: + return NKikimrBlobStorage::EPDiskType::NVME; + case TPDiskCategory::DEVICE_TYPE_UNKNOWN: + return NKikimrBlobStorage::EPDiskType::UNKNOWN_TYPE; + default: + Y_FAIL("Device type is unknown; type# %" PRIu64, (ui64)type); + } +} + +TPDiskCategory::EDeviceType PDiskTypeToPDiskType(const NKikimrBlobStorage::EPDiskType type) { + switch (type) { + case NKikimrBlobStorage::EPDiskType::ROT: + return TPDiskCategory::DEVICE_TYPE_ROT; + case 
NKikimrBlobStorage::EPDiskType::SSD: + return TPDiskCategory::DEVICE_TYPE_SSD; + case NKikimrBlobStorage::EPDiskType::NVME: + return TPDiskCategory::DEVICE_TYPE_NVME; + case NKikimrBlobStorage::EPDiskType::UNKNOWN_TYPE: + return TPDiskCategory::DEVICE_TYPE_UNKNOWN; + default: + Y_FAIL("Device type is unknown; type# %" PRIu64, (ui64)type); + } +} + +bool operator==(const TPDiskCategory x, const TPDiskCategory y) { + return x.Kind() == y.Kind() && x.Type() == y.Type(); +} + +bool operator!=(const TPDiskCategory x, const TPDiskCategory y) { + return !(x == y); +} + +bool operator<(const TPDiskCategory x, const TPDiskCategory y) { + return std::make_tuple(x.Type(), x.Kind()) < std::make_tuple(y.Type(), y.Kind()); +} + std::unique_ptr<TEvBlobStorage::TEvPutResult> TEvBlobStorage::TEvPut::MakeErrorResponse( NKikimrProto::EReplyStatus status, const TString& errorReason, ui32 groupId) { auto res = std::make_unique<TEvPutResult>(status, Id, TStorageStatusFlags(), groupId, 0.0f); @@ -115,15 +115,15 @@ std::unique_ptr<TEvBlobStorage::TEvStatusResult> TEvBlobStorage::TEvStatus::Make return res; } -}; - -template<> -void Out<NKikimr::TStorageStatusFlags>(IOutputStream& o, - typename TTypeTraits<NKikimr::TStorageStatusFlags>::TFuncParam x) { - return x.Output(o); -} - -template<> -void Out<NKikimr::TPDiskCategory>(IOutputStream &str, const NKikimr::TPDiskCategory &value) { - str << value.ToString(); -} +}; + +template<> +void Out<NKikimr::TStorageStatusFlags>(IOutputStream& o, + typename TTypeTraits<NKikimr::TStorageStatusFlags>::TFuncParam x) { + return x.Output(o); +} + +template<> +void Out<NKikimr::TPDiskCategory>(IOutputStream &str, const NKikimr::TPDiskCategory &value) { + str << value.ToString(); +} diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h index a010718b44..a2faee326e 100644 --- a/ydb/core/base/blobstorage.h +++ b/ydb/core/base/blobstorage.h @@ -1,7 +1,7 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_category.h" +#include 
"blobstorage_pdisk_category.h" #include "events.h" #include "tablet_types.h" #include "logoblob.h" @@ -17,7 +17,7 @@ #include <ydb/library/wilson/wilson_event.h> #include <library/cpp/lwtrace/shuttle.h> - + #include <util/stream/str.h> #include <util/generic/xrange.h> @@ -84,10 +84,10 @@ struct TStorageStatusFlags { } }; -NKikimrBlobStorage::EPDiskType PDiskTypeToPDiskType(const TPDiskCategory::EDeviceType type); - -TPDiskCategory::EDeviceType PDiskTypeToPDiskType(const NKikimrBlobStorage::EPDiskType type); - +NKikimrBlobStorage::EPDiskType PDiskTypeToPDiskType(const TPDiskCategory::EDeviceType type); + +TPDiskCategory::EDeviceType PDiskTypeToPDiskType(const NKikimrBlobStorage::EPDiskType type); + enum EGroupConfigurationType { GroupConfigurationTypeStatic = 0, GroupConfigurationTypeDynamic = 1 @@ -522,13 +522,13 @@ struct TEvBlobStorage { // vdisk <-> controller interface, EvCnt = EvPut + 6 * 512, /// 268 635 136 EvVGenerationChange, - EvRegisterPDiskLoadActor, + EvRegisterPDiskLoadActor, EvStatusUpdate, EvDropDonor, EvCntReply = EvPut + 7 * 512, /// 268 635 648 EvVGenerationChangeResult, - EvRegisterPDiskLoadActorResult, + EvRegisterPDiskLoadActorResult, // internal vdisk interface EvYardInit = EvPut + 8 * 512, /// 268 636 160 @@ -630,16 +630,16 @@ struct TEvBlobStorage { EvAnubisQuantumDone, EvAnubisCandidates, EvAnubisVGet, - EvChunksLock, + EvChunksLock, EvChunksUnlock, // 268 636 260 - EvWhiteboardReportResult, - EvHttpInfoResult, - EvReadLogContinue, - EvLogSectorRestore, - EvLogInitResult, + EvWhiteboardReportResult, + EvHttpInfoResult, + EvReadLogContinue, + EvLogSectorRestore, + EvLogInitResult, EvAskForCutLog, EvDelLogoBlobDataSyncLog, - EvPDiskFormattingFinished, + EvPDiskFormattingFinished, EvRecoveryLogReplayDone, EvMonStreamQuery, // 268 636 270 EvMonStreamActorDeathNote, @@ -700,8 +700,8 @@ struct TEvBlobStorage { EvOsirisDone, EvSyncLogWriteDone, EvAnubisVGetResult, - EvChunksLockResult, - EvChunksUnlockResult, + EvChunksLockResult, + 
EvChunksUnlockResult, EvDelLogoBlobDataSyncLogResult, EvAddBulkSstResult, /// 268 636 702 EvAddBulkSstCommitted, @@ -713,7 +713,7 @@ struct TEvBlobStorage { EvReplResume, EvReplDone, EvFreshAppendixCompactionDone, - EvDeviceError, + EvDeviceError, EvHugeLockChunksResult, EvHugeStatResult, @@ -732,9 +732,9 @@ struct TEvBlobStorage { EvAbortOperation, EvResume, EvTimeStats, - EvOverseerRequest, // Not used - EvOverseerLogLastLsn, // Not used - EvOverseerConfirm, // Not used + EvOverseerRequest, // Not used + EvOverseerLogLastLsn, // Not used + EvOverseerConfirm, // Not used EvLatencyReport, EvGroupStatReport, EvAccelerateGet, @@ -774,7 +774,7 @@ struct TEvBlobStorage { EvControllerScrubQueryStartQuantum, EvControllerScrubQuantumFinished, EvControllerScrubReportQuantumInProgress, - EvControllerUpdateNodeDrives, + EvControllerUpdateNodeDrives, // EvControllerReadSchemeStringResult = EvPut + 12 * 512, // EvControllerReadDataStringResult, @@ -809,9 +809,9 @@ struct TEvBlobStorage { // node controller internal messages EvRegisterNodeRetry = EvPut + 14 * 512, - EvAskRestartPDisk, - EvRestartPDisk, - EvRestartPDiskResult, + EvAskRestartPDisk, + EvRestartPDisk, + EvRestartPDiskResult, EvNodeWardenQueryGroupInfo, EvNodeWardenGroupInfo, @@ -883,7 +883,7 @@ struct TEvBlobStorage { const TInstant Deadline; const NKikimrBlobStorage::EPutHandleClass HandleClass; const ETactic Tactic; - mutable NLWTrace::TOrbit Orbit; + mutable NLWTrace::TOrbit Orbit; ui32 RestartCounter = 0; TEvPut(const TLogoBlobID &id, const TString &buffer, TInstant deadline, @@ -2036,7 +2036,7 @@ struct TEvBlobStorage { struct TEvControllerGetGroup; struct TEvControllerUpdateDiskStatus; struct TEvControllerUpdateGroupStat; - struct TEvControllerUpdateNodeDrives; + struct TEvControllerUpdateNodeDrives; struct TEvControllerNodeServiceSetUpdate; struct TEvControllerProposeGroupKey; struct TEvControllerSelectGroupsResult; @@ -2059,10 +2059,10 @@ struct TEvBlobStorage { struct TEvDropDonor; struct TEvBunchOfEvents; 
- - struct TEvAskRestartPDisk; - struct TEvRestartPDisk; - struct TEvRestartPDiskResult; + + struct TEvAskRestartPDisk; + struct TEvRestartPDisk; + struct TEvRestartPDiskResult; }; // EPutHandleClass defines BlobStorage queue to a request to diff --git a/ydb/core/base/blobstorage_grouptype.cpp b/ydb/core/base/blobstorage_grouptype.cpp index 72ac901e02..66c79ba42c 100644 --- a/ydb/core/base/blobstorage_grouptype.cpp +++ b/ydb/core/base/blobstorage_grouptype.cpp @@ -47,14 +47,14 @@ static const std::array<TBlobStorageErasureParameters, TErasureType::ErasureSpec ,{2} // 7 = ErasureSpicies::Erasure3Plus2Stipe ,{2} // 8 = ErasureSpicies::ErasureMirror3Plus2 ,{6} // 9 = ErasureSpicies::ErasireMirror3dc - ,{3} // 10 = ErasureSpicies::Erasure4Plus3Block - ,{3} // 11 = ErasureSpicies::Erasure4Plus3Stripe - ,{3} // 12 = ErasureSpicies::Erasure3Plus3Block - ,{3} // 13 = ErasureSpicies::Erasure3Plus3Stripe - ,{3} // 14 = ErasureSpicies::Erasure2Plus3Block - ,{3} // 15 = ErasureSpicies::Erasure2Plus3Stripe - ,{2} // 16 = ErasureSpicies::Erasure2Plus2Block - ,{2} // 17 = ErasureSpicies::Erasure2Plus2Stripe + ,{3} // 10 = ErasureSpicies::Erasure4Plus3Block + ,{3} // 11 = ErasureSpicies::Erasure4Plus3Stripe + ,{3} // 12 = ErasureSpicies::Erasure3Plus3Block + ,{3} // 13 = ErasureSpicies::Erasure3Plus3Stripe + ,{3} // 14 = ErasureSpicies::Erasure2Plus3Block + ,{3} // 15 = ErasureSpicies::Erasure2Plus3Stripe + ,{2} // 16 = ErasureSpicies::Erasure2Plus2Block + ,{2} // 17 = ErasureSpicies::Erasure2Plus2Stripe ,{5} // 18 = ErasureSpicies::ErasureMirror3of4 }}; diff --git a/ydb/core/base/blobstorage_pdisk_category.h b/ydb/core/base/blobstorage_pdisk_category.h index e22413e832..8c25b4dc25 100644 --- a/ydb/core/base/blobstorage_pdisk_category.h +++ b/ydb/core/base/blobstorage_pdisk_category.h @@ -1,127 +1,127 @@ -#pragma once - -#include <util/generic/string.h> -#include <util/stream/str.h> -#include <util/string/printf.h> -#include <util/system/types.h> - -namespace NKikimr { - - -class 
TPDiskCategory { - union { - struct { - ui64 IsSolidState : 1; - ui64 Kind : 55; - ui64 TypeExt : 8; - } N; - - ui64 X; - } Raw; - - // For compatibility TypeExt not used for old types (ROT, SSD), so followed scheme is used: - // ROT -> IsSolidState# 0, TypeExt# 0 - // SSD -> IsSolidState# 1, TypeExt# 0 - // NVME -> IsSolidState# 1, TypeExt# 2 - -public: - enum EDeviceType : ui8 { - DEVICE_TYPE_ROT = 0, - DEVICE_TYPE_SSD = 1, - DEVICE_TYPE_NVME = 2, - DEVICE_TYPE_UNKNOWN = 255, - }; - - static TPDiskCategory::EDeviceType DeviceTypeFromStr(const TString &typeName) { - if (typeName == "ROT" || typeName == "DEVICE_TYPE_ROT") { - return DEVICE_TYPE_ROT; - } else if (typeName == "SSD" || typeName == "DEVICE_TYPE_SSD") { - return DEVICE_TYPE_SSD; - } else if (typeName == "NVME" || typeName == "DEVICE_TYPE_NVME") { - return DEVICE_TYPE_NVME; - } - return DEVICE_TYPE_UNKNOWN; - } - - static TString DeviceTypeStr(const TPDiskCategory::EDeviceType type, bool isShort) { - switch(type) { - case DEVICE_TYPE_ROT: - return isShort ? "ROT" : "DEVICE_TYPE_ROT"; - case DEVICE_TYPE_SSD: - return isShort ? "SSD" : "DEVICE_TYPE_SSD"; - case DEVICE_TYPE_NVME: - return isShort ? 
"NVME" : "DEVICE_TYPE_NVME"; - default: - return Sprintf("DEVICE_TYPE_UNKNOWN(%" PRIu64 ")", (ui64)type); - } - } - - TPDiskCategory() = default; - - TPDiskCategory(ui64 raw) { - Raw.X = raw; - } - - TPDiskCategory(EDeviceType type, ui64 kind) { - Raw.N.TypeExt = 0; - if (type == DEVICE_TYPE_NVME) { - Raw.N.TypeExt = type; - Y_VERIFY(Raw.N.TypeExt == type, "type# %" PRIu64 " is out of range!", (ui64)type); - } - Raw.N.IsSolidState = (type == DEVICE_TYPE_SSD || type == DEVICE_TYPE_NVME); - Raw.N.Kind = kind; - Y_VERIFY(Raw.N.Kind == kind, "kind# %" PRIu64 " is out of range!", (ui64)kind); - } - - ui64 GetRaw() const { - return Raw.X; - } - - operator ui64() const { - return Raw.X; - } - - bool IsSolidState() const { - return Raw.N.IsSolidState || Raw.N.TypeExt == DEVICE_TYPE_SSD || Raw.N.TypeExt == DEVICE_TYPE_NVME; - } - - EDeviceType Type() const { - if (Raw.N.TypeExt == 0) { - if (Raw.N.IsSolidState) { - return DEVICE_TYPE_SSD; - } else { - return DEVICE_TYPE_ROT; - } - } - return static_cast<EDeviceType>(Raw.N.TypeExt); - } - - TString TypeStrLong() const { - return DeviceTypeStr(Type(), false); - } - - TString TypeStrShort() const { - return DeviceTypeStr(Type(), true); - } - - ui64 Kind() const { - return Raw.N.Kind; - } - - TString ToString() const { - TStringStream str; - str << "{Type# " << DeviceTypeStr(Type(), false); - str << " Kind# " << Raw.N.Kind; - str << "}"; - return str.Str(); - } -}; - -static_assert(sizeof(TPDiskCategory) == sizeof(ui64), "sizeof(TPDiskCategory) must be 8 bytes!"); - -bool operator==(const TPDiskCategory x, const TPDiskCategory y); -bool operator!=(const TPDiskCategory x, const TPDiskCategory y); - -bool operator<(const TPDiskCategory x, const TPDiskCategory y); - -} +#pragma once + +#include <util/generic/string.h> +#include <util/stream/str.h> +#include <util/string/printf.h> +#include <util/system/types.h> + +namespace NKikimr { + + +class TPDiskCategory { + union { + struct { + ui64 IsSolidState : 1; + ui64 Kind : 55; + ui64 
TypeExt : 8; + } N; + + ui64 X; + } Raw; + + // For compatibility TypeExt not used for old types (ROT, SSD), so followed scheme is used: + // ROT -> IsSolidState# 0, TypeExt# 0 + // SSD -> IsSolidState# 1, TypeExt# 0 + // NVME -> IsSolidState# 1, TypeExt# 2 + +public: + enum EDeviceType : ui8 { + DEVICE_TYPE_ROT = 0, + DEVICE_TYPE_SSD = 1, + DEVICE_TYPE_NVME = 2, + DEVICE_TYPE_UNKNOWN = 255, + }; + + static TPDiskCategory::EDeviceType DeviceTypeFromStr(const TString &typeName) { + if (typeName == "ROT" || typeName == "DEVICE_TYPE_ROT") { + return DEVICE_TYPE_ROT; + } else if (typeName == "SSD" || typeName == "DEVICE_TYPE_SSD") { + return DEVICE_TYPE_SSD; + } else if (typeName == "NVME" || typeName == "DEVICE_TYPE_NVME") { + return DEVICE_TYPE_NVME; + } + return DEVICE_TYPE_UNKNOWN; + } + + static TString DeviceTypeStr(const TPDiskCategory::EDeviceType type, bool isShort) { + switch(type) { + case DEVICE_TYPE_ROT: + return isShort ? "ROT" : "DEVICE_TYPE_ROT"; + case DEVICE_TYPE_SSD: + return isShort ? "SSD" : "DEVICE_TYPE_SSD"; + case DEVICE_TYPE_NVME: + return isShort ? 
"NVME" : "DEVICE_TYPE_NVME"; + default: + return Sprintf("DEVICE_TYPE_UNKNOWN(%" PRIu64 ")", (ui64)type); + } + } + + TPDiskCategory() = default; + + TPDiskCategory(ui64 raw) { + Raw.X = raw; + } + + TPDiskCategory(EDeviceType type, ui64 kind) { + Raw.N.TypeExt = 0; + if (type == DEVICE_TYPE_NVME) { + Raw.N.TypeExt = type; + Y_VERIFY(Raw.N.TypeExt == type, "type# %" PRIu64 " is out of range!", (ui64)type); + } + Raw.N.IsSolidState = (type == DEVICE_TYPE_SSD || type == DEVICE_TYPE_NVME); + Raw.N.Kind = kind; + Y_VERIFY(Raw.N.Kind == kind, "kind# %" PRIu64 " is out of range!", (ui64)kind); + } + + ui64 GetRaw() const { + return Raw.X; + } + + operator ui64() const { + return Raw.X; + } + + bool IsSolidState() const { + return Raw.N.IsSolidState || Raw.N.TypeExt == DEVICE_TYPE_SSD || Raw.N.TypeExt == DEVICE_TYPE_NVME; + } + + EDeviceType Type() const { + if (Raw.N.TypeExt == 0) { + if (Raw.N.IsSolidState) { + return DEVICE_TYPE_SSD; + } else { + return DEVICE_TYPE_ROT; + } + } + return static_cast<EDeviceType>(Raw.N.TypeExt); + } + + TString TypeStrLong() const { + return DeviceTypeStr(Type(), false); + } + + TString TypeStrShort() const { + return DeviceTypeStr(Type(), true); + } + + ui64 Kind() const { + return Raw.N.Kind; + } + + TString ToString() const { + TStringStream str; + str << "{Type# " << DeviceTypeStr(Type(), false); + str << " Kind# " << Raw.N.Kind; + str << "}"; + return str.Str(); + } +}; + +static_assert(sizeof(TPDiskCategory) == sizeof(ui64), "sizeof(TPDiskCategory) must be 8 bytes!"); + +bool operator==(const TPDiskCategory x, const TPDiskCategory y); +bool operator!=(const TPDiskCategory x, const TPDiskCategory y); + +bool operator<(const TPDiskCategory x, const TPDiskCategory y); + +} diff --git a/ydb/core/base/compile_time_flags.h b/ydb/core/base/compile_time_flags.h index a0775d956a..4259148ee3 100644 --- a/ydb/core/base/compile_time_flags.h +++ b/ydb/core/base/compile_time_flags.h @@ -38,7 +38,7 @@ #ifndef KIKIMR_USE_PROTOBUF_WITH_PAYLOAD 
#define KIKIMR_USE_PROTOBUF_WITH_PAYLOAD 1 #endif - + // This feature flag enables use of column families in tables // Runtime support is expected to ship in 19-6, may be enabled in 19-8 #ifndef KIKIMR_SCHEMESHARD_ALLOW_COLUMN_FAMILIES @@ -60,13 +60,13 @@ #ifndef KIKIMR_ALLOW_SSREPLICA_PROBES #define KIKIMR_ALLOW_SSREPLICA_PROBES 0 #endif - -// This feature enables cutting PDisk's log from the middle of log chunks list -#ifndef KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE -#define KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE true -#endif - -// This feature flag enables PDisk to use t1ha hash in sector footer checksums -#ifndef KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING + +// This feature enables cutting PDisk's log from the middle of log chunks list +#ifndef KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE +#define KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE true +#endif + +// This feature flag enables PDisk to use t1ha hash in sector footer checksums +#ifndef KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING #define KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING true -#endif +#endif diff --git a/ydb/core/base/logoblob.h b/ydb/core/base/logoblob.h index 5aa560728a..36eea54393 100644 --- a/ydb/core/base/logoblob.h +++ b/ydb/core/base/logoblob.h @@ -38,12 +38,12 @@ namespace NKikimr { Set(tabletId, generation, step, channel, blobSize, cookie, partId, 0); } - explicit TLogoBlobID(ui64 tabletId, ui32 generation, ui32 step, ui32 channel, ui32 blobSize, ui32 cookie, - ui32 partId, ui32 crcMode) - { - Set(tabletId, generation, step, channel, blobSize, cookie, partId, crcMode); - } - + explicit TLogoBlobID(ui64 tabletId, ui32 generation, ui32 step, ui32 channel, ui32 blobSize, ui32 cookie, + ui32 partId, ui32 crcMode) + { + Set(tabletId, generation, step, channel, blobSize, cookie, partId, crcMode); + } + explicit TLogoBlobID(ui64 raw1, ui64 raw2, ui64 raw3) { Raw.X[0] = raw1; diff --git a/ydb/core/base/services/blobstorage_service_id.h b/ydb/core/base/services/blobstorage_service_id.h index 
14935628b1..43f6eceda6 100644 --- a/ydb/core/base/services/blobstorage_service_id.h +++ b/ydb/core/base/services/blobstorage_service_id.h @@ -1,50 +1,50 @@ -#pragma once - -#include "defs.h" -#include <util/generic/string.h> - -namespace NKikimr { - +#pragma once + +#include "defs.h" +#include <util/generic/string.h> + +namespace NKikimr { + inline TActorId MakeBlobStorageProxyID(ui32 blobStorageGroup) { - char x[12] = {'b', 's', 'p', 'r', 'o', 'x', 'y' }; - x[7] = (char)blobStorageGroup; - x[8] = (char)(blobStorageGroup >> 8); - x[9] = (char)(blobStorageGroup >> 16); - x[10] = (char)(blobStorageGroup >> 24); + char x[12] = {'b', 's', 'p', 'r', 'o', 'x', 'y' }; + x[7] = (char)blobStorageGroup; + x[8] = (char)(blobStorageGroup >> 8); + x[9] = (char)(blobStorageGroup >> 16); + x[10] = (char)(blobStorageGroup >> 24); return TActorId(0, TStringBuf(x, 12)); -} - +} + inline TActorId MakeBlobStorageLoadID(ui32 nodeId) { - char x[12] = {'b', 's', 'l', 'o', 'a', 'd', 'd', 0}; - x[8] = (char)(nodeId >> 24); - x[9] = (char)(nodeId >> 16); - x[10] = (char)(nodeId >> 8); - x[11] = (char)nodeId; + char x[12] = {'b', 's', 'l', 'o', 'a', 'd', 'd', 0}; + x[8] = (char)(nodeId >> 24); + x[9] = (char)(nodeId >> 16); + x[10] = (char)(nodeId >> 8); + x[11] = (char)nodeId; return TActorId(nodeId, TStringBuf(x, 12)); -} - +} + inline TActorId MakeBlobStorageFailureInjectionID(ui32 nodeId) { - char x[12] = {'b', 's', 'F', 'a', 'i', 'l', 'I', 'n'}; - x[8] = (char)(nodeId >> 24); - x[9] = (char)(nodeId >> 16); - x[10] = (char)(nodeId >> 8); - x[11] = (char)nodeId; + char x[12] = {'b', 's', 'F', 'a', 'i', 'l', 'I', 'n'}; + x[8] = (char)(nodeId >> 24); + x[9] = (char)(nodeId >> 16); + x[10] = (char)(nodeId >> 8); + x[11] = (char)nodeId; return TActorId(nodeId, TStringBuf(x, 12)); -} - +} + inline TActorId MakeBlobStorageVDiskID(ui32 node, ui32 pDiskID, ui32 vDiskSlotID) { - char x[12] = {'b','s','v','d'}; - x[4] = (char)pDiskID; - x[5] = (char)(pDiskID >> 8); - x[6] = (char)(pDiskID >> 16); - 
x[7] = (char)(pDiskID >> 24); - x[8] = (char)vDiskSlotID; - x[9] = (char)(vDiskSlotID >> 8); - x[10] = (char)(vDiskSlotID >> 16); - x[11] = (char)(vDiskSlotID >> 24); + char x[12] = {'b','s','v','d'}; + x[4] = (char)pDiskID; + x[5] = (char)(pDiskID >> 8); + x[6] = (char)(pDiskID >> 16); + x[7] = (char)(pDiskID >> 24); + x[8] = (char)vDiskSlotID; + x[9] = (char)(vDiskSlotID >> 8); + x[10] = (char)(vDiskSlotID >> 16); + x[11] = (char)(vDiskSlotID >> 24); return TActorId(node, TStringBuf(x, 12)); -} - +} + inline std::tuple<ui32, ui32, ui32> DecomposeVDiskServiceId(const TActorId& actorId) { Y_VERIFY(actorId.IsService()); const TStringBuf serviceId = actorId.ServiceId(); @@ -59,22 +59,22 @@ inline std::tuple<ui32, ui32, ui32> DecomposeVDiskServiceId(const TActorId& acto } inline TActorId MakeBlobStoragePDiskID(ui32 node, ui32 pDiskID) { - char x[12] = {'b','s','p','d','i','s','k', 0}; - x[8] = (char)pDiskID; - x[9] = (char)(pDiskID >> 8); - x[10] = (char)(pDiskID >> 16); - x[11] = (char)(pDiskID >> 24); + char x[12] = {'b','s','p','d','i','s','k', 0}; + x[8] = (char)pDiskID; + x[9] = (char)(pDiskID >> 8); + x[10] = (char)(pDiskID >> 16); + x[11] = (char)(pDiskID >> 24); return TActorId(node, TStringBuf(x, 12)); -} - +} + inline TActorId MakeBlobStorageReplBrokerID() { - char x[12] = {'b', 's', 'r', 'e', 'p', 'l', 'b', 'r', 'o', 'k', 'e', 'r'}; + char x[12] = {'b', 's', 'r', 'e', 'p', 'l', 'b', 'r', 'o', 'k', 'e', 'r'}; return TActorId(0, TStringBuf(x, 12)); -} - +} + inline TActorId MakeBlobStorageNodeWardenID(ui32 node) { - char x[12] = {'b','s','n','o','d','e','c','n','t','r','l','r'}; + char x[12] = {'b','s','n','o','d','e','c','n','t','r','l','r'}; return TActorId(node, TStringBuf(x, 12)); -} - -} // namespace NKikimr +} + +} // namespace NKikimr diff --git a/ydb/core/base/services/defs.h b/ydb/core/base/services/defs.h index d326476347..9c81773dfa 100644 --- a/ydb/core/base/services/defs.h +++ b/ydb/core/base/services/defs.h @@ -1,11 +1,11 @@ -#pragma once 
+#pragma once // unique tag to fix pragma once gcc glueing: ./ydb/core/base/services/defs.h #include <library/cpp/actors/core/defs.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/event.h> #include <library/cpp/actors/core/actorid.h> - -namespace NKikimr { - // actorlib is organic part of kikimr so we emulate global import by this directive - using namespace NActors; -} // namespace NKikimr + +namespace NKikimr { + // actorlib is organic part of kikimr so we emulate global import by this directive + using namespace NActors; +} // namespace NKikimr diff --git a/ydb/core/base/services/ya.make b/ydb/core/base/services/ya.make index eccfdeac93..8d043ac349 100644 --- a/ydb/core/base/services/ya.make +++ b/ydb/core/base/services/ya.make @@ -1,18 +1,18 @@ -LIBRARY() - -OWNER( - g:kikimr -) - -SRCS( - blobstorage_service_id.h -) - -PEERDIR( +LIBRARY() + +OWNER( + g:kikimr +) + +SRCS( + blobstorage_service_id.h +) + +PEERDIR( library/cpp/actors/core library/cpp/actors/protos library/cpp/actors/interconnect library/cpp/actors/helpers -) - -END() +) + +END() diff --git a/ydb/core/base/ut/ya.make b/ydb/core/base/ut/ya.make index 058d0836e3..7d6b2f3546 100644 --- a/ydb/core/base/ut/ya.make +++ b/ydb/core/base/ut/ya.make @@ -12,7 +12,7 @@ PEERDIR( ) SRCS( - blobstorage_grouptype_ut.cpp + blobstorage_grouptype_ut.cpp localdb_ut.cpp logoblob_ut.cpp shared_data_ut.cpp diff --git a/ydb/core/base/ya.make b/ydb/core/base/ya.make index 026d6c55af..83db5825c3 100644 --- a/ydb/core/base/ya.make +++ b/ydb/core/base/ya.make @@ -6,7 +6,7 @@ OWNER( g:kikimr ) -IF (KIKIMR_DEFAULT_SHARDED_COMPACTION) +IF (KIKIMR_DEFAULT_SHARDED_COMPACTION) # Makes it easier to test sharded compaction CFLAGS( -DKIKIMR_DEFAULT_SHARDED_COMPACTION=1 @@ -21,7 +21,7 @@ SRCS( board_publish.cpp board_replica.cpp blobstorage.h - blobstorage.cpp + blobstorage.cpp channel_profiles.h counters.cpp counters.h @@ -85,7 +85,7 @@ SRCS( tx_processing.h tx_processing.cpp user_registry.h - 
blobstorage_grouptype.cpp + blobstorage_grouptype.cpp ) PEERDIR( diff --git a/ydb/core/blobstorage/backpressure/queue_backpressure_client.cpp b/ydb/core/blobstorage/backpressure/queue_backpressure_client.cpp index 4fe64a76eb..200b85f616 100644 --- a/ydb/core/blobstorage/backpressure/queue_backpressure_client.cpp +++ b/ydb/core/blobstorage/backpressure/queue_backpressure_client.cpp @@ -1,9 +1,9 @@ #include "queue.h" -#include "queue_backpressure_client.h" -#include "queue_backpressure_server.h" +#include "queue_backpressure_client.h" +#include "queue_backpressure_server.h" #include "unisched.h" #include "common.h" - + //#define BSQUEUE_EVENT_COUNTERS 1 namespace NKikimr::NBsQueue { diff --git a/ydb/core/blobstorage/backpressure/queue_backpressure_client_ut.cpp b/ydb/core/blobstorage/backpressure/queue_backpressure_client_ut.cpp index 05e0904f7c..af14a13f2d 100644 --- a/ydb/core/blobstorage/backpressure/queue_backpressure_client_ut.cpp +++ b/ydb/core/blobstorage/backpressure/queue_backpressure_client_ut.cpp @@ -161,7 +161,7 @@ public: PDiskGuid = 1; PDiskKey = 1; FormatPDisk(Path, DiskSize, 4096, ChunkSize, PDiskGuid, PDiskKey, PDiskKey, PDiskKey, PDiskKey, "queue_test", - false, false, SectorMap); + false, false, SectorMap); PDiskId = MakeBlobStoragePDiskID(1, 1); ui64 pDiskCategory = 0; @@ -169,7 +169,7 @@ public: pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->SectorMap = SectorMap; - pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; + pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; TActorSetupCmd pDiskSetup(CreatePDisk(pDiskConfig.Get(), PDiskKey, Counters), TMailboxType::Revolving, 0); setup->LocalServices.emplace_back(PDiskId, pDiskSetup); diff --git a/ydb/core/blobstorage/base/blobstorage_events.h b/ydb/core/blobstorage/base/blobstorage_events.h index d97ba67db8..ef170a085d 100644 --- 
a/ydb/core/blobstorage/base/blobstorage_events.h +++ b/ydb/core/blobstorage/base/blobstorage_events.h @@ -37,7 +37,7 @@ namespace NKikimr { {} TEvControllerRegisterNode(ui32 nodeID, const TVector<ui32>& startedDynamicGroups, - const TVector<ui32>& groupGenerations, const TVector<NPDisk::TDriveData>& drivesData) { + const TVector<ui32>& groupGenerations, const TVector<NPDisk::TDriveData>& drivesData) { Record.SetNodeID(nodeID); for (auto groupId: startedDynamicGroups) { Record.AddGroups(groupId); @@ -45,43 +45,43 @@ namespace NKikimr { for (auto generation : groupGenerations) { Record.AddGroupGenerations(generation); } - for (const auto& data : drivesData) { - data.ToProto(Record.AddDrivesData()); - } + for (const auto& data : drivesData) { + data.ToProto(Record.AddDrivesData()); + } + } + + TString ToString() const override { + TStringStream str; + str << "{TEvRegisterNode Record# " << Record.DebugString(); + str << "}"; + return str.Str(); + } + }; + + struct TEvBlobStorage::TEvControllerUpdateNodeDrives : public TEventPB< + TEvBlobStorage::TEvControllerUpdateNodeDrives, + NKikimrBlobStorage::TEvControllerUpdateNodeDrives, + TEvBlobStorage::EvControllerUpdateNodeDrives> + { + TEvControllerUpdateNodeDrives() + {} + + TEvControllerUpdateNodeDrives(ui32 nodeId, const TVector<NPDisk::TDriveData>& drivesData) { + Record.SetNodeId(nodeId); + for (const auto& data : drivesData) { + data.ToProto(Record.AddDrivesData()); + } } TString ToString() const override { TStringStream str; - str << "{TEvRegisterNode Record# " << Record.DebugString(); + str << "{TEvControllerUpdateNodeDrives Record# " << Record.DebugString(); str << "}"; return str.Str(); } }; - struct TEvBlobStorage::TEvControllerUpdateNodeDrives : public TEventPB< - TEvBlobStorage::TEvControllerUpdateNodeDrives, - NKikimrBlobStorage::TEvControllerUpdateNodeDrives, - TEvBlobStorage::EvControllerUpdateNodeDrives> - { - TEvControllerUpdateNodeDrives() - {} - - TEvControllerUpdateNodeDrives(ui32 nodeId, const 
TVector<NPDisk::TDriveData>& drivesData) { - Record.SetNodeId(nodeId); - for (const auto& data : drivesData) { - data.ToProto(Record.AddDrivesData()); - } - } - - TString ToString() const override { - TStringStream str; - str << "{TEvControllerUpdateNodeDrives Record# " << Record.DebugString(); - str << "}"; - return str.Str(); - } - }; - - + struct TEvBlobStorage::TEvControllerNodeServiceSetUpdate : public TEventPB< TEvBlobStorage::TEvControllerNodeServiceSetUpdate, NKikimrBlobStorage::TEvControllerNodeServiceSetUpdate, @@ -419,35 +419,35 @@ namespace NKikimr { std::vector<std::unique_ptr<IEventHandle>> Bunch; }; - struct TEvBlobStorage::TEvAskRestartPDisk : TEventLocal<TEvAskRestartPDisk, EvAskRestartPDisk> { - const ui32 PDiskId; - - TEvAskRestartPDisk(const ui32& pdiskId) - : PDiskId(pdiskId) - {} - }; - - struct TEvBlobStorage::TEvRestartPDisk : TEventLocal<TEvRestartPDisk, EvRestartPDisk> { - const ui32 PDiskId; + struct TEvBlobStorage::TEvAskRestartPDisk : TEventLocal<TEvAskRestartPDisk, EvAskRestartPDisk> { + const ui32 PDiskId; + + TEvAskRestartPDisk(const ui32& pdiskId) + : PDiskId(pdiskId) + {} + }; + + struct TEvBlobStorage::TEvRestartPDisk : TEventLocal<TEvRestartPDisk, EvRestartPDisk> { + const ui32 PDiskId; NPDisk::TKey MainKey; - TIntrusivePtr<TPDiskConfig> Config; - - TEvRestartPDisk(const ui32& pdiskId, const NPDisk::TKey& mainKey, const TIntrusivePtr<TPDiskConfig>& config) - : PDiskId(pdiskId) + TIntrusivePtr<TPDiskConfig> Config; + + TEvRestartPDisk(const ui32& pdiskId, const NPDisk::TKey& mainKey, const TIntrusivePtr<TPDiskConfig>& config) + : PDiskId(pdiskId) , MainKey(mainKey) - , Config(config) - {} - }; - - struct TEvBlobStorage::TEvRestartPDiskResult : TEventLocal<TEvRestartPDiskResult, EvRestartPDiskResult> { - const ui32 PDiskId; - NKikimrProto::EReplyStatus Status; - - TEvRestartPDiskResult(const ui32& pdiskId, NKikimrProto::EReplyStatus status = NKikimrProto::EReplyStatus::OK) - : PDiskId(pdiskId) - , Status(status) - {} - }; + , 
Config(config) + {} + }; + + struct TEvBlobStorage::TEvRestartPDiskResult : TEventLocal<TEvRestartPDiskResult, EvRestartPDiskResult> { + const ui32 PDiskId; + NKikimrProto::EReplyStatus Status; + + TEvRestartPDiskResult(const ui32& pdiskId, NKikimrProto::EReplyStatus status = NKikimrProto::EReplyStatus::OK) + : PDiskId(pdiskId) + , Status(status) + {} + }; struct TEvBlobStorage::TEvControllerScrubQueryStartQuantum : TEventPB<TEvControllerScrubQueryStartQuantum, NKikimrBlobStorage::TEvControllerScrubQueryStartQuantum, EvControllerScrubQueryStartQuantum> { diff --git a/ydb/core/blobstorage/base/bufferwithgaps.h b/ydb/core/blobstorage/base/bufferwithgaps.h index 04a660670c..28fc2f78ed 100644 --- a/ydb/core/blobstorage/base/bufferwithgaps.h +++ b/ydb/core/blobstorage/base/bufferwithgaps.h @@ -2,7 +2,7 @@ #include "defs.h" #include <util/generic/string.h> -#include <util/generic/set.h> +#include <util/generic/set.h> #include <util/generic/map.h> #include <util/generic/vector.h> #include <util/generic/algorithm.h> @@ -20,49 +20,49 @@ namespace NKikimr { class TBufferWithGaps { TString Data; - // <begin, size> - TMap<ui32, ui32> Gaps; + // <begin, size> + TMap<ui32, ui32> Gaps; ui32 Offset; // Data's offset in Gaps space - bool IsCommited; + bool IsCommited; public: TBufferWithGaps() : Offset(0) - , IsCommited(false) + , IsCommited(false) {} TBufferWithGaps(ui32 offset) : Offset(offset) - , IsCommited(false) + , IsCommited(false) + {} + + TBufferWithGaps(ui32 offset, ui32 size) + : Data(TString::Uninitialized(size)) + , Offset(offset) + , IsCommited(false) {} - TBufferWithGaps(ui32 offset, ui32 size) - : Data(TString::Uninitialized(size)) - , Offset(offset) - , IsCommited(false) - {} - TBufferWithGaps(TBufferWithGaps &&) = default; TBufferWithGaps &operator=(TBufferWithGaps &&) = default; void AddGap(ui32 begin, ui32 end) { - // ensure gaps never overlap - ui32 size = end - begin; - auto f = Gaps.upper_bound(begin); - if (!Gaps.empty() && f != Gaps.begin()) { - auto prev 
= std::prev(f); - if (prev->first + prev->second == begin) { - prev->second += size; - return; - } + // ensure gaps never overlap + ui32 size = end - begin; + auto f = Gaps.upper_bound(begin); + if (!Gaps.empty() && f != Gaps.begin()) { + auto prev = std::prev(f); + if (prev->first + prev->second == begin) { + prev->second += size; + return; + } } - // add new gap - Gaps.emplace(begin, size); + // add new gap + Gaps.emplace(begin, size); } void SetData(TString&& data) { Data = std::move(data); - IsCommited = true; + IsCommited = true; } TString ToString() const { @@ -83,48 +83,48 @@ namespace NKikimr { return reinterpret_cast<T *>(Data.data() + offset); } - ui8 *RawDataPtr(ui32 offset, ui32 len) { + ui8 *RawDataPtr(ui32 offset, ui32 len) { Y_VERIFY(offset + len <= Data.size(), "Buffer has size# %zu less then requested offset# %" PRIu32 " len# %" PRIu32, Data.size(), offset, len); - IsCommited = false; - return reinterpret_cast<ui8 *>(Data.Detach() + offset); - } - - void Commit() { - IsCommited = true; - } - + IsCommited = false; + return reinterpret_cast<ui8 *>(Data.Detach() + offset); + } + + void Commit() { + IsCommited = true; + } + bool IsReadable() const { - Y_VERIFY(IsCommited, "returned data was not commited"); + Y_VERIFY(IsCommited, "returned data was not commited"); return Gaps.empty(); } bool IsReadable(ui32 offset, ui32 len) const { - Y_VERIFY(IsCommited, "returned data was not commited"); + Y_VERIFY(IsCommited, "returned data was not commited"); if (offset + len > Data.size()) { return false; } const ui32 begin = Offset + offset; const ui32 end = begin + len; - auto f = Gaps.upper_bound(begin); - if (Gaps.empty()) { - return true; - } else if (f == Gaps.begin()) { - // intersection occurs only when 'end > f->Begin' - // [begin ) end - // [ f->Begin ) f->End - return end <= f->first; - } else { - // There are two possible intersections can occur: - // 1. The gap before (there are always such one) f can have prev->end > begin - // 2. 
f may be either Gaps.end() or the last element. If it is the last element, check that f->begin >= end - // [ prev->begin ) prev->end - // [begin ) end - // [ f->Begin ) f->End - auto prev = std::prev(f); - return prev->first + prev->second <= begin && (f == Gaps.end() || end <= f->first); - } + auto f = Gaps.upper_bound(begin); + if (Gaps.empty()) { + return true; + } else if (f == Gaps.begin()) { + // intersection occurs only when 'end > f->Begin' + // [begin ) end + // [ f->Begin ) f->End + return end <= f->first; + } else { + // There are two possible intersections can occur: + // 1. The gap before (there are always such one) f can have prev->end > begin + // 2. f may be either Gaps.end() or the last element. If it is the last element, check that f->begin >= end + // [ prev->begin ) prev->end + // [begin ) end + // [ f->Begin ) f->End + auto prev = std::prev(f); + return prev->first + prev->second <= begin && (f == Gaps.end() || end <= f->first); + } } ui32 Size() const { @@ -135,20 +135,20 @@ namespace NKikimr { Data.swap(other.Data); Gaps.swap(other.Gaps); DoSwap(Offset, other.Offset); - DoSwap(IsCommited, other.IsCommited); + DoSwap(IsCommited, other.IsCommited); } void Clear() { Data.clear(); Gaps.clear(); - Offset = 0; - IsCommited = false; + Offset = 0; + IsCommited = false; + } + + bool IsDetached() const { + return Data.IsDetached(); } - bool IsDetached() const { - return Data.IsDetached(); - } - bool Empty() const { return Data.empty(); } @@ -156,14 +156,14 @@ namespace NKikimr { void Sanitize() const { if (Data.size()) { ui64 a = 0; - for (const auto &gap : Gaps) { - ui64 b = gap.first - Offset; + for (const auto &gap : Gaps) { + ui64 b = gap.first - Offset; if (a < b) { - ui64 size = gap.second; + ui64 size = gap.second; Y_UNUSED(size); REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(DataPtr<const char>(a, size), size); } - a = b + gap.second; + a = b + gap.second; } ui64 b = Data.size(); if (a < b) { diff --git 
a/ydb/core/blobstorage/base/common_latency_hist_bounds.h b/ydb/core/blobstorage/base/common_latency_hist_bounds.h index 55caad841c..1f44e11919 100644 --- a/ydb/core/blobstorage/base/common_latency_hist_bounds.h +++ b/ydb/core/blobstorage/base/common_latency_hist_bounds.h @@ -1,43 +1,43 @@ -#pragma once - -#include "defs.h" - +#pragma once + +#include "defs.h" + #include <ydb/core/base/blobstorage.h> #include <library/cpp/monlib/metrics/histogram_snapshot.h> - -namespace NKikimr { - -static inline NMonitoring::TBucketBounds GetCommonLatencyHistBounds(TPDiskCategory::EDeviceType type) { - NMonitoring::TBucketBounds bounds = { - 8, 16, 32, 64, 128, 256, 512, // ms - 1'024, 4'096, // s - 65'536 // minutes - }; - switch (type) { - case TPDiskCategory::DEVICE_TYPE_UNKNOWN: - // Use default - break; - case TPDiskCategory::DEVICE_TYPE_ROT: - // Use default - break; - case TPDiskCategory::DEVICE_TYPE_SSD: - bounds = { - 0.5, // us - 1, 2, 8, 32, 128, 512, // ms - 1'024, 4'096, // s - 65'536 // minutes - }; - break; - case TPDiskCategory::DEVICE_TYPE_NVME: - bounds = { - 0.25, 0.5, // us - 1, 2, 4, 8, 32, 128, // ms - 1'024, // s - 65'536 // minutes - }; - break; - } - return bounds; -} - -} // NKikimr + +namespace NKikimr { + +static inline NMonitoring::TBucketBounds GetCommonLatencyHistBounds(TPDiskCategory::EDeviceType type) { + NMonitoring::TBucketBounds bounds = { + 8, 16, 32, 64, 128, 256, 512, // ms + 1'024, 4'096, // s + 65'536 // minutes + }; + switch (type) { + case TPDiskCategory::DEVICE_TYPE_UNKNOWN: + // Use default + break; + case TPDiskCategory::DEVICE_TYPE_ROT: + // Use default + break; + case TPDiskCategory::DEVICE_TYPE_SSD: + bounds = { + 0.5, // us + 1, 2, 8, 32, 128, 512, // ms + 1'024, 4'096, // s + 65'536 // minutes + }; + break; + case TPDiskCategory::DEVICE_TYPE_NVME: + bounds = { + 0.25, 0.5, // us + 1, 2, 4, 8, 32, 128, // ms + 1'024, // s + 65'536 // minutes + }; + break; + } + return bounds; +} + +} // NKikimr diff --git 
a/ydb/core/blobstorage/crypto/chacha_vec.cpp b/ydb/core/blobstorage/crypto/chacha_vec.cpp index 44bfaa630e..870c63e2e4 100644 --- a/ydb/core/blobstorage/crypto/chacha_vec.cpp +++ b/ydb/core/blobstorage/crypto/chacha_vec.cpp @@ -7,7 +7,7 @@ Public domain. #include "chacha_vec.h" #include "secured_block.h" -#include <util/system/align.h> +#include <util/system/align.h> #include <util/system/yassert.h> @@ -50,10 +50,10 @@ Public domain. # define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) # define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x,25)) # define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i)x,20)) -# define SHIFT_LEFT(x) (vec)_mm_bslli_si128((__m128i)(x), 8) -# define SHIFT_RIGHT(x) (vec)_mm_bsrli_si128((__m128i)(x), 8) -# define SET(x0, x1) (vec)_mm_set_epi64x((x0), (x1)) - +# define SHIFT_LEFT(x) (vec)_mm_bslli_si128((__m128i)(x), 8) +# define SHIFT_RIGHT(x) (vec)_mm_bsrli_si128((__m128i)(x), 8) +# define SET(x0, x1) (vec)_mm_set_epi64x((x0), (x1)) + # if __SSSE3__ # include <tmmintrin.h> # define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3)) @@ -63,7 +63,7 @@ Public domain. # define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i)x,16)) # endif #else -# error -- Implementation supports only machines with neon or SSE2 +# error -- Implementation supports only machines with neon or SSE2 #endif #ifndef REVV_BE @@ -93,62 +93,62 @@ Public domain. 
c = c+d; b ^= c; b = b<< 7 | b>>25; #define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ - *(vec *)(op + d + 0) = *(vec *)(in + d + 0) ^ v0; \ - *(vec *)(op + d + 4) = *(vec *)(in + d + 4) ^ v1; \ - *(vec *)(op + d + 8) = *(vec *)(in + d + 8) ^ v2; \ - *(vec *)(op + d + 12) = *(vec *)(in + d + 12) ^ v3; - -template<bool Aligned> -Y_FORCE_INLINE void WriteXor(ui32 *op, ui32 *ip, - vec v0, vec v1, vec v2, vec v3, - const vec i_v[4]) { - if constexpr (Aligned) { - *(vec *)(op + 0) = *(vec *)(ip + 0) ^ v0; - *(vec *)(op + 4) = *(vec *)(ip + 4) ^ v1; - *(vec *)(op + 8) = *(vec *)(ip + 8) ^ v2; - *(vec *)(op + 12) = *(vec *)(ip + 12) ^ v3; - } else { - *(vec *)(op + 0) = i_v[0] ^ v0; - *(vec *)(op + 4) = i_v[1] ^ v1; - *(vec *)(op + 8) = i_v[2] ^ v2; - *(vec *)(op + 12) = i_v[3] ^ v3; - } -} - -template<bool Aligned, bool IsFirst, bool IsLast> -Y_FORCE_INLINE void ReadW(ui32 *ip, vec i_v[4], vec& next_i_v) { - if constexpr (Aligned) { - return; - } - - vec tmp; - - if constexpr (IsFirst) { - next_i_v = SET(0, *(ui64*)(ip - 2)); - } - - tmp = *(vec*)(ip + 0); - i_v[1] = SHIFT_RIGHT(tmp); - i_v[0] = next_i_v | SHIFT_LEFT(tmp); - - tmp = *(vec*)(ip + 4); - i_v[2] = SHIFT_RIGHT(tmp); - i_v[1] = i_v[1] | SHIFT_LEFT(tmp); - - tmp = *(vec*)(ip + 8); - i_v[3] = SHIFT_RIGHT(tmp); - i_v[2] = i_v[2] | SHIFT_LEFT(tmp); - - if constexpr (IsLast) { - i_v[3] = i_v[3] | SET(*(ui64*)(ip + 12), 0); - } else { - tmp = *(vec*)(ip + 12); - next_i_v = SHIFT_RIGHT(tmp); - i_v[3] = i_v[3] | SHIFT_LEFT(tmp); - } -} - - + *(vec *)(op + d + 0) = *(vec *)(in + d + 0) ^ v0; \ + *(vec *)(op + d + 4) = *(vec *)(in + d + 4) ^ v1; \ + *(vec *)(op + d + 8) = *(vec *)(in + d + 8) ^ v2; \ + *(vec *)(op + d + 12) = *(vec *)(in + d + 12) ^ v3; + +template<bool Aligned> +Y_FORCE_INLINE void WriteXor(ui32 *op, ui32 *ip, + vec v0, vec v1, vec v2, vec v3, + const vec i_v[4]) { + if constexpr (Aligned) { + *(vec *)(op + 0) = *(vec *)(ip + 0) ^ v0; + *(vec *)(op + 4) = *(vec *)(ip + 4) ^ v1; + *(vec *)(op + 8) = *(vec 
*)(ip + 8) ^ v2; + *(vec *)(op + 12) = *(vec *)(ip + 12) ^ v3; + } else { + *(vec *)(op + 0) = i_v[0] ^ v0; + *(vec *)(op + 4) = i_v[1] ^ v1; + *(vec *)(op + 8) = i_v[2] ^ v2; + *(vec *)(op + 12) = i_v[3] ^ v3; + } +} + +template<bool Aligned, bool IsFirst, bool IsLast> +Y_FORCE_INLINE void ReadW(ui32 *ip, vec i_v[4], vec& next_i_v) { + if constexpr (Aligned) { + return; + } + + vec tmp; + + if constexpr (IsFirst) { + next_i_v = SET(0, *(ui64*)(ip - 2)); + } + + tmp = *(vec*)(ip + 0); + i_v[1] = SHIFT_RIGHT(tmp); + i_v[0] = next_i_v | SHIFT_LEFT(tmp); + + tmp = *(vec*)(ip + 4); + i_v[2] = SHIFT_RIGHT(tmp); + i_v[1] = i_v[1] | SHIFT_LEFT(tmp); + + tmp = *(vec*)(ip + 8); + i_v[3] = SHIFT_RIGHT(tmp); + i_v[2] = i_v[2] | SHIFT_LEFT(tmp); + + if constexpr (IsLast) { + i_v[3] = i_v[3] | SET(*(ui64*)(ip + 12), 0); + } else { + tmp = *(vec*)(ip + 12); + next_i_v = SHIFT_RIGHT(tmp); + i_v[3] = i_v[3] | SHIFT_LEFT(tmp); + } +} + + constexpr size_t ChaChaVec::KEY_SIZE; constexpr size_t ChaChaVec::BLOCK_SIZE; @@ -168,8 +168,8 @@ void ChaChaVec::SetKey(const ui8* key, size_t size) kp = (ui32*)aligned_key; #else alignas(16) ui32 k[4]; - ((vec *)k)[0] = ((vec *)aligned_key)[0]; - ((vec *)k)[1] = ((vec *)aligned_key)[1]; + ((vec *)k)[0] = ((vec *)aligned_key)[0]; + ((vec *)k)[1] = ((vec *)aligned_key)[1]; kp = (ui32*)k; #endif s0_ = *(vec *)chacha_const; @@ -199,186 +199,186 @@ void ChaChaVec::SetIV(const ui8* iv, const ui8* blockIdx) s3_ = NONCE(np, bp); } - + void ChaChaVec::SetIV(const ui8* iv) { const ui8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; SetIV(iv, zero); } -template<bool Aligned> -void ChaChaVec::EncipherImpl(const ui8* plaintext, ui8* ciphertext, size_t len) +template<bool Aligned> +void ChaChaVec::EncipherImpl(const ui8* plaintext, ui8* ciphertext, size_t len) +{ + size_t iters, i; + ui32* ip = (ui32*)AlignUp<intptr_t>(intptr_t(plaintext), 16); + ui32 *op = (ui32*)ciphertext; + + const ui32 unalignment = intptr_t(plaintext) % 16; + if constexpr (!Aligned) { + 
Y_VERIFY(unalignment == 8, "Unalignment# %d", (int)unalignment); + } + + Y_VERIFY(intptr_t(ip) % 16 == 0); + Y_VERIFY(intptr_t(op) % 16 == 0); + + // Unused if Aligned + vec i_v[4]; + vec next_i_v; + + for (iters = 0; iters < len/(CHACHA_BPI * BLOCK_SIZE); iters++) { + vec v0,v1,v2,v3,v4,v5,v6,v7; + v4 = v0 = s0_; v5 = v1 = s1_; v6 = v2 = s2_; v3 = s3_; + v7 = v3 + ONE; + #if CHACHA_VBPI > 2 + vec v8,v9,v10,v11; + v8 = v4; v9 = v5; v10 = v6; + v11 = v7 + ONE; + #endif + #if CHACHA_VBPI > 3 + vec v12,v13,v14,v15; + v12 = v8; v13 = v9; v14 = v10; + v15 = v11 + ONE; + #endif + #if CHACHA_GPR_TOO + ui32* kp = (ui32*)&s1_; + ui32* np = (ui32*)&s3_ + 2; + + ui32 x0, x1, x2, x3, x4, x5, x6, x7, x8, + x9, x10, x11, x12, x13, x14, x15; + x0 = chacha_const[0]; x1 = chacha_const[1]; + x2 = chacha_const[2]; x3 = chacha_const[3]; + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; + x12 = CHACHA_BPI*iters+(CHACHA_BPI-1); x13 = 0; x14 = np[0]; x15 = np[1]; + #endif + for (i = rounds_/2; i; i--) { + DQROUND_VECTORS(v0,v1,v2,v3) + DQROUND_VECTORS(v4,v5,v6,v7) + #if CHACHA_VBPI > 2 + DQROUND_VECTORS(v8,v9,v10,v11) + #endif + #if CHACHA_VBPI > 3 + DQROUND_VECTORS(v12,v13,v14,v15) + #endif + #if CHACHA_GPR_TOO + QROUND_WORDS(x0, x4, x8, x12) + QROUND_WORDS(x1, x5, x9, x13) + QROUND_WORDS(x2, x6, x10, x14) + QROUND_WORDS(x3, x7, x11, x15) + QROUND_WORDS(x0, x5, x10, x15) + QROUND_WORDS(x1, x6, x11, x12) + QROUND_WORDS(x2, x7, x8, x13) + QROUND_WORDS(x3, x4, x9, x14) + #endif + } + ReadW<Aligned, true, false>(ip + 0, i_v, next_i_v); + WriteXor<Aligned>(op + 0, ip + 0, v0+s0_, v1+s1_, v2+s2_, v3+s3_, i_v); + s3_ += ONE; + ReadW<Aligned, false, CHACHA_VBPI == 2>(ip + 16, i_v, next_i_v); + WriteXor<Aligned>(op + 16, ip + 16, v4+s0_, v5+s1_, v6+s2_, v7+s3_, i_v); + s3_ += ONE; + #if CHACHA_VBPI > 2 + ReadW<Aligned, false, CHACHA_VBPI == 3>(ip + 32, i_v, next_i_v); + WriteXor<Aligned>(op + 32, ip + 32, v8+s0_, v9+s1_, v10+s2_, v11+s3_, i_v); 
+ s3_ += ONE; + #endif + #if CHACHA_VBPI > 3 + ReadW<Aligned, false, CHACHA_VBPI == 4>(ip + 48, i_v, next_i_v); + WriteXor<Aligned>(op + 48, ip + 48, v12+s0_, v13+s1_, v14+s2_, v15+s3_, i_v); + s3_ += ONE; + #endif + ip += CHACHA_VBPI*16; + Y_ASSERT(intptr_t(ip) % 16 == 0); + op += CHACHA_VBPI*16; + Y_ASSERT(intptr_t(op) % 16 == 0); + #if CHACHA_GPR_TOO + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + CHACHA_BPI*iters+(CHACHA_BPI-1))); + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); + s3_ += ONE; + ip += 16; + op += 16; + #endif + } + + for (iters = len % (CHACHA_BPI*BLOCK_SIZE)/BLOCK_SIZE; iters != 0; iters--) { + vec v0 = s0_, v1 = s1_, v2 = s2_, v3 = s3_; + for (i = rounds_/2; i; i--) { + DQROUND_VECTORS(v0,v1,v2,v3) + } + ReadW<Aligned, true, true>(ip, i_v, next_i_v); + WriteXor<Aligned>(op, ip, v0+s0_, v1+s1_, v2+s2_, v3+s3_, i_v); + s3_ += ONE; + ip += 16; + op += 16; + } + + len = len % BLOCK_SIZE; + if (len) { + if (!Aligned) { + // Unaligned version can work only on full blocks + alignas(16) ui8 buf[BLOCK_SIZE]; + memcpy(buf, ip - 2, len); + EncipherImpl<true>(buf, buf, len); + memcpy(op, buf, len); + SecureWipeBuffer(buf, BLOCK_SIZE); + } else { + alignas(16) char buf[16]; + vec tail; + vec v0, v1, v2, v3; + v0 = s0_; v1 = 
s1_; v2 = s2_; v3 = s3_; + for (i = rounds_/2; i; i--) { + DQROUND_VECTORS(v0,v1,v2,v3) + } + if (len >= 32) { + *(vec *)(op + 0) = *(vec *)(ip + 0) ^ (v0 + s0_); + *(vec *)(op + 4) = *(vec *)(ip + 4) ^ (v1 + s1_); + if (len >= 48) { + *(vec *)(op + 8) = *(vec *)(ip + 8) ^ (v2 + s2_); + tail = v3 + s3_; + op += 12; + ip += 12; + len -= 48; + } else { + tail = v2 + s2_; + op += 8; + ip += 8; + len -= 32; + } + } else if (len >= 16) { + *(vec *)(op + 0) = *(vec *)(ip + 0) ^ (v0 + s0_); + tail = v1 + s1_; + op += 4; + ip += 4; + len -= 16; + } else { + tail = v0 + s0_; + } + memcpy(buf, ip, len); + void *bp = buf; + *(vec *)bp = tail ^ *(vec *)bp; + memcpy(op, buf, len); + SecureWipeBuffer(buf, 16); + } + } +} + +// Old version, used only for compatibility tests +void ChaChaVec::EncipherOld(const ui8* plaintext, ui8* ciphertext, size_t len) { size_t iters, i; - ui32* ip = (ui32*)AlignUp<intptr_t>(intptr_t(plaintext), 16); - ui32 *op = (ui32*)ciphertext; - - const ui32 unalignment = intptr_t(plaintext) % 16; - if constexpr (!Aligned) { - Y_VERIFY(unalignment == 8, "Unalignment# %d", (int)unalignment); - } - - Y_VERIFY(intptr_t(ip) % 16 == 0); - Y_VERIFY(intptr_t(op) % 16 == 0); - - // Unused if Aligned - vec i_v[4]; - vec next_i_v; - - for (iters = 0; iters < len/(CHACHA_BPI * BLOCK_SIZE); iters++) { - vec v0,v1,v2,v3,v4,v5,v6,v7; - v4 = v0 = s0_; v5 = v1 = s1_; v6 = v2 = s2_; v3 = s3_; - v7 = v3 + ONE; - #if CHACHA_VBPI > 2 - vec v8,v9,v10,v11; - v8 = v4; v9 = v5; v10 = v6; - v11 = v7 + ONE; - #endif - #if CHACHA_VBPI > 3 - vec v12,v13,v14,v15; - v12 = v8; v13 = v9; v14 = v10; - v15 = v11 + ONE; - #endif - #if CHACHA_GPR_TOO - ui32* kp = (ui32*)&s1_; - ui32* np = (ui32*)&s3_ + 2; - - ui32 x0, x1, x2, x3, x4, x5, x6, x7, x8, - x9, x10, x11, x12, x13, x14, x15; - x0 = chacha_const[0]; x1 = chacha_const[1]; - x2 = chacha_const[2]; x3 = chacha_const[3]; - x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; - x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; - x12 = 
CHACHA_BPI*iters+(CHACHA_BPI-1); x13 = 0; x14 = np[0]; x15 = np[1]; - #endif - for (i = rounds_/2; i; i--) { - DQROUND_VECTORS(v0,v1,v2,v3) - DQROUND_VECTORS(v4,v5,v6,v7) - #if CHACHA_VBPI > 2 - DQROUND_VECTORS(v8,v9,v10,v11) - #endif - #if CHACHA_VBPI > 3 - DQROUND_VECTORS(v12,v13,v14,v15) - #endif - #if CHACHA_GPR_TOO - QROUND_WORDS(x0, x4, x8, x12) - QROUND_WORDS(x1, x5, x9, x13) - QROUND_WORDS(x2, x6, x10, x14) - QROUND_WORDS(x3, x7, x11, x15) - QROUND_WORDS(x0, x5, x10, x15) - QROUND_WORDS(x1, x6, x11, x12) - QROUND_WORDS(x2, x7, x8, x13) - QROUND_WORDS(x3, x4, x9, x14) - #endif - } - ReadW<Aligned, true, false>(ip + 0, i_v, next_i_v); - WriteXor<Aligned>(op + 0, ip + 0, v0+s0_, v1+s1_, v2+s2_, v3+s3_, i_v); - s3_ += ONE; - ReadW<Aligned, false, CHACHA_VBPI == 2>(ip + 16, i_v, next_i_v); - WriteXor<Aligned>(op + 16, ip + 16, v4+s0_, v5+s1_, v6+s2_, v7+s3_, i_v); - s3_ += ONE; - #if CHACHA_VBPI > 2 - ReadW<Aligned, false, CHACHA_VBPI == 3>(ip + 32, i_v, next_i_v); - WriteXor<Aligned>(op + 32, ip + 32, v8+s0_, v9+s1_, v10+s2_, v11+s3_, i_v); - s3_ += ONE; - #endif - #if CHACHA_VBPI > 3 - ReadW<Aligned, false, CHACHA_VBPI == 4>(ip + 48, i_v, next_i_v); - WriteXor<Aligned>(op + 48, ip + 48, v12+s0_, v13+s1_, v14+s2_, v15+s3_, i_v); - s3_ += ONE; - #endif - ip += CHACHA_VBPI*16; - Y_ASSERT(intptr_t(ip) % 16 == 0); - op += CHACHA_VBPI*16; - Y_ASSERT(intptr_t(op) % 16 == 0); - #if CHACHA_GPR_TOO - op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); - op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); - op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); - op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); - op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); - op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); - op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); - op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); - op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); - op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); - op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 
+ kp[6])); - op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); - op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + CHACHA_BPI*iters+(CHACHA_BPI-1))); - op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); - op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); - op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); - s3_ += ONE; - ip += 16; - op += 16; - #endif - } - - for (iters = len % (CHACHA_BPI*BLOCK_SIZE)/BLOCK_SIZE; iters != 0; iters--) { - vec v0 = s0_, v1 = s1_, v2 = s2_, v3 = s3_; - for (i = rounds_/2; i; i--) { - DQROUND_VECTORS(v0,v1,v2,v3) - } - ReadW<Aligned, true, true>(ip, i_v, next_i_v); - WriteXor<Aligned>(op, ip, v0+s0_, v1+s1_, v2+s2_, v3+s3_, i_v); - s3_ += ONE; - ip += 16; - op += 16; - } - - len = len % BLOCK_SIZE; - if (len) { - if (!Aligned) { - // Unaligned version can work only on full blocks - alignas(16) ui8 buf[BLOCK_SIZE]; - memcpy(buf, ip - 2, len); - EncipherImpl<true>(buf, buf, len); - memcpy(op, buf, len); - SecureWipeBuffer(buf, BLOCK_SIZE); - } else { - alignas(16) char buf[16]; - vec tail; - vec v0, v1, v2, v3; - v0 = s0_; v1 = s1_; v2 = s2_; v3 = s3_; - for (i = rounds_/2; i; i--) { - DQROUND_VECTORS(v0,v1,v2,v3) - } - if (len >= 32) { - *(vec *)(op + 0) = *(vec *)(ip + 0) ^ (v0 + s0_); - *(vec *)(op + 4) = *(vec *)(ip + 4) ^ (v1 + s1_); - if (len >= 48) { - *(vec *)(op + 8) = *(vec *)(ip + 8) ^ (v2 + s2_); - tail = v3 + s3_; - op += 12; - ip += 12; - len -= 48; - } else { - tail = v2 + s2_; - op += 8; - ip += 8; - len -= 32; - } - } else if (len >= 16) { - *(vec *)(op + 0) = *(vec *)(ip + 0) ^ (v0 + s0_); - tail = v1 + s1_; - op += 4; - ip += 4; - len -= 16; - } else { - tail = v0 + s0_; - } - memcpy(buf, ip, len); - void *bp = buf; - *(vec *)bp = tail ^ *(vec *)bp; - memcpy(op, buf, len); - SecureWipeBuffer(buf, 16); - } - } -} - -// Old version, used only for compatibility tests -void ChaChaVec::EncipherOld(const ui8* plaintext, ui8* ciphertext, size_t len) -{ - size_t iters, i; ui32 *op=(ui32*)ciphertext, *ip=(ui32*)plaintext; for (iters = 0; 
iters < len/(CHACHA_BPI * BLOCK_SIZE); iters++) { @@ -508,22 +508,22 @@ void ChaChaVec::EncipherOld(const ui8* plaintext, ui8* ciphertext, size_t len) void *bp = buf; *(vec *)bp = tail ^ *(vec *)bp; memcpy(op, buf, len); - SecureWipeBuffer(buf, 16); + SecureWipeBuffer(buf, 16); + } +} + +void ChaChaVec::Encipher(const ui8* plaintext, ui8* ciphertext, size_t len) +{ + const ui32 input_unalignment = intptr_t(plaintext) % 16; + if (input_unalignment == 0) { + EncipherImpl<true>(plaintext, ciphertext, len); + } else if (input_unalignment == 8) { + EncipherImpl<false>(plaintext, ciphertext, len); + } else { + Y_FAIL("ChaChaVec can work only with input aligned on 8, 16 or more bytes"); } } -void ChaChaVec::Encipher(const ui8* plaintext, ui8* ciphertext, size_t len) -{ - const ui32 input_unalignment = intptr_t(plaintext) % 16; - if (input_unalignment == 0) { - EncipherImpl<true>(plaintext, ciphertext, len); - } else if (input_unalignment == 8) { - EncipherImpl<false>(plaintext, ciphertext, len); - } else { - Y_FAIL("ChaChaVec can work only with input aligned on 8, 16 or more bytes"); - } -} - void ChaChaVec::Decipher(const ui8* ciphertext, ui8* plaintext, size_t len) { Encipher(ciphertext, plaintext, len); diff --git a/ydb/core/blobstorage/crypto/chacha_vec.h b/ydb/core/blobstorage/crypto/chacha_vec.h index 3ebde7d025..7bcaf9dcdf 100644 --- a/ydb/core/blobstorage/crypto/chacha_vec.h +++ b/ydb/core/blobstorage/crypto/chacha_vec.h @@ -41,15 +41,15 @@ public: void SetIV(const ui8* iv, const ui8* blockIdx); void SetIV(const ui8* iv); void Encipher(const ui8* plaintext, ui8* ciphertext, size_t size); - // Only for tests - void EncipherOld(const ui8* plaintext, ui8* ciphertext, size_t size); + // Only for tests + void EncipherOld(const ui8* plaintext, ui8* ciphertext, size_t size); void Decipher(const ui8* ciphertext, ui8* plaintext, size_t size); ~ChaChaVec(); private: - template<bool Aligned> - void EncipherImpl(const ui8* plaintext, ui8* ciphertext, size_t len); - + 
template<bool Aligned> + void EncipherImpl(const ui8* plaintext, ui8* ciphertext, size_t len); + vec s0_, s1_, s2_, s3_; ui8 rounds_; }; diff --git a/ydb/core/blobstorage/crypto/chacha_vec_ut.cpp b/ydb/core/blobstorage/crypto/chacha_vec_ut.cpp index d5efd0121e..823348e832 100644 --- a/ydb/core/blobstorage/crypto/chacha_vec_ut.cpp +++ b/ydb/core/blobstorage/crypto/chacha_vec_ut.cpp @@ -1,5 +1,5 @@ -#include <util/random/fast.h> - +#include <util/random/fast.h> + #include "chacha_vec.h" #include "secured_block.h" #include <ydb/core/blobstorage/crypto/ut/ut_helpers.h> @@ -137,43 +137,43 @@ Y_UNIT_TEST_SUITE(TChaChaVec) UNIT_ASSERT_EQUAL(((ui8*)buf)[i], plaintext.data()[i]); } } - - Y_UNIT_TEST(CompatibilityTest) { - ui64 offset = 0; - - ChaChaVec cipher1; - cipher1.SetIV(tc8_iv, (ui8*)&offset); - cipher1.SetKey(tc8_key, KEY_SIZE); - - ChaChaVec cipher2; - cipher2.SetIV(tc8_iv, (ui8*)&offset); - cipher2.SetKey(tc8_key, KEY_SIZE); - - TReallyFastRng32 rng(5124); - UNIT_ASSERT_EQUAL(ChaChaVec::BLOCK_SIZE, 64); - for (size_t size = 51; size < 67953; size += 113) { - TAlignedBuf bufOrig(size, 16); - TAlignedBuf bufNew(size, 16); - - for (ui32 i = 0; i < size; ++i) { - bufNew.Data()[i] = bufOrig.Data()[i] = (rng.GenRand() % 256); - } - cipher1.EncipherOld(bufOrig.Data(), bufOrig.Data(), size); - cipher2.Encipher(bufNew.Data(), bufNew.Data(), size); - UNIT_ASSERT_ARRAYS_EQUAL(bufOrig.Data(), bufNew.Data(), size); - } - - for (size_t size = 51; size < 67953; size += 113) { - TAlignedBuf bufOrig(size, 16); - TAlignedBuf bufInNew(size, 8); - TAlignedBuf bufOutNew(size, 16); - - for (ui32 i = 0; i < size; ++i) { - (bufInNew.Data() + 8)[i] = bufOrig.Data()[i] = (rng.GenRand() % 256); - } - cipher1.EncipherOld(bufOrig.Data(), bufOrig.Data(), size); - cipher2.Encipher(bufInNew.Data() + 8, bufOutNew.Data(), size); - UNIT_ASSERT_ARRAYS_EQUAL(bufOrig.Data(), bufOutNew.Data(), size); - } - } + + Y_UNIT_TEST(CompatibilityTest) { + ui64 offset = 0; + + ChaChaVec cipher1; + 
cipher1.SetIV(tc8_iv, (ui8*)&offset); + cipher1.SetKey(tc8_key, KEY_SIZE); + + ChaChaVec cipher2; + cipher2.SetIV(tc8_iv, (ui8*)&offset); + cipher2.SetKey(tc8_key, KEY_SIZE); + + TReallyFastRng32 rng(5124); + UNIT_ASSERT_EQUAL(ChaChaVec::BLOCK_SIZE, 64); + for (size_t size = 51; size < 67953; size += 113) { + TAlignedBuf bufOrig(size, 16); + TAlignedBuf bufNew(size, 16); + + for (ui32 i = 0; i < size; ++i) { + bufNew.Data()[i] = bufOrig.Data()[i] = (rng.GenRand() % 256); + } + cipher1.EncipherOld(bufOrig.Data(), bufOrig.Data(), size); + cipher2.Encipher(bufNew.Data(), bufNew.Data(), size); + UNIT_ASSERT_ARRAYS_EQUAL(bufOrig.Data(), bufNew.Data(), size); + } + + for (size_t size = 51; size < 67953; size += 113) { + TAlignedBuf bufOrig(size, 16); + TAlignedBuf bufInNew(size, 8); + TAlignedBuf bufOutNew(size, 16); + + for (ui32 i = 0; i < size; ++i) { + (bufInNew.Data() + 8)[i] = bufOrig.Data()[i] = (rng.GenRand() % 256); + } + cipher1.EncipherOld(bufOrig.Data(), bufOrig.Data(), size); + cipher2.Encipher(bufInNew.Data() + 8, bufOutNew.Data(), size); + UNIT_ASSERT_ARRAYS_EQUAL(bufOrig.Data(), bufOutNew.Data(), size); + } + } } diff --git a/ydb/core/blobstorage/crypto/crypto.cpp b/ydb/core/blobstorage/crypto/crypto.cpp index de580e5bd6..0d91e89164 100644 --- a/ydb/core/blobstorage/crypto/crypto.cpp +++ b/ydb/core/blobstorage/crypto/crypto.cpp @@ -1,13 +1,13 @@ #include "crypto.h" #include "secured_block.h" -// ifunc is broken at least under msan and tsan, so disable it -// There is no need to use fastest resolved function since specific -// function is used in performance critical code -#define T1HA0_RUNTIME_SELECT 0 -#define T1HA_USE_INDIRECT_FUNCTIONS 0 -#include <contrib/libs/t1ha/t1ha.h> - +// ifunc is broken at least under msan and tsan, so disable it +// There is no need to use fastest resolved function since specific +// function is used in performance critical code +#define T1HA0_RUNTIME_SELECT 0 +#define T1HA_USE_INDIRECT_FUNCTIONS 0 +#include 
<contrib/libs/t1ha/t1ha.h> + namespace NKikimr { //////////////////////////////////////////////////////////////////////////// // KeyContainer @@ -126,39 +126,39 @@ THashCalculator::~THashCalculator() { } //////////////////////////////////////////////////////////////////////////// -// T1haHasher -//////////////////////////////////////////////////////////////////////////// - -template<ET1haFunc func_type> -void TT1ha0HasherBase<func_type>::SetKey(const ui64 key) { - T1haSeed = key; -} - -template<ET1haFunc func_type> -ui64 TT1ha0HasherBase<func_type>::Hash(const void* data, ui64 size) const { +// T1haHasher +//////////////////////////////////////////////////////////////////////////// + +template<ET1haFunc func_type> +void TT1ha0HasherBase<func_type>::SetKey(const ui64 key) { + T1haSeed = key; +} + +template<ET1haFunc func_type> +ui64 TT1ha0HasherBase<func_type>::Hash(const void* data, ui64 size) const { #if !defined(_arm64_) - if constexpr (func_type == ET1haFunc::T1HA0_NO_AVX) { - return t1ha0_ia32aes_noavx(data, size, T1haSeed); - } else if constexpr (func_type == ET1haFunc::T1HA0_AVX) { - return t1ha0_ia32aes_avx(data, size, T1haSeed); - } else if constexpr (func_type == ET1haFunc::T1HA0_AVX2) { - return t1ha0_ia32aes_avx2(data, size, T1haSeed); - } + if constexpr (func_type == ET1haFunc::T1HA0_NO_AVX) { + return t1ha0_ia32aes_noavx(data, size, T1haSeed); + } else if constexpr (func_type == ET1haFunc::T1HA0_AVX) { + return t1ha0_ia32aes_avx(data, size, T1haSeed); + } else if constexpr (func_type == ET1haFunc::T1HA0_AVX2) { + return t1ha0_ia32aes_avx2(data, size, T1haSeed); + } #else return t1ha0(data, size, T1haSeed); #endif -} - -template<ET1haFunc func_type> -TT1ha0HasherBase<func_type>::~TT1ha0HasherBase() { - SecureWipeBuffer((ui8*)&T1haSeed, sizeof T1haSeed); -} - -template class TT1ha0HasherBase<ET1haFunc::T1HA0_NO_AVX>; -template class TT1ha0HasherBase<ET1haFunc::T1HA0_AVX>; -template class TT1ha0HasherBase<ET1haFunc::T1HA0_AVX2>; - 
-//////////////////////////////////////////////////////////////////////////// +} + +template<ET1haFunc func_type> +TT1ha0HasherBase<func_type>::~TT1ha0HasherBase() { + SecureWipeBuffer((ui8*)&T1haSeed, sizeof T1haSeed); +} + +template class TT1ha0HasherBase<ET1haFunc::T1HA0_NO_AVX>; +template class TT1ha0HasherBase<ET1haFunc::T1HA0_AVX>; +template class TT1ha0HasherBase<ET1haFunc::T1HA0_AVX2>; + +//////////////////////////////////////////////////////////////////////////// // StreamCypher //////////////////////////////////////////////////////////////////////////// #define CYPHER_ROUNDS 8 @@ -203,7 +203,7 @@ void TStreamCypher::SetKey(const ui8 *key, ui32 sizeBytes) { } #else Y_UNUSED(key); - Y_UNUSED(sizeBytes); + Y_UNUSED(sizeBytes); #endif } @@ -319,7 +319,7 @@ void TStreamCypher::Encrypt(void* destination, const void* source, ui32 size) { ui32 tail = size % BLOCK_BYTES; ui32 largePart = size - tail; if (largePart) { - if ((intptr_t)(const ui8*)source % 8 == 0) { + if ((intptr_t)(const ui8*)source % 8 == 0) { if ((intptr_t)(ui8*)destination % 16 == 0) { Cypher->Encipher((const ui8*)source, (ui8*)destination, largePart); destination = (ui8*)destination + largePart; @@ -333,7 +333,7 @@ void TStreamCypher::Encrypt(void* destination, const void* source, ui32 size) { source = (ui8*)source + BLOCK_BYTES; size -= BLOCK_BYTES; } - SecureWipeBuffer(data, BLOCK_BYTES); + SecureWipeBuffer(data, BLOCK_BYTES); } } else { if ((intptr_t)(ui8*)destination % 16 == 0) { @@ -345,7 +345,7 @@ void TStreamCypher::Encrypt(void* destination, const void* source, ui32 size) { source = (ui8*)source + BLOCK_BYTES; size -= BLOCK_BYTES; } - SecureWipeBuffer(data, BLOCK_BYTES); + SecureWipeBuffer(data, BLOCK_BYTES); } else { alignas(16) ui8 data[BLOCK_BYTES] = {0}; while (size >= BLOCK_BYTES) { @@ -356,7 +356,7 @@ void TStreamCypher::Encrypt(void* destination, const void* source, ui32 size) { source = (ui8*)source + BLOCK_BYTES; size -= BLOCK_BYTES; } - SecureWipeBuffer(data, BLOCK_BYTES); + 
SecureWipeBuffer(data, BLOCK_BYTES); } } } @@ -365,7 +365,7 @@ void TStreamCypher::Encrypt(void* destination, const void* source, ui32 size) { Cypher->Encipher((const ui8*)zero, (ui8*)Leftover, BLOCK_BYTES); Xor(destination, source, Leftover, tail); UnusedBytes = BLOCK_BYTES - tail; - SecureWipeBuffer(zero, BLOCK_BYTES); + SecureWipeBuffer(zero, BLOCK_BYTES); } #else memcpy(destination, source, size); diff --git a/ydb/core/blobstorage/crypto/crypto.h b/ydb/core/blobstorage/crypto/crypto.h index 4d1301210b..45fb647ba4 100644 --- a/ydb/core/blobstorage/crypto/crypto.h +++ b/ydb/core/blobstorage/crypto/crypto.h @@ -66,28 +66,28 @@ public: ~THashCalculator(); }; -enum class ET1haFunc { - T1HA0_NO_AVX, - T1HA0_AVX, - T1HA0_AVX2, -}; - -template<ET1haFunc func_type> -class TT1ha0HasherBase { - ui64 T1haSeed = 0; - -public: - void SetKey(const ui64 key); - - ui64 Hash(const void* data, ui64 size) const; - - ~TT1ha0HasherBase(); -}; - -using TT1ha0NoAvxHasher = TT1ha0HasherBase<ET1haFunc::T1HA0_NO_AVX>; -using TT1ha0AvxHasher = TT1ha0HasherBase<ET1haFunc::T1HA0_AVX>; -using TT1ha0Avx2Hasher = TT1ha0HasherBase<ET1haFunc::T1HA0_AVX2>; - +enum class ET1haFunc { + T1HA0_NO_AVX, + T1HA0_AVX, + T1HA0_AVX2, +}; + +template<ET1haFunc func_type> +class TT1ha0HasherBase { + ui64 T1haSeed = 0; + +public: + void SetKey(const ui64 key); + + ui64 Hash(const void* data, ui64 size) const; + + ~TT1ha0HasherBase(); +}; + +using TT1ha0NoAvxHasher = TT1ha0HasherBase<ET1haFunc::T1HA0_NO_AVX>; +using TT1ha0AvxHasher = TT1ha0HasherBase<ET1haFunc::T1HA0_AVX>; +using TT1ha0Avx2Hasher = TT1ha0HasherBase<ET1haFunc::T1HA0_AVX2>; + //////////////////////////////////////////////////////////////////////////// // StreamCypher //////////////////////////////////////////////////////////////////////////// diff --git a/ydb/core/blobstorage/crypto/crypto_ut.cpp b/ydb/core/blobstorage/crypto/crypto_ut.cpp index f73607921d..98a83ab9c5 100644 --- a/ydb/core/blobstorage/crypto/crypto_ut.cpp +++ 
b/ydb/core/blobstorage/crypto/crypto_ut.cpp @@ -1,20 +1,20 @@ #include <ydb/core/blobstorage/crypto/crypto.h> #include <library/cpp/testing/unittest/registar.h> -#include <util/datetime/cputimer.h> -#include <util/generic/buffer.h> -#include <util/generic/string.h> -#include <util/random/fast.h> -#include <util/stream/format.h> -#include <util/string/printf.h> - -#include <numeric> - +#include <util/datetime/cputimer.h> +#include <util/generic/buffer.h> +#include <util/generic/string.h> +#include <util/random/fast.h> +#include <util/stream/format.h> +#include <util/string/printf.h> + +#include <numeric> + #include <ydb/core/blobstorage/crypto/ut/ut_helpers.h> - + namespace NKikimr { Y_UNIT_TEST_SUITE(TBlobStorageCrypto) { - + Y_UNIT_TEST(TestMixedStreamCypher) { TStreamCypher cypher1; TStreamCypher cypher2; @@ -43,7 +43,7 @@ Y_UNIT_TEST_SUITE(TBlobStorageCrypto) { cypher2.StartMessage(nonce, 0); cypher2.InplaceEncrypt(out, size - in_offset); - UNIT_ASSERT_ARRAYS_EQUAL(in + in_offset, out, size - in_offset); + UNIT_ASSERT_ARRAYS_EQUAL(in + in_offset, out, size - in_offset); } } @@ -116,209 +116,209 @@ Y_UNIT_TEST_SUITE(TBlobStorageCrypto) { } } } - - Y_UNIT_TEST(PerfTestStreamCypher) { - TStreamCypher cypher1; - constexpr size_t BUF_SIZE = 256 << 10; - constexpr size_t BUF_ALIGN = 32; - constexpr size_t REPETITIONS = 16; - - auto testCase = {std::make_pair(0,0), {4, 0}, {8, 0}, {0, 4}, {0, 8}, {4, 8}, {8, 8}}; - for (auto s : testCase) { - size_t inShift = s.first; - size_t outShift = s.second; - const size_t size = BUF_SIZE; - - Cout << "size# " << HumanReadableSize(size, SF_BYTES); - Cout << " inShift# " << LeftPad(inShift, 2); - Cout << " outShift# " << LeftPad(outShift, 2); - - TVector<TDuration> times; - times.reserve(REPETITIONS); - - for (ui32 i = 0; i < REPETITIONS; ++i) { - TAlignedBuf inBuf(BUF_SIZE, BUF_ALIGN); - TAlignedBuf outBuf(BUF_SIZE, BUF_ALIGN); - - ui8 *in = inBuf.Data() + inShift; - ui8 *out = outBuf.Data() + outShift; - for (ui32 i = 0; i < 
size; ++i) { - inBuf.Data()[i] = (ui8)i; - } - ui64 key = 123; - ui64 nonce = 1; - cypher1.SetKey(key); - - cypher1.StartMessage(nonce, 0); - - TSimpleTimer timer; - cypher1.Encrypt(out, in, size); - times.push_back(timer.Get()); - } - TDuration min_time = *std::min_element(times.begin(), times.end()); - Cout << " max_speed# " << HumanReadableSize(size / min_time.SecondsFloat(), SF_QUANTITY) << "/s"; - TDuration avg_time = std::accumulate(times.begin(), times.end(), TDuration()) / times.size(); - Cout << " avg_speed# " << HumanReadableSize(size / avg_time.SecondsFloat(), SF_QUANTITY) << "/s"; - Cout << Endl; - } - } - - -void Test(const ui8* a, const ui8* b, size_t size) { - for (ui32 i = 0; i < size; ++i) { - UNIT_ASSERT_EQUAL_C(a[i], b[i], - " a[" << i << "]# " << Hex(a[i], HF_FULL) << " != " - " b[" << i << "]# " << Hex(b[i], HF_FULL)); - } + + Y_UNIT_TEST(PerfTestStreamCypher) { + TStreamCypher cypher1; + constexpr size_t BUF_SIZE = 256 << 10; + constexpr size_t BUF_ALIGN = 32; + constexpr size_t REPETITIONS = 16; + + auto testCase = {std::make_pair(0,0), {4, 0}, {8, 0}, {0, 4}, {0, 8}, {4, 8}, {8, 8}}; + for (auto s : testCase) { + size_t inShift = s.first; + size_t outShift = s.second; + const size_t size = BUF_SIZE; + + Cout << "size# " << HumanReadableSize(size, SF_BYTES); + Cout << " inShift# " << LeftPad(inShift, 2); + Cout << " outShift# " << LeftPad(outShift, 2); + + TVector<TDuration> times; + times.reserve(REPETITIONS); + + for (ui32 i = 0; i < REPETITIONS; ++i) { + TAlignedBuf inBuf(BUF_SIZE, BUF_ALIGN); + TAlignedBuf outBuf(BUF_SIZE, BUF_ALIGN); + + ui8 *in = inBuf.Data() + inShift; + ui8 *out = outBuf.Data() + outShift; + for (ui32 i = 0; i < size; ++i) { + inBuf.Data()[i] = (ui8)i; + } + ui64 key = 123; + ui64 nonce = 1; + cypher1.SetKey(key); + + cypher1.StartMessage(nonce, 0); + + TSimpleTimer timer; + cypher1.Encrypt(out, in, size); + times.push_back(timer.Get()); + } + TDuration min_time = *std::min_element(times.begin(), times.end()); + Cout 
<< " max_speed# " << HumanReadableSize(size / min_time.SecondsFloat(), SF_QUANTITY) << "/s"; + TDuration avg_time = std::accumulate(times.begin(), times.end(), TDuration()) / times.size(); + Cout << " avg_speed# " << HumanReadableSize(size / avg_time.SecondsFloat(), SF_QUANTITY) << "/s"; + Cout << Endl; + } + } + + +void Test(const ui8* a, const ui8* b, size_t size) { + for (ui32 i = 0; i < size; ++i) { + UNIT_ASSERT_EQUAL_C(a[i], b[i], + " a[" << i << "]# " << Hex(a[i], HF_FULL) << " != " + " b[" << i << "]# " << Hex(b[i], HF_FULL)); + } } - Y_UNIT_TEST(UnalignedTestStreamCypher) { - constexpr size_t BUF_ALIGN = 8; - - TStreamCypher cypher; - - for (size_t size = 151; size < 6923; size = 2*size + 1) { - auto testCase = {std::make_pair(0,0), {8, 0}}; - for (auto s : testCase) { - size_t inShift = s.first; - - Cout << " inShift# " << LeftPad(inShift, 2) << " "; - - TAlignedBuf inBuf(size, BUF_ALIGN); - TAlignedBuf outBuf(size, BUF_ALIGN); - - ui8 *in = inBuf.Data() + inShift; - ui8 *out = outBuf.Data(); - TReallyFastRng32 rng(692); - for (ui32 i = 0; i < size; ++i) { - in[i] = rng.GenRand() % 256; - } - ui64 key = 123; - ui64 nonce = 1; - cypher.SetKey(key); - cypher.StartMessage(nonce, 0); - - cypher.Encrypt(out, in, size); - } - } - } -} - -Y_UNIT_TEST_SUITE(TTest_t1ha) { - template<class THasher> - void TestZeroInputHashIsNotZeroImpl() { - THasher hasher; - ui8 zeros[128] = {0}; - for (ui32 i = 0; i < 12345; i += 97) { - hasher.SetKey(i); - UNIT_ASSERT_UNEQUAL(hasher.Hash(zeros, sizeof zeros), 0); - } - } - - Y_UNIT_TEST(TestZeroInputHashIsNotZero) { - TestZeroInputHashIsNotZeroImpl<TT1ha0NoAvxHasher>(); - TestZeroInputHashIsNotZeroImpl<TT1ha0AvxHasher>(); - TestZeroInputHashIsNotZeroImpl<TT1ha0Avx2Hasher>(); - } - - template<class THasher> - void PerfTestImpl() { - THasher hasher; - constexpr size_t BUF_SIZE = 256 << 10; - constexpr size_t BUF_ALIGN = 32; - constexpr size_t REPETITIONS = 16; - - auto testCase = {0, 1, 2, 4, 8}; - for (size_t inShift : testCase) 
{ - const size_t size = BUF_SIZE; - - Cout << "size# " << HumanReadableSize(size, SF_BYTES); - Cout << " inShift# " << LeftPad(inShift, 2); - - TVector<TDuration> times; - times.reserve(REPETITIONS); - - for (ui32 i = 0; i < REPETITIONS; ++i) { - TAlignedBuf inBuf(BUF_SIZE, BUF_ALIGN); - - ui8 *in = inBuf.Data() + inShift; - for (ui32 i = 0; i < size; ++i) { - inBuf.Data()[i] = (ui8)i; - } - ui64 key = 123; - hasher.SetKey(key); - - TSimpleTimer timer; - hasher.Hash(in, size); - times.push_back(timer.Get()); - } - TDuration min_time = *std::min_element(times.begin(), times.end()); - Cout << " max_speed# " << HumanReadableSize(size / min_time.SecondsFloat(), SF_QUANTITY) << "/s"; - TDuration avg_time = std::accumulate(times.begin(), times.end(), TDuration()) / times.size(); - Cout << " avg_speed# " << HumanReadableSize(size / avg_time.SecondsFloat(), SF_QUANTITY) << "/s"; - Cout << Endl; - } - } - - Y_UNIT_TEST(PerfTest) { - PerfTestImpl<TT1ha0NoAvxHasher>(); - PerfTestImpl<TT1ha0AvxHasher>(); - PerfTestImpl<TT1ha0Avx2Hasher>(); - } - -static const size_t REFERENCE_BUF_SIZE = 256 << 10; -static const ui64 REFERENCE_KEY = 123; -static const ui64 NoAvxReferenceHash[] = { - 3857587077012991658ull, 14549119052884897871ull, 254398647225044890ull, 16058200316769016579ull, - 2181725451308207419ull, 3279780031906669142ull, 2103619312639464077ull, 822922730093285578ull, - 8753250818825642536ull, 11319388241306168379ull, 220099229643599001ull, 8504415541883480728ull, - 8223470624549234967ull, 16994463204673144995ull, 17432852776700040881ull, 7799421780457361217ull, - 17218742319902176397ull, 16967740127583990941ull -}; -static const ui64 Avx2ReferenceHash[] = { - 17755040588294046276ull, 446858626829897371ull, 16828903878074513235ull, 12657333507435006451ull, - 17541517186803958748ull, 6742999295364335038ull, 12165123664998125067ull, 12836101758180356638ull, - 15902773892737007852ull, 12440249596693842423ull, 1730928272460384897ull, 5176224758215524594ull, - 
8223470624549234967ull, 16994463204673144995ull, 17432852776700040881ull, 7799421780457361217ull, - 17218742319902176397ull, 16967740127583990941ull -}; - - void PrepareBuf(TAlignedBuf *buf) { - ui64 *data64 = reinterpret_cast<ui64*>(buf->Data()); - UNIT_ASSERT(buf->Size() % sizeof(ui64) == 0); - const ui64 bufSize64 = buf->Size() / sizeof(ui64); - for (ui64 i = 0; i < bufSize64; ++i) { - data64[i] = (i + REFERENCE_KEY) * 6364136223846793005ull + 1442695040888963407ull; - } - } - - template<class THasher> - void T1haHashResultsStablilityTestImpl(const TAlignedBuf& buf) { - ui32 offset = 0; - ui32 resIdx = 0; - THasher hasher; - hasher.SetKey(REFERENCE_KEY); - for (ui32 size = buf.Size() / 2; size >= 8; size /= 2) { - UNIT_ASSERT(offset + size <= buf.Size()); - const ui64 hashRes = hasher.Hash(buf.Data() + offset, size); - if constexpr (std::is_same_v<THasher, TT1ha0AvxHasher> || std::is_same_v<THasher, TT1ha0NoAvxHasher>) { - UNIT_ASSERT_EQUAL(hashRes, NoAvxReferenceHash[resIdx]); - } else if constexpr (std::is_same_v<THasher, TT1ha0Avx2Hasher>) { - UNIT_ASSERT_EQUAL(hashRes, Avx2ReferenceHash[resIdx]); - } else { - UNIT_ASSERT(false); - } - ++resIdx; - offset += size; - } - } - - Y_UNIT_TEST(T1haHashResultsStablilityTest) { - TAlignedBuf buf(REFERENCE_BUF_SIZE, 32); - PrepareBuf(&buf); - T1haHashResultsStablilityTestImpl<TT1ha0NoAvxHasher>(buf); - T1haHashResultsStablilityTestImpl<TT1ha0AvxHasher>(buf); - T1haHashResultsStablilityTestImpl<TT1ha0Avx2Hasher>(buf); - } -} + Y_UNIT_TEST(UnalignedTestStreamCypher) { + constexpr size_t BUF_ALIGN = 8; + + TStreamCypher cypher; + + for (size_t size = 151; size < 6923; size = 2*size + 1) { + auto testCase = {std::make_pair(0,0), {8, 0}}; + for (auto s : testCase) { + size_t inShift = s.first; + + Cout << " inShift# " << LeftPad(inShift, 2) << " "; + + TAlignedBuf inBuf(size, BUF_ALIGN); + TAlignedBuf outBuf(size, BUF_ALIGN); + + ui8 *in = inBuf.Data() + inShift; + ui8 *out = outBuf.Data(); + TReallyFastRng32 rng(692); + 
for (ui32 i = 0; i < size; ++i) { + in[i] = rng.GenRand() % 256; + } + ui64 key = 123; + ui64 nonce = 1; + cypher.SetKey(key); + cypher.StartMessage(nonce, 0); + + cypher.Encrypt(out, in, size); + } + } + } +} + +Y_UNIT_TEST_SUITE(TTest_t1ha) { + template<class THasher> + void TestZeroInputHashIsNotZeroImpl() { + THasher hasher; + ui8 zeros[128] = {0}; + for (ui32 i = 0; i < 12345; i += 97) { + hasher.SetKey(i); + UNIT_ASSERT_UNEQUAL(hasher.Hash(zeros, sizeof zeros), 0); + } + } + + Y_UNIT_TEST(TestZeroInputHashIsNotZero) { + TestZeroInputHashIsNotZeroImpl<TT1ha0NoAvxHasher>(); + TestZeroInputHashIsNotZeroImpl<TT1ha0AvxHasher>(); + TestZeroInputHashIsNotZeroImpl<TT1ha0Avx2Hasher>(); + } + + template<class THasher> + void PerfTestImpl() { + THasher hasher; + constexpr size_t BUF_SIZE = 256 << 10; + constexpr size_t BUF_ALIGN = 32; + constexpr size_t REPETITIONS = 16; + + auto testCase = {0, 1, 2, 4, 8}; + for (size_t inShift : testCase) { + const size_t size = BUF_SIZE; + + Cout << "size# " << HumanReadableSize(size, SF_BYTES); + Cout << " inShift# " << LeftPad(inShift, 2); + + TVector<TDuration> times; + times.reserve(REPETITIONS); + + for (ui32 i = 0; i < REPETITIONS; ++i) { + TAlignedBuf inBuf(BUF_SIZE, BUF_ALIGN); + + ui8 *in = inBuf.Data() + inShift; + for (ui32 i = 0; i < size; ++i) { + inBuf.Data()[i] = (ui8)i; + } + ui64 key = 123; + hasher.SetKey(key); + + TSimpleTimer timer; + hasher.Hash(in, size); + times.push_back(timer.Get()); + } + TDuration min_time = *std::min_element(times.begin(), times.end()); + Cout << " max_speed# " << HumanReadableSize(size / min_time.SecondsFloat(), SF_QUANTITY) << "/s"; + TDuration avg_time = std::accumulate(times.begin(), times.end(), TDuration()) / times.size(); + Cout << " avg_speed# " << HumanReadableSize(size / avg_time.SecondsFloat(), SF_QUANTITY) << "/s"; + Cout << Endl; + } + } + + Y_UNIT_TEST(PerfTest) { + PerfTestImpl<TT1ha0NoAvxHasher>(); + PerfTestImpl<TT1ha0AvxHasher>(); + PerfTestImpl<TT1ha0Avx2Hasher>(); + } + 
+static const size_t REFERENCE_BUF_SIZE = 256 << 10; +static const ui64 REFERENCE_KEY = 123; +static const ui64 NoAvxReferenceHash[] = { + 3857587077012991658ull, 14549119052884897871ull, 254398647225044890ull, 16058200316769016579ull, + 2181725451308207419ull, 3279780031906669142ull, 2103619312639464077ull, 822922730093285578ull, + 8753250818825642536ull, 11319388241306168379ull, 220099229643599001ull, 8504415541883480728ull, + 8223470624549234967ull, 16994463204673144995ull, 17432852776700040881ull, 7799421780457361217ull, + 17218742319902176397ull, 16967740127583990941ull +}; +static const ui64 Avx2ReferenceHash[] = { + 17755040588294046276ull, 446858626829897371ull, 16828903878074513235ull, 12657333507435006451ull, + 17541517186803958748ull, 6742999295364335038ull, 12165123664998125067ull, 12836101758180356638ull, + 15902773892737007852ull, 12440249596693842423ull, 1730928272460384897ull, 5176224758215524594ull, + 8223470624549234967ull, 16994463204673144995ull, 17432852776700040881ull, 7799421780457361217ull, + 17218742319902176397ull, 16967740127583990941ull +}; + + void PrepareBuf(TAlignedBuf *buf) { + ui64 *data64 = reinterpret_cast<ui64*>(buf->Data()); + UNIT_ASSERT(buf->Size() % sizeof(ui64) == 0); + const ui64 bufSize64 = buf->Size() / sizeof(ui64); + for (ui64 i = 0; i < bufSize64; ++i) { + data64[i] = (i + REFERENCE_KEY) * 6364136223846793005ull + 1442695040888963407ull; + } + } + + template<class THasher> + void T1haHashResultsStablilityTestImpl(const TAlignedBuf& buf) { + ui32 offset = 0; + ui32 resIdx = 0; + THasher hasher; + hasher.SetKey(REFERENCE_KEY); + for (ui32 size = buf.Size() / 2; size >= 8; size /= 2) { + UNIT_ASSERT(offset + size <= buf.Size()); + const ui64 hashRes = hasher.Hash(buf.Data() + offset, size); + if constexpr (std::is_same_v<THasher, TT1ha0AvxHasher> || std::is_same_v<THasher, TT1ha0NoAvxHasher>) { + UNIT_ASSERT_EQUAL(hashRes, NoAvxReferenceHash[resIdx]); + } else if constexpr (std::is_same_v<THasher, TT1ha0Avx2Hasher>) { + 
UNIT_ASSERT_EQUAL(hashRes, Avx2ReferenceHash[resIdx]); + } else { + UNIT_ASSERT(false); + } + ++resIdx; + offset += size; + } + } + + Y_UNIT_TEST(T1haHashResultsStablilityTest) { + TAlignedBuf buf(REFERENCE_BUF_SIZE, 32); + PrepareBuf(&buf); + T1haHashResultsStablilityTestImpl<TT1ha0NoAvxHasher>(buf); + T1haHashResultsStablilityTestImpl<TT1ha0AvxHasher>(buf); + T1haHashResultsStablilityTestImpl<TT1ha0Avx2Hasher>(buf); + } +} } // namespace NKikimr diff --git a/ydb/core/blobstorage/crypto/ut/ut_helpers.h b/ydb/core/blobstorage/crypto/ut/ut_helpers.h index fcb5c34151..e9695541df 100644 --- a/ydb/core/blobstorage/crypto/ut/ut_helpers.h +++ b/ydb/core/blobstorage/crypto/ut/ut_helpers.h @@ -2,61 +2,61 @@ #include <library/cpp/actors/util/rope.h> #include <library/cpp/testing/unittest/registar.h> -#include <util/system/align.h> - -#include <utility> - -class TAlignedBuf { - ui8 *Buf; - ui8 *AlignedBuf; - size_t BufSize; - size_t Align; - - void AllocBuf() { - Buf = new ui8[BufSize + Align]; - AlignedBuf = reinterpret_cast<ui8*>(AlignUp<intptr_t>(intptr_t(Buf), Align)); - } - -public: - TAlignedBuf(size_t s, size_t align) - : BufSize(s) - , Align(align) - { - AllocBuf(); - } - - TAlignedBuf(const TAlignedBuf& other) - : BufSize(other.BufSize) - , Align(other.Align) - { - AllocBuf(); - } - - TAlignedBuf(TAlignedBuf&& other) - { - std::swap(Buf, other.Buf); - std::swap(AlignedBuf, other.AlignedBuf); - std::swap(BufSize, other.BufSize); - std::swap(Align, other.Align); - } - - ui8 *Data() { - return AlignedBuf; - } - - const ui8 *Data() const { - return AlignedBuf; - } - - size_t Size() const { - return BufSize; - } - - ~TAlignedBuf() { - delete[] Buf; - } -}; - +#include <util/system/align.h> + +#include <utility> + +class TAlignedBuf { + ui8 *Buf; + ui8 *AlignedBuf; + size_t BufSize; + size_t Align; + + void AllocBuf() { + Buf = new ui8[BufSize + Align]; + AlignedBuf = reinterpret_cast<ui8*>(AlignUp<intptr_t>(intptr_t(Buf), Align)); + } + +public: + TAlignedBuf(size_t s, 
size_t align) + : BufSize(s) + , Align(align) + { + AllocBuf(); + } + + TAlignedBuf(const TAlignedBuf& other) + : BufSize(other.BufSize) + , Align(other.Align) + { + AllocBuf(); + } + + TAlignedBuf(TAlignedBuf&& other) + { + std::swap(Buf, other.Buf); + std::swap(AlignedBuf, other.AlignedBuf); + std::swap(BufSize, other.BufSize); + std::swap(Align, other.Align); + } + + ui8 *Data() { + return AlignedBuf; + } + + const ui8 *Data() const { + return AlignedBuf; + } + + size_t Size() const { + return BufSize; + } + + ~TAlignedBuf() { + delete[] Buf; + } +}; + class TRopeAlignedBufferBackend : public IRopeChunkBackend { TAlignedBuf Buffer; @@ -74,26 +74,26 @@ public: } }; -void inline Print(const ui8* out, size_t size) { - for (ui32 i = 0; i < size; ++i) { - if (i % 16 == 0) { - Cerr << LeftPad(i, 3) << ": "; - } - Cerr << Hex(out[i], HF_FULL) << " "; - if ((i + 1) % 16 == 0) { - Cerr << Endl; - } - } - Cerr << Endl; -} - - -#define UNIT_ASSERT_ARRAYS_EQUAL(A, B, size) \ - do { \ - for (size_t i = 0; i < size; i++) { \ - UNIT_ASSERT_EQUAL_C((A)[i], (B)[i], \ - "arrays are not equal " \ - " a[" << i << "]# " << Hex((A)[i], HF_FULL) << " != " \ - " b[" << i << "]# " << Hex((B)[i], HF_FULL)); \ - } \ +void inline Print(const ui8* out, size_t size) { + for (ui32 i = 0; i < size; ++i) { + if (i % 16 == 0) { + Cerr << LeftPad(i, 3) << ": "; + } + Cerr << Hex(out[i], HF_FULL) << " "; + if ((i + 1) % 16 == 0) { + Cerr << Endl; + } + } + Cerr << Endl; +} + + +#define UNIT_ASSERT_ARRAYS_EQUAL(A, B, size) \ + do { \ + for (size_t i = 0; i < size; i++) { \ + UNIT_ASSERT_EQUAL_C((A)[i], (B)[i], \ + "arrays are not equal " \ + " a[" << i << "]# " << Hex((A)[i], HF_FULL) << " != " \ + " b[" << i << "]# " << Hex((B)[i], HF_FULL)); \ + } \ } while (0) diff --git a/ydb/core/blobstorage/docs/DriveLifeStages.drawio b/ydb/core/blobstorage/docs/DriveLifeStages.drawio index 8b9384271a..0c0ad3cb0f 100644 --- a/ydb/core/blobstorage/docs/DriveLifeStages.drawio +++ 
b/ydb/core/blobstorage/docs/DriveLifeStages.drawio @@ -1 +1 @@ -<mxfile host="drawio.yandex-team.ru" modified="2021-03-05T12:59:24.678Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.41 YaBrowser/21.2.0.1122 Yowser/2.5 Safari/537.36" etag="91ZtNVjaC9g16XH5G1v5" version="12.7.0" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">3Vhbl5owEP41Pm4PFxf10QvrblexVXftPvXkQBao0dAQL/TXN0ggYI6ubL1w+qTzZTIh38w3CdT07mLbJyDwhtiBqKYpzram92qapipGg/3ESMQR1TASxCW+wzEBTPw/MJ3K0ZXvwLDgSDFG1A+KoI2XS2jTAgYIwZui2ztGxVUD4EIJmNgAyejMd6iXoE2tIfBH6LsezfbXSkYWIHXmOwk94OBNDtLNmt4lGNPk32LbhShmL+Vl9hTN0GBu9L9+D3+Dl87z1Hq9S4I9lJmSbYHAJf106Hrb6mhk/j6kQfsJKnYHPtp3WhJ6DdCK88X3SqOUQOgwPrmJCfWwi5cAmQLtELxaOjBeRmGW8BlgHDBQZeAvSGnEiwOsKGaQRxeIj4aU4HmWnhg5ccecmRCviA2PbJNnMN5Krio4T32IF5CSiDkQiAD118XSAbwC3cwvm/oN++z5NIXLpV7nC6ViSe00BAXEhZTPErlif3KPIaBdBktkU5GyaY2mPyemaclZRYgpMM7exvMpnARgR+CGdYFibsplYg0JhdujHPNRrbnHVSqzjVBphnk5he5zmk9LgdCy7GlVFQNjnkQ/eMSd8RYbX+5Ts7fND/Yibp1ZRGn33tXwMT/1Omq7V4oVJFXGAbW1CQFRzi2IHcIS6xjKXq0lEc8qZVWqxfZgMOq2p2avelqW+t7NtdysqJTLZeDiCjI+OK+SniApSApkKLc9+NRWRfMtWrfo1m+FZn2V1t06sXVf6Z5k7NXdhRr3gWUOFbFeP+Z+mTavyVe2ahTujQrw/HeHfzpG5LYyNoej1yqewVILvvkZrMp3GHM8Ho2rz13Gye24MyTuXqxnazSr4Jtcxkx1Kq/xX3TVSx/D2kefK069/knv8me7/jFTfGBL3MV3St38Cw==</diagram></mxfile>
\ No newline at end of file +<mxfile host="drawio.yandex-team.ru" modified="2021-03-05T12:59:24.678Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.41 YaBrowser/21.2.0.1122 Yowser/2.5 Safari/537.36" etag="91ZtNVjaC9g16XH5G1v5" version="12.7.0" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">3Vhbl5owEP41Pm4PFxf10QvrblexVXftPvXkQBao0dAQL/TXN0ggYI6ubL1w+qTzZTIh38w3CdT07mLbJyDwhtiBqKYpzram92qapipGg/3ESMQR1TASxCW+wzEBTPw/MJ3K0ZXvwLDgSDFG1A+KoI2XS2jTAgYIwZui2ztGxVUD4EIJmNgAyejMd6iXoE2tIfBH6LsezfbXSkYWIHXmOwk94OBNDtLNmt4lGNPk32LbhShmL+Vl9hTN0GBu9L9+D3+Dl87z1Hq9S4I9lJmSbYHAJf106Hrb6mhk/j6kQfsJKnYHPtp3WhJ6DdCK88X3SqOUQOgwPrmJCfWwi5cAmQLtELxaOjBeRmGW8BlgHDBQZeAvSGnEiwOsKGaQRxeIj4aU4HmWnhg5ccecmRCviA2PbJNnMN5Krio4T32IF5CSiDkQiAD118XSAbwC3cwvm/oN++z5NIXLpV7nC6ViSe00BAXEhZTPErlif3KPIaBdBktkU5GyaY2mPyemaclZRYgpMM7exvMpnARgR+CGdYFibsplYg0JhdujHPNRrbnHVSqzjVBphnk5he5zmk9LgdCy7GlVFQNjnkQ/eMSd8RYbX+5Ts7fND/Yibp1ZRGn33tXwMT/1Omq7V4oVJFXGAbW1CQFRzi2IHcIS6xjKXq0lEc8qZVWqxfZgMOq2p2avelqW+t7NtdysqJTLZeDiCjI+OK+SniApSApkKLc9+NRWRfMtWrfo1m+FZn2V1t06sXVf6Z5k7NXdhRr3gWUOFbFeP+Z+mTavyVe2ahTujQrw/HeHfzpG5LYyNoej1yqewVILvvkZrMp3GHM8Ho2rz13Gye24MyTuXqxnazSr4Jtcxkx1Kq/xX3TVSx/D2kefK069/knv8me7/jFTfGBL3MV3St38Cw==</diagram></mxfile>
\ No newline at end of file diff --git a/ydb/core/blobstorage/docs/pdisk_chunk_state.graphml b/ydb/core/blobstorage/docs/pdisk_chunk_state.graphml index 68eeab784c..55c4c70703 100644 --- a/ydb/core/blobstorage/docs/pdisk_chunk_state.graphml +++ b/ydb/core/blobstorage/docs/pdisk_chunk_state.graphml @@ -1,660 +1,660 @@ -<?xml version="1.0" encoding="UTF-8" standalone="no"?> -<graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:java="http://www.yworks.com/xml/yfiles-common/1.0/java" xmlns:sys="http://www.yworks.com/xml/yfiles-common/markup/primitives/2.0" xmlns:x="http://www.yworks.com/xml/yfiles-common/markup/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:y="http://www.yworks.com/xml/graphml" xmlns:yed="http://www.yworks.com/xml/yed/3" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd"> - <!--Created by yEd 3.18.2--> - <key attr.name="Description" attr.type="string" for="graph" id="d0"/> - <key for="port" id="d1" yfiles.type="portgraphics"/> - <key for="port" id="d2" yfiles.type="portgeometry"/> - <key for="port" id="d3" yfiles.type="portuserdata"/> - <key attr.name="url" attr.type="string" for="node" id="d4"/> - <key attr.name="description" attr.type="string" for="node" id="d5"/> - <key for="node" id="d6" yfiles.type="nodegraphics"/> - <key for="graphml" id="d7" yfiles.type="resources"/> - <key attr.name="url" attr.type="string" for="edge" id="d8"/> - <key attr.name="description" attr.type="string" for="edge" id="d9"/> - <key for="edge" id="d10" yfiles.type="edgegraphics"/> - <graph edgedefault="directed" id="G"> - <data key="d0" xml:space="preserve"/> - <node id="n0"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="301.8278580555715" y="32.23329615497357"/> - <y:Fill color="#FFCC00" transparent="false"/> - <y:BorderStyle color="#000000" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" 
fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="28.71484375" x="29.642578125" xml:space="preserve" y="5.93359375">Free<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n1"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="184.66887391365057" y="175.51611786633129"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="56.435546875" x="15.7822265625" xml:space="preserve" y="5.93359375">Reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n2"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="184.66887391365057" y="252.17678572882204"/> - <y:Fill color="#FFCC00" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" 
fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="64.275390625" x="11.8623046875" xml:space="preserve" y="5.93359375">Commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n3"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="701.2401942572766" y="-34.173748916774315"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="78.806640625" x="4.5966796875" xml:space="preserve" y="5.93359375">Log reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n4"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="47.24943843986151" y="77.64370557140234"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" 
fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="53.7578125" x="17.12109375" xml:space="preserve" y="-1.1328125">Delete -reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n5"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="47.24943843986151" y="156.96546634935225"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="62.119140625" x="12.9404296875" xml:space="preserve" y="-1.1328125">Delete -commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n6"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="701.2401942572766" y="37.46766193890454"/> - <y:Fill color="#FFCC00" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" 
fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="87.16796875" x="0.416015625" xml:space="preserve" y="5.93359375">Log commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n7"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="657.197834429016" y="201.00808526073567"/> - <y:Fill color="#CCFFCC" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="5.93359375">Quarantine<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n8"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="387.47596608655624" y="252.17678572882204"/> - <y:Fill color="#00FF00" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" 
fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="64.275390625" x="11.8623046875" xml:space="preserve" y="5.93359375">Commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n9"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="387.47596608655624" y="174.5663108821866"/> - <y:Fill color="#CCFFCC" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="56.435546875" x="15.7822265625" xml:space="preserve" y="5.93359375">Reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n10"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="657.197834429016" y="122.63481365314385"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" 
fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="5.93359375">Quarantine -<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n11"> - <data key="d5"/> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="114.97089218898864" width="239.87834856212052" x="416.66386647552287" y="-242.45491334722402"/> - <y:Fill hasColor="false" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="left" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="74.6640625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="187.6328125" x="26.122768031060275" xml:space="preserve" y="20.153414844494307">r — read -w — write -c — commit -+ — new request in flight -done — request written on disk<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="rectangle"/> - </y:ShapeNode> - </data> - </node> - <node id="n12"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-242.45491334722402"/> - <y:Fill color="#FFCC00" transparent="false"/> - <y:BorderStyle color="#000000" 
type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="93.244140625" x="92.0" xml:space="preserve" y="5.93359375">Persistent state</y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n13"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-193.90800385474196"/> - <y:Fill color="#00FF00" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="224.76953125" x="92.0" xml:space="preserve" y="5.93359375">Persistent state with requests in flight -</y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n14"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-145.3610943622599"/> - <y:Fill color="#FFFF99" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="91.609375" x="92.0" xml:space="preserve" y="5.93359375">Temporal state</y:NodeLabel> - <y:Shape 
type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n15"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="58.314847574570265" y="-96.81418486977785"/> - <y:Fill color="#CCFFCC" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="223.134765625" x="91.99999999999999" xml:space="preserve" y="5.93359375">Temporal state with requests in flight -</y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n16"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="657.1978344290161" y="329.1761235083221"/> - <y:Fill color="#CCFFCC" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="-1.1328125">Quarantine -commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <node id="n17"> - <data key="d6"> - <y:ShapeNode> - <y:Geometry height="30.0" width="88.0" x="547.85308396719" y="239.67475861812653"/> - <y:Fill 
color="#CCFFCC" transparent="false"/> - <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> - <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="-1.1328125">Quarantine -on kill<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> - <y:Shape type="ellipse"/> - </y:ShapeNode> - </data> - </node> - <edge id="e0" source="n0" target="n1"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-1.9292672025543425" sy="13.746218964476157" tx="25.25652443572227" ty="-12.28936249214587"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-31.05969337432157" anchorY="43.27401718327805" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.7933330110261343" upY="0.6087879217069007" verticalTextPosition="bottom" visible="true" width="46.205078125" x="-59.188786858345296" xml:space="preserve" y="43.27401718327805">reserve<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.770418224707337" distanceToCenter="true" position="left" 
ratio="0.6881208058219688" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e1" source="n9" target="n2"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-30.481898196182556" sy="10.815580672127737" tx="0.0" ty="-15.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-80.66373828846764" anchorY="4.209450516205635" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.2878441161956364" upY="0.9576772758980726" verticalTextPosition="bottom" visible="true" width="80.734375" x="-157.9812146098011" xml:space="preserve" y="4.209450516205635">commit done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.120886543350457" distanceToCenter="true" position="right" ratio="0.8968891559191847" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e2" source="n0" target="n3"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="34.75814998265491" 
sy="-9.198313983646038" tx="-34.767571381909875" ty="-9.198313983646038"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="71.001953125" x="209.5893985046822" xml:space="preserve" y="-69.52196539883">log allocate<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.899576593121719" distanceToCenter="true" position="left" ratio="0.8518598473263168" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e3" source="n3" target="n6"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="5.5" tx="0.0" ty="-15.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="70.0" x="2.6588137329752044" xml:space="preserve" y="7.735972317920741">сommit 
log<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="2.6587928155423675" distanceToCenter="false" position="left" ratio="0.20542799692019012" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e4" source="n5" target="n0"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="30.393865214780817" sy="-10.843275255829838" tx="-18.959151160012425" ty="6.799672617704289"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="65.44716863590605" anchorY="-32.89023053710895" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.46261727573465816" upY="-0.8865580952153351" verticalTextPosition="bottom" visible="true" width="72.419921875" x="57.058616315748694" xml:space="preserve" y="-82.46872921473505">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.948517208199235" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" 
frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e5" source="n1" target="n4"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-35.68139698198826" sy="-8.745020762370075" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="7.422413248786569" anchorY="-18.79208740967067" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.6589509006517482" upY="0.7521859547547068" verticalTextPosition="bottom" visible="true" width="39.56640625" x="-34.28751494080656" xml:space="preserve" y="-44.86440644366113">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.959734230476897" distanceToCenter="true" position="right" ratio="-9.227372589907704" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e6" source="n4" target="n0"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="44.0" sy="0.0" tx="-38.297753912215285" ty="7.3794888613264416"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="41.320990156025914" 
anchorY="-6.828971852424033" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.21556008869735932" upY="-0.9764905776098337" verticalTextPosition="bottom" visible="true" width="72.419921875" x="37.41227948519333" xml:space="preserve" y="-40.14633718707068">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="6.8276756015565185" distanceToCenter="true" position="left" ratio="0.3955015490500028" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e7" source="n6" target="n0"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="36.37407210989903" ty="5.2343657839309685"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-33.1913128071767" anchorY="1.8444028557555825" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-1.2246467991473532E-16" upY="1.0" verticalTextPosition="bottom" visible="true" width="44.201171875" x="-77.3924846821767" xml:space="preserve" y="1.8444028557555772">сut 
log<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.91080997813034" distanceToCenter="true" position="left" ratio="0.10960384942234233" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e8" source="n10" target="n0"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="27.862218929353844" ty="11.192663010312629"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-17.788315177226764" anchorY="-16.22954736584451" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" hasText="false" height="4.0" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.2350759151535802" upY="0.9719770131616832" verticalTextPosition="bottom" visible="true" width="4.0" x="-22.61652689048782" y="-17.169851026458833"> - <y:LabelModel> - <y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/> - </y:LabelModel> - <y:ModelParameter> - <y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.59314957410855" distanceToCenter="true" position="right" ratio="0.01851055095852848" segment="-1"/> - </y:ModelParameter> - <y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" 
side="anywhere" sideReference="relative_to_edge_flow"/> - </y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e9" source="n9" target="n1"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-34.172146410915275" sy="-9.441323139003387" tx="31.72534507187663" ty="-10.391130123148073"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="standard" target="none"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-93.65792667958408" xml:space="preserve" y="-21.5576817917227">+r+w+c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="12.491278543694992" distanceToCenter="true" position="right" ratio="0.5" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e10" source="n9" target="n8"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" 
horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="47.880859375" x="-53.94041574938126" xml:space="preserve" y="4.999996184920974">commit -done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="0.0" distance="30.0" distanceToCenter="true" position="right" ratio="0.0" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e11" source="n8" target="n2"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-34.82383517005076" sy="9.157115617061322" tx="34.85845016171942" ty="9.157115617061322"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="81.654296875" x="-106.33143476469621" xml:space="preserve" y="-17.041515509655596">last rwc done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.975117783273342" distanceToCenter="true" position="right" ratio="0.4549547877251474" 
segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e12" source="n9" target="n7"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="39.69019001427455" anchorY="2.6034749748054367" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.09756577368082023" upY="-0.9952290790597222" verticalTextPosition="bottom" visible="true" width="60.876953125" x="39.69019001427455" xml:space="preserve" y="-15.442827310332184">kill owner<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.347751743621105" distanceToCenter="true" position="left" ratio="0.3015238597711589" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e13" source="n8" target="n17"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="34.333916567865344" sy="-5.855893959205844" tx="-33.66545563118473" 
ty="-9.663789067285506"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="11.332674393531136" anchorY="-3.7196742493306942" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.17386769945429842" upY="-0.984769020169943" verticalTextPosition="bottom" visible="true" width="60.876953125" x="8.179963999519991" xml:space="preserve" y="-32.1608420375119">kill owner<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.759027546315863" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e14" source="n2" target="n5"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-28.230843196605434" sy="-11.505833931186658" tx="33.83333434299962" ty="9.60320621427303"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="1.8064046893578052" anchorY="-27.562947890016602" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" 
ratio="0.5" textColor="#000000" upX="-0.701154105430335" upY="0.713009761811286" verticalTextPosition="bottom" visible="true" width="39.56640625" x="-39.11872513405677" xml:space="preserve" y="-55.30509606932857">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="11.85282047344601" distanceToCenter="true" position="right" ratio="0.22859433300968157" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e15" source="n7" target="n10"> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="81.654296875" x="4.494206775091357" xml:space="preserve" y="-43.103712156877265">last rwc done -kill done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="45.32133572498151" distanceToCenter="true" position="right" ratio="0.955864940213314" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" 
angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e16" source="n0" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> - <y:Point x="345.8278580555715" y="14.042778699409723"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-38.28843346365477" xml:space="preserve" y="-39.41837475902949">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="0.851185318054408" distanceToCenter="true" position="left" ratio="11.065409134708744" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e17" source="n3" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-44.0" sy="0.0" tx="44.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" 
configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-119.92863738246035" xml:space="preserve" y="22.565682416565053">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.328015368693165" distanceToCenter="true" position="left" ratio="0.2805018761699686" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e18" source="n6" target="n6"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="19.017944130896808" sy="13.512544433732373" tx="44.0" ty="0.0"> - <y:Point x="789.2401942572766" y="74.28285443190316"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="5.441708500135633" xml:space="preserve" y="15.798604455128782">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" 
defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="0.0" distanceToCenter="false" position="right" ratio="6.720318614416623" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e19" source="n1" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="11.100571159856088" sy="-14.501222602423539" tx="-13.929103404286025" ty="14.241300677086956"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="18.553360681792412" anchorY="-25.696531015585236" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.7792165417141262" upY="-0.626754801432807" verticalTextPosition="bottom" visible="true" width="42.2265625" x="4.423973233991733" xml:space="preserve" y="-69.96499431316647">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.714745422657094" distanceToCenter="true" position="left" ratio="0.342725244313297" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" 
sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e20" source="n5" target="n2"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="14.454784649358558" sy="14.171691206036428" tx="-44.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="16.803286936629164" anchorY="44.754330765062605" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.7162165042990318" upY="-0.6978781548162079" verticalTextPosition="bottom" visible="true" width="42.2265625" x="16.803286936629164" xml:space="preserve" y="32.099837035934335">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.13187428633631" distanceToCenter="true" position="right" ratio="0.6351411488884056" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e21" source="n4" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="27.72843780550508" sy="-11.640146089764052" tx="-37.203018275484965" ty="-7.999525101672361"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" 
anchorX="72.71794189796697" anchorY="-17.741924379478654" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.2150948732018046" upY="-0.9765931576261937" verticalTextPosition="bottom" visible="true" width="42.2265625" x="68.81766689248737" xml:space="preserve" y="-44.53302210218295">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.75179091058278" distanceToCenter="true" position="left" ratio="0.42706763912791285" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e22" source="n2" target="n2"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> - <y:Point x="203.0886591696214" y="295.5033242120122"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-9.590665738951628" xml:space="preserve" 
y="22.011191593549654">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="13.074784808336569" distanceToCenter="true" position="left" ratio="6.042022354967836" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e23" source="n8" target="n2"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="26.614829141286123" ty="11.950771246842464"> - <y:Point x="379.1254391971893" y="295.0875296808884"/> - <y:Point x="285.49418907424945" y="295.0875296808884"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-96.78989693579211" xml:space="preserve" y="14.925386589537254">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="8.718441625445736" distanceToCenter="true" position="left" ratio="0.502281970046856" segment="1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" 
angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e24" source="n9" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="5.731400659936355" ty="11.192663010312629"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-6.644791176402634" anchorY="-45.7548071943084" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.8539319547737003" upY="0.520384681381348" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-44.1030354724647" xml:space="preserve" y="-81.81341825330722">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.069496224652644" distanceToCenter="true" position="right" ratio="0.5454974242562394" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e25" source="n9" target="n1"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-35.34252622324016" sy="8.942187263495612" tx="37.25981182851308" ty="7.992380279350925"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - 
<y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="75.5078125" x="-104.6739728663735" xml:space="preserve" y="-17.786774298462603">last rw done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="8.720367048636998" distanceToCenter="true" position="right" ratio="0.5293192616653534" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e26" source="n2" target="n8"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="31.953203361267015" sy="-10.307390918648395" tx="-31.973351254374222" ty="-10.307390918648395"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="45.03540296804721" xml:space="preserve" y="-19.072106154013795">+r+w+c<y:LabelModel><y:SmartEdgeLabelModel 
autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.005709948562414" distanceToCenter="true" position="left" ratio="0.5400661519081636" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e27" source="n7" target="n0"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="17.64799511269962" ty="12.043848328367119"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-121.55194649261693" anchorY="-76.73061303132732" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.4209598329465266" upY="0.9070792793607582" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-167.48797209584964" xml:space="preserve" y="-94.50629972723338">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.365856975346183" distanceToCenter="true" position="right" ratio="0.5000366300695136" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" 
sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e28" source="n16" target="n2"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="18.064097820689597" ty="13.664632674132577"> - <y:Point x="275.457333963666" y="344.1761235083221"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-192.14370846750944" xml:space="preserve" y="0.9279829026191919">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.994382587656453" distanceToCenter="true" position="left" ratio="0.40749739509495936" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e29" source="n16" target="n7"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" 
hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="72.419921875" x="5.706653002872827" xml:space="preserve" y="-93.1680316826237">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="41.91659445276315" distanceToCenter="true" position="right" ratio="1.0" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e30" source="n9" target="n9"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> - <y:Point x="431.47596608655624" y="156.0038208221403"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-37.89623429599874" xml:space="preserve" y="-36.15734837155037">±r±w+c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="5.054648982679395" 
distanceToCenter="true" position="right" ratio="7.42970862240162" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e31" source="n8" target="n8"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="44.0" sy="0.0" tx="-32.204640726824664" ty="10.222598694162002"> - <y:Point x="445.1556747089578" y="246.3211708445953"/> - </y:Path> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-41.32400424821128" xml:space="preserve" y="-48.89784390234962">±r±w±c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="1.9573788154470968" distanceToCenter="false" position="right" ratio="0.8086000895714727" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e32" source="n8" target="n16"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - 
<y:Path sx="31.49835082164276" sy="8.011063035855614" tx="-38.920119619969455" ty="-6.972592983787763"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="43.22912268674946" anchorY="30.978643960824343" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.2971110205213774" upY="-0.9548429407419555" verticalTextPosition="bottom" visible="true" width="39.56640625" x="43.22912268674946" xml:space="preserve" y="13.664655949401851">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.669472523891792" distanceToCenter="true" position="right" ratio="0.22719141601432052" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e33" source="n17" target="n8"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="-43.33748350355211" sy="5.684438758359249" tx="36.257305354126174" ty="6.6461331514895505"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="-17.515595641966684" anchorY="-0.5434478957805595" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" 
height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.1643989873053555" upY="0.9863939238321441" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-59.16762031628495" xml:space="preserve" y="-0.5434478957805595">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="5.6508049884797265" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - <edge id="e34" source="n17" target="n7"> - <data key="d9"/> - <data key="d10"> - <y:PolyLineEdge> - <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> - <y:LineStyle color="#000000" type="line" width="1.0"/> - <y:Arrows source="none" target="standard"/> - <y:EdgeLabel alignment="center" anchorX="18.66323554616406" anchorY="17.866926869038252" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.33339055393309996" upY="-0.9427888090915063" verticalTextPosition="bottom" visible="true" width="54.2734375" x="12.617927142424021" xml:space="preserve" y="-17.32273722529481">kill done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter 
angle="6.283185307179586" distance="14.000475017470663" distanceToCenter="true" position="right" ratio="12.17672099452552" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> - <y:BendStyle smoothed="false"/> - </y:PolyLineEdge> - </data> - </edge> - </graph> - <data key="d7"> - <y:Resources/> - </data> -</graphml> +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:java="http://www.yworks.com/xml/yfiles-common/1.0/java" xmlns:sys="http://www.yworks.com/xml/yfiles-common/markup/primitives/2.0" xmlns:x="http://www.yworks.com/xml/yfiles-common/markup/2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:y="http://www.yworks.com/xml/graphml" xmlns:yed="http://www.yworks.com/xml/yed/3" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://www.yworks.com/xml/schema/graphml/1.1/ygraphml.xsd"> + <!--Created by yEd 3.18.2--> + <key attr.name="Description" attr.type="string" for="graph" id="d0"/> + <key for="port" id="d1" yfiles.type="portgraphics"/> + <key for="port" id="d2" yfiles.type="portgeometry"/> + <key for="port" id="d3" yfiles.type="portuserdata"/> + <key attr.name="url" attr.type="string" for="node" id="d4"/> + <key attr.name="description" attr.type="string" for="node" id="d5"/> + <key for="node" id="d6" yfiles.type="nodegraphics"/> + <key for="graphml" id="d7" yfiles.type="resources"/> + <key attr.name="url" attr.type="string" for="edge" id="d8"/> + <key attr.name="description" attr.type="string" for="edge" id="d9"/> + <key for="edge" id="d10" yfiles.type="edgegraphics"/> + <graph edgedefault="directed" id="G"> + <data key="d0" xml:space="preserve"/> + <node id="n0"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" 
x="301.8278580555715" y="32.23329615497357"/> + <y:Fill color="#FFCC00" transparent="false"/> + <y:BorderStyle color="#000000" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="28.71484375" x="29.642578125" xml:space="preserve" y="5.93359375">Free<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n1"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="184.66887391365057" y="175.51611786633129"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="56.435546875" x="15.7822265625" xml:space="preserve" y="5.93359375">Reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n2"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="184.66887391365057" 
y="252.17678572882204"/> + <y:Fill color="#FFCC00" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="64.275390625" x="11.8623046875" xml:space="preserve" y="5.93359375">Commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n3"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="701.2401942572766" y="-34.173748916774315"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="78.806640625" x="4.5966796875" xml:space="preserve" y="5.93359375">Log reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n4"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="47.24943843986151" 
y="77.64370557140234"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="53.7578125" x="17.12109375" xml:space="preserve" y="-1.1328125">Delete +reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n5"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="47.24943843986151" y="156.96546634935225"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="62.119140625" x="12.9404296875" xml:space="preserve" y="-1.1328125">Delete +commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n6"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="701.2401942572766" 
y="37.46766193890454"/> + <y:Fill color="#FFCC00" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="87.16796875" x="0.416015625" xml:space="preserve" y="5.93359375">Log commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n7"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="657.197834429016" y="201.00808526073567"/> + <y:Fill color="#CCFFCC" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="5.93359375">Quarantine<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n8"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="387.47596608655624" 
y="252.17678572882204"/> + <y:Fill color="#00FF00" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="64.275390625" x="11.8623046875" xml:space="preserve" y="5.93359375">Commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n9"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="387.47596608655624" y="174.5663108821866"/> + <y:Fill color="#CCFFCC" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="56.435546875" x="15.7822265625" xml:space="preserve" y="5.93359375">Reserved<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n10"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="657.197834429016" 
y="122.63481365314385"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="5.93359375">Quarantine +<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n11"> + <data key="d5"/> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="114.97089218898864" width="239.87834856212052" x="416.66386647552287" y="-242.45491334722402"/> + <y:Fill hasColor="false" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="left" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="74.6640625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="187.6328125" x="26.122768031060275" xml:space="preserve" y="20.153414844494307">r — read +w — write +c — commit ++ — new request in flight +done — request written on disk<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="rectangle"/> + </y:ShapeNode> + 
</data> + </node> + <node id="n12"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-242.45491334722402"/> + <y:Fill color="#FFCC00" transparent="false"/> + <y:BorderStyle color="#000000" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="93.244140625" x="92.0" xml:space="preserve" y="5.93359375">Persistent state</y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n13"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-193.90800385474196"/> + <y:Fill color="#00FF00" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="224.76953125" x="92.0" xml:space="preserve" y="5.93359375">Persistent state with requests in flight +</y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n14"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="58.31484757457025" y="-145.3610943622599"/> + <y:Fill color="#FFFF99" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" 
horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="91.609375" x="92.0" xml:space="preserve" y="5.93359375">Temporal state</y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n15"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="58.314847574570265" y="-96.81418486977785"/> + <y:Fill color="#CCFFCC" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="sides" modelPosition="e" textColor="#000000" verticalTextPosition="bottom" visible="true" width="223.134765625" x="91.99999999999999" xml:space="preserve" y="5.93359375">Temporal state with requests in flight +</y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n16"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="657.1978344290161" y="329.1761235083221"/> + <y:Fill color="#CCFFCC" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="-1.1328125">Quarantine +commited<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" 
upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <node id="n17"> + <data key="d6"> + <y:ShapeNode> + <y:Geometry height="30.0" width="88.0" x="547.85308396719" y="239.67475861812653"/> + <y:Fill color="#CCFFCC" transparent="false"/> + <y:BorderStyle color="#000000" raised="false" type="line" width="1.0"/> + <y:NodeLabel alignment="center" autoSizePolicy="content" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" textColor="#000000" verticalTextPosition="bottom" visible="true" width="68.470703125" x="9.7646484375" xml:space="preserve" y="-1.1328125">Quarantine +on kill<y:LabelModel><y:SmartNodeLabelModel distance="4.0"/></y:LabelModel><y:ModelParameter><y:SmartNodeLabelModelParameter labelRatioX="0.0" labelRatioY="0.0" nodeRatioX="0.0" nodeRatioY="0.0" offsetX="0.0" offsetY="0.0" upX="0.0" upY="-1.0"/></y:ModelParameter></y:NodeLabel> + <y:Shape type="ellipse"/> + </y:ShapeNode> + </data> + </node> + <edge id="e0" source="n0" target="n1"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-1.9292672025543425" sy="13.746218964476157" tx="25.25652443572227" ty="-12.28936249214587"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-31.05969337432157" anchorY="43.27401718327805" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.7933330110261343" upY="0.6087879217069007" verticalTextPosition="bottom" visible="true" width="46.205078125" x="-59.188786858345296" xml:space="preserve" 
y="43.27401718327805">reserve<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.770418224707337" distanceToCenter="true" position="left" ratio="0.6881208058219688" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e1" source="n9" target="n2"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-30.481898196182556" sy="10.815580672127737" tx="0.0" ty="-15.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-80.66373828846764" anchorY="4.209450516205635" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.2878441161956364" upY="0.9576772758980726" verticalTextPosition="bottom" visible="true" width="80.734375" x="-157.9812146098011" xml:space="preserve" y="4.209450516205635">commit done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.120886543350457" distanceToCenter="true" position="right" ratio="0.8968891559191847" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" 
frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e2" source="n0" target="n3"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="34.75814998265491" sy="-9.198313983646038" tx="-34.767571381909875" ty="-9.198313983646038"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="71.001953125" x="209.5893985046822" xml:space="preserve" y="-69.52196539883">log allocate<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.899576593121719" distanceToCenter="true" position="left" ratio="0.8518598473263168" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e3" source="n3" target="n6"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="5.5" tx="0.0" ty="-15.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" 
hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="70.0" x="2.6588137329752044" xml:space="preserve" y="7.735972317920741">сommit log<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="2.6587928155423675" distanceToCenter="false" position="left" ratio="0.20542799692019012" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e4" source="n5" target="n0"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="30.393865214780817" sy="-10.843275255829838" tx="-18.959151160012425" ty="6.799672617704289"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="65.44716863590605" anchorY="-32.89023053710895" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.46261727573465816" upY="-0.8865580952153351" verticalTextPosition="bottom" visible="true" width="72.419921875" x="57.058616315748694" xml:space="preserve" y="-82.46872921473505">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" 
defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.948517208199235" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e5" source="n1" target="n4"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-35.68139698198826" sy="-8.745020762370075" tx="0.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="7.422413248786569" anchorY="-18.79208740967067" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.6589509006517482" upY="0.7521859547547068" verticalTextPosition="bottom" visible="true" width="39.56640625" x="-34.28751494080656" xml:space="preserve" y="-44.86440644366113">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.959734230476897" distanceToCenter="true" position="right" ratio="-9.227372589907704" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + 
</y:PolyLineEdge> + </data> + </edge> + <edge id="e6" source="n4" target="n0"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="44.0" sy="0.0" tx="-38.297753912215285" ty="7.3794888613264416"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="41.320990156025914" anchorY="-6.828971852424033" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.21556008869735932" upY="-0.9764905776098337" verticalTextPosition="bottom" visible="true" width="72.419921875" x="37.41227948519333" xml:space="preserve" y="-40.14633718707068">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="6.8276756015565185" distanceToCenter="true" position="left" ratio="0.3955015490500028" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e7" source="n6" target="n0"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="36.37407210989903" ty="5.2343657839309685"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-33.1913128071767" anchorY="1.8444028557555825" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" 
hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-1.2246467991473532E-16" upY="1.0" verticalTextPosition="bottom" visible="true" width="44.201171875" x="-77.3924846821767" xml:space="preserve" y="1.8444028557555772">сut log<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.91080997813034" distanceToCenter="true" position="left" ratio="0.10960384942234233" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e8" source="n10" target="n0"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="27.862218929353844" ty="11.192663010312629"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-17.788315177226764" anchorY="-16.22954736584451" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" hasText="false" height="4.0" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.2350759151535802" upY="0.9719770131616832" verticalTextPosition="bottom" visible="true" width="4.0" x="-22.61652689048782" y="-17.169851026458833"> + <y:LabelModel> + <y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/> + </y:LabelModel> + <y:ModelParameter> + 
<y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.59314957410855" distanceToCenter="true" position="right" ratio="0.01851055095852848" segment="-1"/> + </y:ModelParameter> + <y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/> + </y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e9" source="n9" target="n1"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-34.172146410915275" sy="-9.441323139003387" tx="31.72534507187663" ty="-10.391130123148073"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="standard" target="none"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-93.65792667958408" xml:space="preserve" y="-21.5576817917227">+r+w+c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="12.491278543694992" distanceToCenter="true" position="right" ratio="0.5" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e10" source="n9" target="n8"> + <data key="d10"> + <y:PolyLineEdge> + 
<y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="47.880859375" x="-53.94041574938126" xml:space="preserve" y="4.999996184920974">commit +done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="0.0" distance="30.0" distanceToCenter="true" position="right" ratio="0.0" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e11" source="n8" target="n2"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-34.82383517005076" sy="9.157115617061322" tx="34.85845016171942" ty="9.157115617061322"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="81.654296875" x="-106.33143476469621" xml:space="preserve" y="-17.041515509655596">last rwc 
done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.975117783273342" distanceToCenter="true" position="right" ratio="0.4549547877251474" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e12" source="n9" target="n7"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="39.69019001427455" anchorY="2.6034749748054367" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.09756577368082023" upY="-0.9952290790597222" verticalTextPosition="bottom" visible="true" width="60.876953125" x="39.69019001427455" xml:space="preserve" y="-15.442827310332184">kill owner<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.347751743621105" distanceToCenter="true" position="left" ratio="0.3015238597711589" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" 
sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e13" source="n8" target="n17"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="34.333916567865344" sy="-5.855893959205844" tx="-33.66545563118473" ty="-9.663789067285506"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="11.332674393531136" anchorY="-3.7196742493306942" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.17386769945429842" upY="-0.984769020169943" verticalTextPosition="bottom" visible="true" width="60.876953125" x="8.179963999519991" xml:space="preserve" y="-32.1608420375119">kill owner<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.759027546315863" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e14" source="n2" target="n5"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-28.230843196605434" sy="-11.505833931186658" tx="33.83333434299962" ty="9.60320621427303"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="1.8064046893578052" 
anchorY="-27.562947890016602" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.701154105430335" upY="0.713009761811286" verticalTextPosition="bottom" visible="true" width="39.56640625" x="-39.11872513405677" xml:space="preserve" y="-55.30509606932857">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="11.85282047344601" distanceToCenter="true" position="right" ratio="0.22859433300968157" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e15" source="n7" target="n10"> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="32.265625" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="81.654296875" x="4.494206775091357" xml:space="preserve" y="-43.103712156877265">last rwc done +kill done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" 
defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="45.32133572498151" distanceToCenter="true" position="right" ratio="0.955864940213314" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e16" source="n0" target="n0"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> + <y:Point x="345.8278580555715" y="14.042778699409723"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-38.28843346365477" xml:space="preserve" y="-39.41837475902949">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="0.851185318054408" distanceToCenter="true" position="left" ratio="11.065409134708744" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + 
</edge> + <edge id="e17" source="n3" target="n0"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-44.0" sy="0.0" tx="44.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-119.92863738246035" xml:space="preserve" y="22.565682416565053">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.328015368693165" distanceToCenter="true" position="left" ratio="0.2805018761699686" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e18" source="n6" target="n6"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="19.017944130896808" sy="13.512544433732373" tx="44.0" ty="0.0"> + <y:Point x="789.2401942572766" y="74.28285443190316"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" 
modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="5.441708500135633" xml:space="preserve" y="15.798604455128782">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="0.0" distanceToCenter="false" position="right" ratio="6.720318614416623" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e19" source="n1" target="n0"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="11.100571159856088" sy="-14.501222602423539" tx="-13.929103404286025" ty="14.241300677086956"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="18.553360681792412" anchorY="-25.696531015585236" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.7792165417141262" upY="-0.626754801432807" verticalTextPosition="bottom" visible="true" width="42.2265625" x="4.423973233991733" xml:space="preserve" y="-69.96499431316647">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.714745422657094" distanceToCenter="true" 
position="left" ratio="0.342725244313297" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e20" source="n5" target="n2"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="14.454784649358558" sy="14.171691206036428" tx="-44.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="16.803286936629164" anchorY="44.754330765062605" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.7162165042990318" upY="-0.6978781548162079" verticalTextPosition="bottom" visible="true" width="42.2265625" x="16.803286936629164" xml:space="preserve" y="32.099837035934335">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.13187428633631" distanceToCenter="true" position="right" ratio="0.6351411488884056" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e21" source="n4" target="n0"> + <data key="d9"/> + <data key="d10"> + 
<y:PolyLineEdge> + <y:Path sx="27.72843780550508" sy="-11.640146089764052" tx="-37.203018275484965" ty="-7.999525101672361"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="72.71794189796697" anchorY="-17.741924379478654" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.2150948732018046" upY="-0.9765931576261937" verticalTextPosition="bottom" visible="true" width="42.2265625" x="68.81766689248737" xml:space="preserve" y="-44.53302210218295">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.75179091058278" distanceToCenter="true" position="left" ratio="0.42706763912791285" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e22" source="n2" target="n2"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> + <y:Point x="203.0886591696214" y="295.5033242120122"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" 
horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-9.590665738951628" xml:space="preserve" y="22.011191593549654">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="13.074784808336569" distanceToCenter="true" position="left" ratio="6.042022354967836" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e23" source="n8" target="n2"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="26.614829141286123" ty="11.950771246842464"> + <y:Point x="379.1254391971893" y="295.0875296808884"/> + <y:Point x="285.49418907424945" y="295.0875296808884"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-96.78989693579211" xml:space="preserve" y="14.925386589537254">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" 
distance="8.718441625445736" distanceToCenter="true" position="left" ratio="0.502281970046856" segment="1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e24" source="n9" target="n0"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="5.731400659936355" ty="11.192663010312629"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-6.644791176402634" anchorY="-45.7548071943084" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.8539319547737003" upY="0.520384681381348" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-44.1030354724647" xml:space="preserve" y="-81.81341825330722">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.069496224652644" distanceToCenter="true" position="right" ratio="0.5454974242562394" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e25" source="n9" target="n1"> + 
<data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-35.34252622324016" sy="8.942187263495612" tx="37.25981182851308" ty="7.992380279350925"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="75.5078125" x="-104.6739728663735" xml:space="preserve" y="-17.786774298462603">last rw done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="8.720367048636998" distanceToCenter="true" position="right" ratio="0.5293192616653534" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e26" source="n2" target="n8"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="31.953203361267015" sy="-10.307390918648395" tx="-31.973351254374222" ty="-10.307390918648395"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" 
preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="45.03540296804721" xml:space="preserve" y="-19.072106154013795">+r+w+c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="10.005709948562414" distanceToCenter="true" position="left" ratio="0.5400661519081636" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e27" source="n7" target="n0"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="17.64799511269962" ty="12.043848328367119"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-121.55194649261693" anchorY="-76.73061303132732" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.4209598329465266" upY="0.9070792793607582" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-167.48797209584964" xml:space="preserve" y="-94.50629972723338">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.365856975346183" distanceToCenter="true" position="right" 
ratio="0.5000366300695136" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e28" source="n16" target="n2"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="18.064097820689597" ty="13.664632674132577"> + <y:Point x="275.457333963666" y="344.1761235083221"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-192.14370846750944" xml:space="preserve" y="0.9279829026191919">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="9.994382587656453" distanceToCenter="true" position="left" ratio="0.40749739509495936" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e29" source="n16" target="n7"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> 
+ <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="72.419921875" x="5.706653002872827" xml:space="preserve" y="-93.1680316826237">delete done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="41.91659445276315" distanceToCenter="true" position="right" ratio="1.0" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e30" source="n9" target="n9"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"> + <y:Point x="431.47596608655624" y="156.0038208221403"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-37.89623429599874" xml:space="preserve" 
y="-36.15734837155037">±r±w+c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="5.054648982679395" distanceToCenter="true" position="right" ratio="7.42970862240162" segment="-2"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e31" source="n8" target="n8"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="44.0" sy="0.0" tx="-32.204640726824664" ty="10.222598694162002"> + <y:Point x="445.1556747089578" y="246.3211708445953"/> + </y:Path> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" verticalTextPosition="bottom" visible="true" width="52.919921875" x="-41.32400424821128" xml:space="preserve" y="-48.89784390234962">±r±w±c<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="false" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="1.9573788154470968" distanceToCenter="false" position="right" ratio="0.8086000895714727" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" 
placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e32" source="n8" target="n16"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="31.49835082164276" sy="8.011063035855614" tx="-38.920119619969455" ty="-6.972592983787763"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="43.22912268674946" anchorY="30.978643960824343" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.2971110205213774" upY="-0.9548429407419555" verticalTextPosition="bottom" visible="true" width="39.56640625" x="43.22912268674946" xml:space="preserve" y="13.664655949401851">delete<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="7.669472523891792" distanceToCenter="true" position="right" ratio="0.22719141601432052" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e33" source="n17" target="n8"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="-43.33748350355211" sy="5.684438758359249" tx="36.257305354126174" ty="6.6461331514895505"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" 
target="standard"/> + <y:EdgeLabel alignment="center" anchorX="-17.515595641966684" anchorY="-0.5434478957805595" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="0.1643989873053555" upY="0.9863939238321441" verticalTextPosition="bottom" visible="true" width="42.2265625" x="-59.16762031628495" xml:space="preserve" y="-0.5434478957805595">restart<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="5.6508049884797265" distanceToCenter="true" position="left" ratio="0.5" segment="0"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + <edge id="e34" source="n17" target="n7"> + <data key="d9"/> + <data key="d10"> + <y:PolyLineEdge> + <y:Path sx="0.0" sy="0.0" tx="0.0" ty="0.0"/> + <y:LineStyle color="#000000" type="line" width="1.0"/> + <y:Arrows source="none" target="standard"/> + <y:EdgeLabel alignment="center" anchorX="18.66323554616406" anchorY="17.866926869038252" configuration="AutoFlippingLabel" distance="2.0" fontFamily="Dialog" fontSize="12" fontStyle="plain" hasBackgroundColor="false" hasLineColor="false" height="18.1328125" horizontalTextPosition="center" iconTextGap="4" modelName="custom" preferredPlacement="anywhere" ratio="0.5" textColor="#000000" upX="-0.33339055393309996" upY="-0.9427888090915063" verticalTextPosition="bottom" visible="true" width="54.2734375" 
x="12.617927142424021" xml:space="preserve" y="-17.32273722529481">kill done<y:LabelModel><y:SmartEdgeLabelModel autoRotationEnabled="true" defaultAngle="0.0" defaultDistance="10.0"/></y:LabelModel><y:ModelParameter><y:SmartEdgeLabelModelParameter angle="6.283185307179586" distance="14.000475017470663" distanceToCenter="true" position="right" ratio="12.17672099452552" segment="-1"/></y:ModelParameter><y:PreferredPlacementDescriptor angle="0.0" angleOffsetOnRightSide="0" angleReference="absolute" angleRotationOnRightSide="co" distance="-1.0" frozen="true" placement="anywhere" side="anywhere" sideReference="relative_to_edge_flow"/></y:EdgeLabel> + <y:BendStyle smoothed="false"/> + </y:PolyLineEdge> + </data> + </edge> + </graph> + <data key="d7"> + <y:Resources/> + </data> +</graphml> diff --git a/ydb/core/blobstorage/dsproxy/dsproxy.h b/ydb/core/blobstorage/dsproxy/dsproxy.h index 3b3f69d113..27592e971f 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy.h @@ -23,8 +23,8 @@ namespace NKikimr { -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + constexpr ui32 TypicalPartsInBlob = 6; constexpr ui32 TypicalDisksInSubring = 8; @@ -100,32 +100,32 @@ struct TNodeLayoutInfo : TThrRefBase { using TNodeLayoutInfoPtr = TIntrusivePtr<TNodeLayoutInfo>; -inline TStoragePoolCounters::EHandleClass HandleClassToHandleClass(NKikimrBlobStorage::EGetHandleClass handleClass) { - switch (handleClass) { - case NKikimrBlobStorage::FastRead: - return TStoragePoolCounters::EHandleClass::HcGetFast; - case NKikimrBlobStorage::AsyncRead: - return TStoragePoolCounters::EHandleClass::HcGetAsync; - case NKikimrBlobStorage::Discover: - return TStoragePoolCounters::EHandleClass::HcGetDiscover; +inline TStoragePoolCounters::EHandleClass HandleClassToHandleClass(NKikimrBlobStorage::EGetHandleClass handleClass) { + switch (handleClass) { + case NKikimrBlobStorage::FastRead: + return TStoragePoolCounters::EHandleClass::HcGetFast; + 
case NKikimrBlobStorage::AsyncRead: + return TStoragePoolCounters::EHandleClass::HcGetAsync; + case NKikimrBlobStorage::Discover: + return TStoragePoolCounters::EHandleClass::HcGetDiscover; case NKikimrBlobStorage::LowRead: return TStoragePoolCounters::EHandleClass::HcGetLow; - } - return TStoragePoolCounters::EHandleClass::HcCount; -} - -inline TStoragePoolCounters::EHandleClass HandleClassToHandleClass(NKikimrBlobStorage::EPutHandleClass handleClass) { - switch (handleClass) { - case NKikimrBlobStorage::TabletLog: - return TStoragePoolCounters::EHandleClass::HcPutTabletLog; - case NKikimrBlobStorage::UserData: - return TStoragePoolCounters::EHandleClass::HcPutUserData; - case NKikimrBlobStorage::AsyncBlob: - return TStoragePoolCounters::EHandleClass::HcPutAsync; - } - return TStoragePoolCounters::EHandleClass::HcCount; -} - + } + return TStoragePoolCounters::EHandleClass::HcCount; +} + +inline TStoragePoolCounters::EHandleClass HandleClassToHandleClass(NKikimrBlobStorage::EPutHandleClass handleClass) { + switch (handleClass) { + case NKikimrBlobStorage::TabletLog: + return TStoragePoolCounters::EHandleClass::HcPutTabletLog; + case NKikimrBlobStorage::UserData: + return TStoragePoolCounters::EHandleClass::HcPutUserData; + case NKikimrBlobStorage::AsyncBlob: + return TStoragePoolCounters::EHandleClass::HcPutAsync; + } + return TStoragePoolCounters::EHandleClass::HcCount; +} + NActors::NLog::EPriority PriorityForStatusOutbound(NKikimrProto::EReplyStatus status); NActors::NLog::EPriority PriorityForStatusResult(NKikimrProto::EReplyStatus status); NActors::NLog::EPriority PriorityForStatusInbound(NKikimrProto::EReplyStatus status); @@ -380,9 +380,9 @@ public: const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; request->Record.MutableTimestamps()->SetSentByDSProxyUs(GetCycleCountFast() / cyclesPerUs); TLogoBlobID id = GetBlobId(request); - TVDiskID vDiskId = VDiskIDFromVDiskID(request->Record.GetVDiskID()); - LWTRACK(DSProxyPutVPutIsSent, request->Orbit, 
Info->GetFailDomainOrderNumber(vDiskId), - Info->GroupID, id.Channel(), id.PartId(), id.ToString(), id.BlobSize()); + TVDiskID vDiskId = VDiskIDFromVDiskID(request->Record.GetVDiskID()); + LWTRACK(DSProxyPutVPutIsSent, request->Orbit, Info->GetFailDomainOrderNumber(vDiskId), + Info->GroupID, id.Channel(), id.PartId(), id.ToString(), id.BlobSize()); SendToQueue(std::move(request), messageCookie, TraceId.SeparateBranch(), timeStatsEnabled); } } @@ -532,7 +532,7 @@ IActor* CreateBlobStorageGroupPutRequest(const TIntrusivePtr<TBlobStorageGroupIn const TIntrusivePtr<TGroupQueues> &state, const TActorId &source, const TIntrusivePtr<TBlobStorageGroupProxyMon> &mon, TEvBlobStorage::TEvPut *ev, ui64 cookie, NWilson::TTraceId traceId, bool timeStatsEnabled, - TDiskResponsivenessTracker::TPerDiskStatsPtr stats, + TDiskResponsivenessTracker::TPerDiskStatsPtr stats, TMaybe<TGroupStat::EKind> latencyQueueKind, TInstant now, TIntrusivePtr<TStoragePoolCounters> &storagePoolCounters, bool enableRequestMod3x3ForMinLatecy); diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_discover.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_discover.cpp index aa03476005..2605a19a2e 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_discover.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_discover.cpp @@ -7,8 +7,8 @@ namespace NKikimr { -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // DISCOVER request //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -301,7 +301,7 @@ class TBlobStorageGroupDiscoverRequest : public TBlobStorageGroupRequestActor<TB const bool success = result->Status == NKikimrProto::OK; WILSON_TRACE_FROM_ACTOR(*TlsActivationContext, *this, &TraceId, EvDiscoverResultSent); LWPROBE(DSProxyRequestDuration, TEvBlobStorage::EvDiscover, 0, duration.SecondsFloat() * 
1000.0, - TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", success); + TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", success); SendResponseAndDie(std::move(result)); } @@ -541,7 +541,7 @@ class TBlobStorageGroupDiscoverRequest : public TBlobStorageGroupRequestActor<TB if (id >= stepToId || isAllRead) { // Y_VERIFY(id.PartId() == 0); const TLogoBlobID fullid = id.FullID(); - TVDiskID vDiskId(Info->CreateVDiskID(vId)); + TVDiskID vDiskId(Info->CreateVDiskID(vId)); TIngress ingress = curVDisk.Blobs[blobIdx].Ingress; TEntryInfo &entry = GroupResponseTracker.EntryInfo[fullid]; entry.RegisterBlob(ingress, Info.Get(), vDiskId, fullid); @@ -704,7 +704,7 @@ class TBlobStorageGroupDiscoverRequest : public TBlobStorageGroupRequestActor<TB for (TVDiskInfoContainer::iterator vDiskIt = VDiskInfo.begin(); vDiskIt != VDiskInfo.end(); ++vDiskIt) { TVDiskIdShort vId(vDiskIt->first); TVDiskInfo &curVDisk = vDiskIt->second; - TVDiskID vDiskId = Info->CreateVDiskID(vId); + TVDiskID vDiskId = Info->CreateVDiskID(vId); if (!curVDisk.IsError && !curVDisk.IsAllRead && !curVDisk.IsMoreRequested) { const TActorId &vdisk = Info->GetActorId(vDiskId); diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_discover_m3dc.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_discover_m3dc.cpp index 2360188f5e..a4e48dba76 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_discover_m3dc.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_discover_m3dc.cpp @@ -8,8 +8,8 @@ namespace NKikimr { -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // DISCOVER request //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -422,7 +422,7 @@ private: class TBlobStorageGroupMirror3dcDiscoverRequest : public TBlobStorageGroupRequestActor<TBlobStorageGroupMirror3dcDiscoverRequest>{ const ui64 TabletId; 
const ui32 MinGeneration; - const TInstant StartTime; + const TInstant StartTime; const TInstant Deadline; const bool ReadBody; const bool DiscoverBlockedGeneration; @@ -656,7 +656,7 @@ public: Y_VERIFY(!Responded); const TDuration duration = TActivationContext::Now() - StartTime; LWPROBE(DSProxyRequestDuration, TEvBlobStorage::EvDiscover, 0, duration.SecondsFloat() * 1000.0, - TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", true); + TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", true); SendResponseAndDie(std::move(response)); Responded = true; } @@ -669,7 +669,7 @@ public: Y_VERIFY(status != NKikimrProto::OK); const TDuration duration = TActivationContext::Now() - StartTime; LWPROBE(DSProxyRequestDuration, TEvBlobStorage::EvDiscover, 0, duration.SecondsFloat() * 1000.0, - TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", false); + TabletId, Info->GroupID, TLogoBlobID::MaxChannel, "", false); std::unique_ptr<TEvBlobStorage::TEvDiscoverResult> response(new TEvBlobStorage::TEvDiscoverResult( status, MinGeneration, 0U)); response->ErrorReason = ErrorReason; diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_get.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_get.cpp index 2256bc73d7..9b08b41861 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_get.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_get.cpp @@ -7,13 +7,13 @@ #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/digest/crc32c/crc32c.h> #include <util/generic/set.h> -#include <util/system/datetime.h> +#include <util/system/datetime.h> #include "dsproxy_get_impl.h" namespace NKikimr { -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + struct TEvAccelerateGet : public TEventLocal<TEvAccelerateGet, TEvBlobStorage::EvAccelerateGet> { ui64 CauseIdx; TEvAccelerateGet(ui64 causeIdx) @@ -185,38 +185,38 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor<TBlobSt ProcessReplyFromQueue(ev); CountEvent(*ev->Get()); - const ui64 
cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; + const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; ev->Get()->Record.MutableTimestamps()->SetReceivedByDSProxyUs(GetCycleCountFast() / cyclesPerUs); const NKikimrBlobStorage::TEvVGetResult &record = ev->Get()->Record; Y_VERIFY(record.HasStatus()); - ui64 totalSize = 0; - ui64 tabletId = 0; - ui32 channel = 0; - for (ui32 i = 0; i < record.ResultSize(); ++i) { - const NKikimrBlobStorage::TQueryResult &queryResult = record.GetResult(i); - if (record.GetStatus() == NKikimrProto::OK) { + ui64 totalSize = 0; + ui64 tabletId = 0; + ui32 channel = 0; + for (ui32 i = 0; i < record.ResultSize(); ++i) { + const NKikimrBlobStorage::TQueryResult &queryResult = record.GetResult(i); + if (record.GetStatus() == NKikimrProto::OK) { totalSize += queryResult.GetBuffer().size(); - } - const TLogoBlobID blob = LogoBlobIDFromLogoBlobID(queryResult.GetBlobID()); - tabletId = blob.TabletID(); - channel = blob.Channel(); - } + } + const TLogoBlobID blob = LogoBlobIDFromLogoBlobID(queryResult.GetBlobID()); + tabletId = blob.TabletID(); + channel = blob.Channel(); + } ++GeneratedSubrequests; GeneratedSubrequestBytes += totalSize; - - Y_VERIFY(record.HasVDiskID()); - const TVDiskID vdisk = VDiskIDFromVDiskID(record.GetVDiskID()); + + Y_VERIFY(record.HasVDiskID()); + const TVDiskID vdisk = VDiskIDFromVDiskID(record.GetVDiskID()); const TVDiskIdShort shortId(vdisk); - LWPROBE(DSProxyVDiskRequestDuration, TEvBlobStorage::EvVGet, totalSize, tabletId, vdisk.GroupID, channel, + LWPROBE(DSProxyVDiskRequestDuration, TEvBlobStorage::EvVGet, totalSize, tabletId, vdisk.GroupID, channel, Info->GetFailDomainOrderNumber(shortId), GetStartTime(record.GetTimestamps()), GetTotalTimeMs(record.GetTimestamps()), GetVDiskTimeMs(record.GetTimestamps()), GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()), - NKikimrBlobStorage::EGetHandleClass_Name(GetImpl.GetHandleClass()), - 
NKikimrProto::EReplyStatus_Name(record.GetStatus())); + NKikimrBlobStorage::EGetHandleClass_Name(GetImpl.GetHandleClass()), + NKikimrProto::EReplyStatus_Name(record.GetStatus())); Y_VERIFY(record.HasCookie()); TVGetCookie cookie(record.GetCookie()); if (RootCauseTrack.IsOn) { @@ -224,7 +224,7 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor<TBlobSt GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()), GetVDiskTimeMs(record.GetTimestamps())); } - + ui32 orderNumber = Info->GetOrderNumber(shortId); if (DiskCounters.size() <= orderNumber) { DiskCounters.resize(orderNumber + 1); @@ -353,12 +353,12 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor<TBlobSt WILSON_TRACE_FROM_ACTOR(*TlsActivationContext, *this, &TraceId, EvVPutResultReceived, MergedNode = std::move(ev->TraceId)); - const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; + const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; ev->Get()->Record.MutableTimestamps()->SetReceivedByDSProxyUs(GetCycleCountFast() / cyclesPerUs); const auto &record = ev->Get()->Record; - const TVDiskID vDiskId = VDiskIDFromVDiskID(record.GetVDiskID()); + const TVDiskID vDiskId = VDiskIDFromVDiskID(record.GetVDiskID()); TVDiskIdShort shortId(vDiskId); - const NKikimrProto::EReplyStatus status = record.GetStatus(); + const NKikimrProto::EReplyStatus status = record.GetStatus(); NActors::NLog::EPriority priority = PriorityForStatusInbound(status); A_LOG_LOG_S(priority != NActors::NLog::PRI_DEBUG, priority, "BPG30", "Handle VPuEventResult" << " status# " << NKikimrProto::EReplyStatus_Name(status).data() @@ -372,9 +372,9 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor<TBlobSt GetTotalTimeMs(record.GetTimestamps()), GetVDiskTimeMs(record.GetTimestamps()), GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()), - NKikimrBlobStorage::EPutHandleClass_Name(GetImpl.GetPutHandleClass()), - 
NKikimrProto::EReplyStatus_Name(status)); - + NKikimrBlobStorage::EPutHandleClass_Name(GetImpl.GetPutHandleClass()), + NKikimrProto::EReplyStatus_Name(status)); + Y_VERIFY(record.HasCookie()); TBlobCookie cookie(record.GetCookie()); if (RootCauseTrack.IsOn) { @@ -454,26 +454,26 @@ class TBlobStorageGroupGetRequest : public TBlobStorageGroupRequestActor<TBlobSt const NKikimrProto::EReplyStatus status = evResult->Status; const TInstant now = TActivationContext::Now(); const TDuration duration = (now > StartTime) ? (now - StartTime) : TDuration::MilliSeconds(0); - Mon->CountGetResponseTime(Info->GetDeviceType(), GetImpl.GetHandleClass(), evResult->PayloadSizeBytes(), duration); + Mon->CountGetResponseTime(Info->GetDeviceType(), GetImpl.GetHandleClass(), evResult->PayloadSizeBytes(), duration); *Mon->ActiveGetCapacity -= ReportedBytes; ReportedBytes = 0; bool success = evResult->Status == NKikimrProto::OK; WILSON_TRACE_FROM_ACTOR(*TlsActivationContext, *this, &TraceId, EvGetResultSent, ReplyStatus = status, ResponseSize = GetImpl.GetReplyBytes()); - ui64 requestSize = 0; - ui64 tabletId = 0; - ui32 channel = 0; - for (ui32 i = 0; i < evResult->ResponseSz; ++i) { - tabletId = evResult->Responses[i].Id.TabletID(); - channel = evResult->Responses[i].Id.Channel(); - requestSize += evResult->Responses[i].RequestedSize; - } + ui64 requestSize = 0; + ui64 tabletId = 0; + ui32 channel = 0; + for (ui32 i = 0; i < evResult->ResponseSz; ++i) { + tabletId = evResult->Responses[i].Id.TabletID(); + channel = evResult->Responses[i].Id.Channel(); + requestSize += evResult->Responses[i].RequestedSize; + } RootCauseTrack.RenderTrack(Orbit); LWTRACK(DSProxyGetReply, Orbit); evResult->Orbit = std::move(Orbit); LWPROBE(DSProxyRequestDuration, TEvBlobStorage::EvGet, requestSize, duration.SecondsFloat() * 1000.0, tabletId, - evResult->GroupId, channel, NKikimrBlobStorage::EGetHandleClass_Name(GetImpl.GetHandleClass()), - success); + evResult->GroupId, channel, 
NKikimrBlobStorage::EGetHandleClass_Name(GetImpl.GetHandleClass()), + success); return SendResponseAndDie(std::unique_ptr<TEvBlobStorage::TEvGetResult>(evResult.Release())); } @@ -501,7 +501,7 @@ public: NWilson::TTraceId traceId, TNodeLayoutInfoPtr&& nodeLayout, TMaybe<TGroupStat::EKind> latencyQueueKind, TInstant now, TIntrusivePtr<TStoragePoolCounters> &storagePoolCounters, bool isVMultiPutMode) : TBlobStorageGroupRequestActor(info, state, mon, source, cookie, std::move(traceId), - NKikimrServices::BS_PROXY_GET, ev->IsVerboseNoDataEnabled || ev->CollectDebugInfo, + NKikimrServices::BS_PROXY_GET, ev->IsVerboseNoDataEnabled || ev->CollectDebugInfo, latencyQueueKind, now, storagePoolCounters, ev->RestartCounter) , GetImpl(info, state, ev, std::move(nodeLayout), LogCtx.RequestPrefix) , Orbit(std::move(ev->Orbit)) @@ -517,7 +517,7 @@ public: MaxSaneRequests = ev->QuerySize * info->Type.TotalPartCount() * (1 + info->Type.Handoff()) * 3; RequestBytes = GetImpl.CountRequestBytes(); - RequestHandleClass = HandleClassToHandleClass(ev->GetHandleClass); + RequestHandleClass = HandleClassToHandleClass(ev->GetHandleClass); if (Orbit.HasShuttles()) { RootCauseTrack.IsOn = true; } diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp index 1a6fd868b2..0583b454df 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp @@ -164,7 +164,7 @@ ui64 TGetImpl::GetTimeToAccelerateNs(TLogContext &logCtx, NKikimrBlobStorage::EV *Info, *Blackboard.GroupQueues, queueId, &worstPredictedNs, &nextToWorstPredictedNs, &worstOrderNumber); } - return nextToWorstPredictedNs * 1; + return nextToWorstPredictedNs * 1; } ui64 TGetImpl::GetTimeToAccelerateGetNs(TLogContext &logCtx) { diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.h b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.h index 3568bfea70..4908ae9bda 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.h 
+++ b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.h @@ -97,14 +97,14 @@ public: return ReplyBytes; } - NKikimrBlobStorage::EGetHandleClass GetHandleClass() const { + NKikimrBlobStorage::EGetHandleClass GetHandleClass() const { return Blackboard.GetHandleClass; - } - - NKikimrBlobStorage::EPutHandleClass GetPutHandleClass() const { + } + + NKikimrBlobStorage::EPutHandleClass GetPutHandleClass() const { return Blackboard.PutHandleClass; - } - + } + void ReportBytes(i64 bytes) { BytesToReport += bytes; } diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_mon.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_mon.cpp index 3d407b73cb..73afac8121 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_mon.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_mon.cpp @@ -204,6 +204,6 @@ void TBlobStorageGroupProxyMon::ThroughputUpdate() { } } - + } // NKikimr diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_mon.h b/ydb/core/blobstorage/dsproxy/dsproxy_mon.h index cfa3b17291..941bea6664 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_mon.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_mon.h @@ -299,8 +299,8 @@ public: PutThroughput->Count(size); } - void CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, ui32 size, - TDuration duration) { + void CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, ui32 size, + TDuration duration) { const ui32 durationMs = duration.MilliSeconds(); PutResponseTime.Increment(durationMs); switch (cls) { @@ -321,14 +321,14 @@ public: default: Y_FAIL("Unexpected case, HandleClass# %" PRIu64, (ui64)cls); } - NodeMon->CountPutPesponseTime(type, cls, size, duration); + NodeMon->CountPutPesponseTime(type, cls, size, duration); } - void CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, ui32 size, - TDuration duration) { + void CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, ui32 size, + 
TDuration duration) { *EventGetResBytes += size; GetResponseTime.Increment(duration.MilliSeconds()); - NodeMon->CountGetResponseTime(type, cls, size, duration); + NodeMon->CountGetResponseTime(type, cls, size, duration); } void CountBlockResponseTime(TDuration duration) { diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.cpp index d9d42b0217..addca8c533 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.cpp @@ -5,9 +5,9 @@ #include <ydb/core/blobstorage/base/common_latency_hist_bounds.h> namespace NKikimr { -TDsProxyNodeMon::TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, bool initForAllDeviceTypes) - : Group(GetServiceCounters(counters, "dsproxynode")) -{ +TDsProxyNodeMon::TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, bool initForAllDeviceTypes) + : Group(GetServiceCounters(counters, "dsproxynode")) +{ TVector<float> percentiles4; percentiles4.push_back(0.50f); percentiles4.push_back(0.90f); @@ -17,34 +17,34 @@ TDsProxyNodeMon::TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &c TVector<float> percentiles1; percentiles1.push_back(1.0f); - PutResponseTime.Initialize(Group, "event", "put", "latency", percentiles4); - PutTabletLogResponseTime.Initialize(Group, "event", "putTabletLogAll", "latency", percentiles1); - PutTabletLogResponseTime256.Initialize(Group, "event", "putTabletLog256", "latency", percentiles1); - PutTabletLogResponseTime512.Initialize(Group, "event", "putTabletLog512", "latency", percentiles1); - PutAsyncBlobResponseTime.Initialize(Group, "event", "putAsyncBlob", "latency", percentiles1); - PutUserDataResponseTime.Initialize(Group, "event", "putUserData", "latency", percentiles1); - - GetResponseTime.Initialize(Group, "event", "get", "latency", percentiles1); - GetAsyncReadResponseTime.Initialize(Group, "event", "getAsyncRead", "latency", percentiles1); - 
GetFastReadResponseTime256Ki.Initialize(Group, "event", "getFastRead256Ki", "latency", percentiles1); - GetFastReadResponseTimeInf.Initialize(Group, "event", "getFastReadInf", "latency", percentiles1); - GetDiscoverResponseTime.Initialize(Group, "event", "getDiscover", "latency", percentiles1); + PutResponseTime.Initialize(Group, "event", "put", "latency", percentiles4); + PutTabletLogResponseTime.Initialize(Group, "event", "putTabletLogAll", "latency", percentiles1); + PutTabletLogResponseTime256.Initialize(Group, "event", "putTabletLog256", "latency", percentiles1); + PutTabletLogResponseTime512.Initialize(Group, "event", "putTabletLog512", "latency", percentiles1); + PutAsyncBlobResponseTime.Initialize(Group, "event", "putAsyncBlob", "latency", percentiles1); + PutUserDataResponseTime.Initialize(Group, "event", "putUserData", "latency", percentiles1); + + GetResponseTime.Initialize(Group, "event", "get", "latency", percentiles1); + GetAsyncReadResponseTime.Initialize(Group, "event", "getAsyncRead", "latency", percentiles1); + GetFastReadResponseTime256Ki.Initialize(Group, "event", "getFastRead256Ki", "latency", percentiles1); + GetFastReadResponseTimeInf.Initialize(Group, "event", "getFastReadInf", "latency", percentiles1); + GetDiscoverResponseTime.Initialize(Group, "event", "getDiscover", "latency", percentiles1); GetLowReadResponseTime.Initialize(Group, "event", "getLowRead", "latency", percentiles1); - BlockResponseTime.Initialize(Group, "event", "block", "latency", percentiles1); - DiscoverResponseTime.Initialize(Group, "event", "discover", "latency", percentiles1); - IndexRestoreGetResponseTime.Initialize(Group, "event", "indexRestoreGet", "latency", + BlockResponseTime.Initialize(Group, "event", "block", "latency", percentiles1); + DiscoverResponseTime.Initialize(Group, "event", "discover", "latency", percentiles1); + IndexRestoreGetResponseTime.Initialize(Group, "event", "indexRestoreGet", "latency", percentiles1); - RangeResponseTime.Initialize(Group, 
"event", "range", "latency", percentiles1); + RangeResponseTime.Initialize(Group, "event", "range", "latency", percentiles1); PatchResponseTime.Initialize(Group, "event", "patch", "latency", percentiles4); - - IsCountersPresentedForIdx.fill(false); - if (initForAllDeviceTypes) { - CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_ROT); - CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_SSD); - CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_NVME); - CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_UNKNOWN); - } + + IsCountersPresentedForIdx.fill(false); + if (initForAllDeviceTypes) { + CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_ROT); + CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_SSD); + CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_NVME); + CheckNodeMonCountersForDeviceType(TPDiskCategory::DEVICE_TYPE_UNKNOWN); + } // restart counters { @@ -84,92 +84,92 @@ TDsProxyNodeMon::TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &c } } -ui32 IdxForType(TPDiskCategory::EDeviceType type) { - switch (type) { - case TPDiskCategory::DEVICE_TYPE_ROT: return 0; - case TPDiskCategory::DEVICE_TYPE_SSD: return 1; - case TPDiskCategory::DEVICE_TYPE_NVME: return 2; - case TPDiskCategory::DEVICE_TYPE_UNKNOWN: return 3; - } - return 3; -} - -void TDsProxyNodeMon::CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, - ui32 size, TDuration duration) { - const ui32 durationMs = duration.MilliSeconds(); - const double durationMsFloat = duration.MicroSeconds() / 1000.0; - PutResponseTime.Increment(durationMs); - const ui32 idx = IdxForType(type); - Y_VERIFY(IsCountersPresentedForIdx[idx]); - switch (cls) { - case NKikimrBlobStorage::EPutHandleClass::TabletLog: - PutTabletLogResponseTime.Increment(durationMs); - if (size < (256 << 10)) { - PutTabletLogResponseTime256.Increment(durationMs); - 
Y_VERIFY_DEBUG(PutTabletLogResponseTimeHist256Ki[idx]); - PutTabletLogResponseTimeHist256Ki[idx]->Collect(durationMsFloat); - } else { - Y_VERIFY_DEBUG(PutTabletLogResponseTimeHistInf[idx]); - PutTabletLogResponseTimeHistInf[idx]->Collect(durationMsFloat); - if (size < (512 << 10)) { - PutTabletLogResponseTime512.Increment(durationMs); - } - } - break; - case NKikimrBlobStorage::EPutHandleClass::AsyncBlob: - PutAsyncBlobResponseTime.Increment(durationMs); - Y_VERIFY_DEBUG(PutAsyncBlobResponseTimeHist[idx]); - PutAsyncBlobResponseTimeHist[idx]->Collect(durationMsFloat); - break; - case NKikimrBlobStorage::EPutHandleClass::UserData: - PutUserDataResponseTime.Increment(durationMs); - Y_VERIFY_DEBUG(PutUserDataResponseTimeHist[idx]); - PutUserDataResponseTimeHist[idx]->Collect(durationMsFloat); - break; - default: - Y_FAIL("Unexpected case, HandleClass# %" PRIu64, (ui64)cls); - } -} - -void TDsProxyNodeMon::CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, - ui32 size, TDuration duration) { - const ui32 durationMs = duration.MilliSeconds(); - const double durationMsFloat = duration.MicroSeconds() / 1000.0; - GetResponseTime.Increment(durationMs); - const ui32 idx = IdxForType(type); - Y_VERIFY(IsCountersPresentedForIdx[idx]); - switch (cls) { - case NKikimrBlobStorage::EGetHandleClass::AsyncRead: - GetAsyncReadResponseTime.Increment(durationMs); - Y_VERIFY_DEBUG(GetAsyncReadResponseTimeHist[idx]); - GetAsyncReadResponseTimeHist[idx]->Collect(durationMsFloat); - break; - case NKikimrBlobStorage::EGetHandleClass::FastRead: - if (size < (256 << 10)) { - GetFastReadResponseTime256Ki.Increment(durationMs); - Y_VERIFY_DEBUG(GetFastReadResponseTimeHist256Ki[idx]); - GetFastReadResponseTimeHist256Ki[idx]->Collect(durationMsFloat); - } else { - GetFastReadResponseTimeInf.Increment(durationMs); - Y_VERIFY_DEBUG(GetFastReadResponseTimeHistInf[idx]); - GetFastReadResponseTimeHistInf[idx]->Collect(durationMsFloat); - } - break; - case 
NKikimrBlobStorage::EGetHandleClass::Discover: - GetDiscoverResponseTime.Increment(durationMs); - Y_VERIFY_DEBUG(GetDiscoverResponseTimeHist[idx]); - GetDiscoverResponseTimeHist[idx]->Collect(durationMsFloat); - break; +ui32 IdxForType(TPDiskCategory::EDeviceType type) { + switch (type) { + case TPDiskCategory::DEVICE_TYPE_ROT: return 0; + case TPDiskCategory::DEVICE_TYPE_SSD: return 1; + case TPDiskCategory::DEVICE_TYPE_NVME: return 2; + case TPDiskCategory::DEVICE_TYPE_UNKNOWN: return 3; + } + return 3; +} + +void TDsProxyNodeMon::CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, + ui32 size, TDuration duration) { + const ui32 durationMs = duration.MilliSeconds(); + const double durationMsFloat = duration.MicroSeconds() / 1000.0; + PutResponseTime.Increment(durationMs); + const ui32 idx = IdxForType(type); + Y_VERIFY(IsCountersPresentedForIdx[idx]); + switch (cls) { + case NKikimrBlobStorage::EPutHandleClass::TabletLog: + PutTabletLogResponseTime.Increment(durationMs); + if (size < (256 << 10)) { + PutTabletLogResponseTime256.Increment(durationMs); + Y_VERIFY_DEBUG(PutTabletLogResponseTimeHist256Ki[idx]); + PutTabletLogResponseTimeHist256Ki[idx]->Collect(durationMsFloat); + } else { + Y_VERIFY_DEBUG(PutTabletLogResponseTimeHistInf[idx]); + PutTabletLogResponseTimeHistInf[idx]->Collect(durationMsFloat); + if (size < (512 << 10)) { + PutTabletLogResponseTime512.Increment(durationMs); + } + } + break; + case NKikimrBlobStorage::EPutHandleClass::AsyncBlob: + PutAsyncBlobResponseTime.Increment(durationMs); + Y_VERIFY_DEBUG(PutAsyncBlobResponseTimeHist[idx]); + PutAsyncBlobResponseTimeHist[idx]->Collect(durationMsFloat); + break; + case NKikimrBlobStorage::EPutHandleClass::UserData: + PutUserDataResponseTime.Increment(durationMs); + Y_VERIFY_DEBUG(PutUserDataResponseTimeHist[idx]); + PutUserDataResponseTimeHist[idx]->Collect(durationMsFloat); + break; + default: + Y_FAIL("Unexpected case, HandleClass# %" PRIu64, (ui64)cls); 
+ } +} + +void TDsProxyNodeMon::CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, + ui32 size, TDuration duration) { + const ui32 durationMs = duration.MilliSeconds(); + const double durationMsFloat = duration.MicroSeconds() / 1000.0; + GetResponseTime.Increment(durationMs); + const ui32 idx = IdxForType(type); + Y_VERIFY(IsCountersPresentedForIdx[idx]); + switch (cls) { + case NKikimrBlobStorage::EGetHandleClass::AsyncRead: + GetAsyncReadResponseTime.Increment(durationMs); + Y_VERIFY_DEBUG(GetAsyncReadResponseTimeHist[idx]); + GetAsyncReadResponseTimeHist[idx]->Collect(durationMsFloat); + break; + case NKikimrBlobStorage::EGetHandleClass::FastRead: + if (size < (256 << 10)) { + GetFastReadResponseTime256Ki.Increment(durationMs); + Y_VERIFY_DEBUG(GetFastReadResponseTimeHist256Ki[idx]); + GetFastReadResponseTimeHist256Ki[idx]->Collect(durationMsFloat); + } else { + GetFastReadResponseTimeInf.Increment(durationMs); + Y_VERIFY_DEBUG(GetFastReadResponseTimeHistInf[idx]); + GetFastReadResponseTimeHistInf[idx]->Collect(durationMsFloat); + } + break; + case NKikimrBlobStorage::EGetHandleClass::Discover: + GetDiscoverResponseTime.Increment(durationMs); + Y_VERIFY_DEBUG(GetDiscoverResponseTimeHist[idx]); + GetDiscoverResponseTimeHist[idx]->Collect(durationMsFloat); + break; case NKikimrBlobStorage::EGetHandleClass::LowRead: GetLowReadResponseTime.Increment(durationMs); Y_VERIFY_DEBUG(GetLowReadResponseTimeHist[idx]); GetLowReadResponseTimeHist[idx]->Collect(durationMsFloat); break; - default: - Y_FAIL("Unexpected case, HandleClass# %" PRIu64, (ui64)cls); - } -} - + default: + Y_FAIL("Unexpected case, HandleClass# %" PRIu64, (ui64)cls); + } +} + void TDsProxyNodeMon::CountPatchResponseTime(TPDiskCategory::EDeviceType type, TDuration duration) { const ui32 durationMs = duration.MilliSeconds(); const double durationMsFloat = duration.MicroSeconds() / 1000.0; @@ -179,29 +179,29 @@ void 
TDsProxyNodeMon::CountPatchResponseTime(TPDiskCategory::EDeviceType type, T PatchResponseTimeHist[idx]->Collect(durationMsFloat); } -void TDsProxyNodeMon::CheckNodeMonCountersForDeviceType(TPDiskCategory::EDeviceType type) { - const ui32 idx = IdxForType(type); - - if (!IsCountersPresentedForIdx[idx]) { - IsCountersPresentedForIdx[idx] = true; - TIntrusivePtr<NMonitoring::TDynamicCounters> subGroup = - Group->GetSubgroup("media", to_lower(TPDiskCategory::DeviceTypeStr(type, true))); - - auto getNamedHisto = [&subGroup, &type] (const TString& name) { - auto buckets = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(type)); +void TDsProxyNodeMon::CheckNodeMonCountersForDeviceType(TPDiskCategory::EDeviceType type) { + const ui32 idx = IdxForType(type); + + if (!IsCountersPresentedForIdx[idx]) { + IsCountersPresentedForIdx[idx] = true; + TIntrusivePtr<NMonitoring::TDynamicCounters> subGroup = + Group->GetSubgroup("media", to_lower(TPDiskCategory::DeviceTypeStr(type, true))); + + auto getNamedHisto = [&subGroup, &type] (const TString& name) { + auto buckets = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(type)); return subGroup->GetHistogram(name, std::move(buckets)); - }; - - PutTabletLogResponseTimeHist256Ki[idx] = getNamedHisto("putTabletLog256KiMs"); - PutTabletLogResponseTimeHistInf[idx] = getNamedHisto("putTabletLogInfMs"); - PutAsyncBlobResponseTimeHist[idx] = getNamedHisto("putAsyncBlobMs"); - PutUserDataResponseTimeHist[idx] = getNamedHisto("putUserDataMs"); - GetAsyncReadResponseTimeHist[idx] = getNamedHisto("getAsyncReadMs"); - GetFastReadResponseTimeHist256Ki[idx] = getNamedHisto("getFastRead256KiMs"); - GetFastReadResponseTimeHistInf[idx] = getNamedHisto("getFastReadInfMs"); - GetDiscoverResponseTimeHist[idx] = getNamedHisto("getDiscoverMs"); + }; + + PutTabletLogResponseTimeHist256Ki[idx] = getNamedHisto("putTabletLog256KiMs"); + PutTabletLogResponseTimeHistInf[idx] = getNamedHisto("putTabletLogInfMs"); + 
PutAsyncBlobResponseTimeHist[idx] = getNamedHisto("putAsyncBlobMs"); + PutUserDataResponseTimeHist[idx] = getNamedHisto("putUserDataMs"); + GetAsyncReadResponseTimeHist[idx] = getNamedHisto("getAsyncReadMs"); + GetFastReadResponseTimeHist256Ki[idx] = getNamedHisto("getFastRead256KiMs"); + GetFastReadResponseTimeHistInf[idx] = getNamedHisto("getFastReadInfMs"); + GetDiscoverResponseTimeHist[idx] = getNamedHisto("getDiscoverMs"); GetLowReadResponseTimeHist[idx] = getNamedHisto("getLowReadMs"); PatchResponseTimeHist[idx] = getNamedHisto("patchMs"); - } -} + } +} } // NKikimr diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.h b/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.h index cf8c540e55..150da172fb 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_nodemon.h @@ -11,10 +11,10 @@ #include <ydb/core/base/group_stat.h> #include <ydb/core/util/throughput_meter.h> #include <ydb/core/mon/mon.h> - + #include <library/cpp/monlib/dynamic_counters/percentile/percentile.h> #include <library/cpp/monlib/metrics/histogram_snapshot.h> - + #include <util/generic/ptr.h> namespace NKikimr { @@ -24,7 +24,7 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct TDsProxyNodeMon : public TThrRefBase { - TIntrusivePtr<NMonitoring::TDynamicCounters> Group; + TIntrusivePtr<NMonitoring::TDynamicCounters> Group; TIntrusivePtr<NMonitoring::TDynamicCounters> LatencyOverview; NMonitoring::TPercentileTracker<4, 512, 15> PutResponseTime; @@ -33,25 +33,25 @@ struct TDsProxyNodeMon : public TThrRefBase { NMonitoring::TPercentileTracker<4, 512, 15> PutTabletLogResponseTime256; NMonitoring::TPercentileTracker<4, 512, 15> PutTabletLogResponseTime512; - static constexpr ui32 KnownDeviceTypesCount = 4; - using THistoPtrForDeviceType = std::array<NMonitoring::THistogramPtr, KnownDeviceTypesCount>; - THistoPtrForDeviceType 
PutTabletLogResponseTimeHist256Ki; - THistoPtrForDeviceType PutTabletLogResponseTimeHistInf; - + static constexpr ui32 KnownDeviceTypesCount = 4; + using THistoPtrForDeviceType = std::array<NMonitoring::THistogramPtr, KnownDeviceTypesCount>; + THistoPtrForDeviceType PutTabletLogResponseTimeHist256Ki; + THistoPtrForDeviceType PutTabletLogResponseTimeHistInf; + NMonitoring::TPercentileTracker<4, 512, 15> PutAsyncBlobResponseTime; - THistoPtrForDeviceType PutAsyncBlobResponseTimeHist; + THistoPtrForDeviceType PutAsyncBlobResponseTimeHist; NMonitoring::TPercentileTracker<4, 512, 15> PutUserDataResponseTime; - THistoPtrForDeviceType PutUserDataResponseTimeHist; + THistoPtrForDeviceType PutUserDataResponseTimeHist; NMonitoring::TPercentileTracker<16, 512, 15> GetResponseTime; - NMonitoring::TPercentileTracker<16, 512, 15> GetAsyncReadResponseTime; - THistoPtrForDeviceType GetAsyncReadResponseTimeHist; - NMonitoring::TPercentileTracker<16, 512, 15> GetFastReadResponseTime256Ki; - THistoPtrForDeviceType GetFastReadResponseTimeHist256Ki; - NMonitoring::TPercentileTracker<16, 512, 15> GetFastReadResponseTimeInf; - THistoPtrForDeviceType GetFastReadResponseTimeHistInf; - NMonitoring::TPercentileTracker<16, 512, 15> GetDiscoverResponseTime; - THistoPtrForDeviceType GetDiscoverResponseTimeHist; + NMonitoring::TPercentileTracker<16, 512, 15> GetAsyncReadResponseTime; + THistoPtrForDeviceType GetAsyncReadResponseTimeHist; + NMonitoring::TPercentileTracker<16, 512, 15> GetFastReadResponseTime256Ki; + THistoPtrForDeviceType GetFastReadResponseTimeHist256Ki; + NMonitoring::TPercentileTracker<16, 512, 15> GetFastReadResponseTimeInf; + THistoPtrForDeviceType GetFastReadResponseTimeHistInf; + NMonitoring::TPercentileTracker<16, 512, 15> GetDiscoverResponseTime; + THistoPtrForDeviceType GetDiscoverResponseTimeHist; NMonitoring::TPercentileTracker<16, 512, 15> GetLowReadResponseTime; THistoPtrForDeviceType GetLowReadResponseTimeHist; @@ -63,7 +63,7 @@ struct TDsProxyNodeMon : public 
TThrRefBase { NMonitoring::TPercentileTracker<16, 512, 15> IndexRestoreGetResponseTime; NMonitoring::TPercentileTracker<16, 512, 15> RangeResponseTime; - std::array<bool, KnownDeviceTypesCount> IsCountersPresentedForIdx; + std::array<bool, KnownDeviceTypesCount> IsCountersPresentedForIdx; // restart counters NMonitoring::TDynamicCounters::TCounterPtr RestartPut; @@ -90,15 +90,15 @@ struct TDsProxyNodeMon : public TThrRefBase { NMonitoring::TDynamicCounters::TCounterPtr UnconfiguredTimeout; NMonitoring::TDynamicCounters::TCounterPtr UnconfiguredTimeout5min; - TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, bool initForAllDeviceTypes); - void CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, ui32 size, - TDuration duration); - void CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, ui32 size, - TDuration duration); + TDsProxyNodeMon(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, bool initForAllDeviceTypes); + void CountPutPesponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EPutHandleClass cls, ui32 size, + TDuration duration); + void CountGetResponseTime(TPDiskCategory::EDeviceType type, NKikimrBlobStorage::EGetHandleClass cls, ui32 size, + TDuration duration); void CountPatchResponseTime(TPDiskCategory::EDeviceType type, TDuration duration); - - // Called only from NodeWarder - void CheckNodeMonCountersForDeviceType(TPDiskCategory::EDeviceType type); + + // Called only from NodeWarder + void CheckNodeMonCountersForDeviceType(TPDiskCategory::EDeviceType type); }; } // NKikimr diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp index fce3e2700c..d9e0e2b320 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp @@ -8,10 +8,10 @@ #include <ydb/core/blobstorage/base/wilson_events.h> #include <util/generic/ymath.h> -#include 
<util/system/datetime.h> +#include <util/system/datetime.h> #include <util/system/hp_timer.h> -LWTRACE_USING(BLOBSTORAGE_PROVIDER); +LWTRACE_USING(BLOBSTORAGE_PROVIDER); namespace NKikimr { @@ -146,7 +146,7 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor<TBlobSt WILSON_TRACE_FROM_ACTOR(*TlsActivationContext, *this, &TraceId, EvVPutResultReceived, MergedNode = std::move(ev->TraceId)); ResponsesReceived++; - const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; + const ui64 cyclesPerUs = NHPTimer::GetCyclesPerSecond() / 1000000; ev->Get()->Record.MutableTimestamps()->SetReceivedByDSProxyUs(GetCycleCountFast() / cyclesPerUs); const NKikimrBlobStorage::TEvVPutResult &record = ev->Get()->Record; const TLogoBlobID blob = LogoBlobIDFromLogoBlobID(record.GetBlobID()); @@ -169,24 +169,24 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor<TBlobSt TimeStats.ApplyPut(ItemsInfo[idx].BufferSize, record.GetMsgQoS().GetExecTimeStats()); } - Y_VERIFY(record.HasVDiskID()); - TVDiskID vDiskId = VDiskIDFromVDiskID(record.GetVDiskID()); + Y_VERIFY(record.HasVDiskID()); + TVDiskID vDiskId = VDiskIDFromVDiskID(record.GetVDiskID()); const TVDiskIdShort shortId(vDiskId); - - LWPROBE(DSProxyVDiskRequestDuration, TEvBlobStorage::EvVPut, blob.BlobSize(), blob.TabletID(), + + LWPROBE(DSProxyVDiskRequestDuration, TEvBlobStorage::EvVPut, blob.BlobSize(), blob.TabletID(), Info->GroupID, blob.Channel(), Info->GetFailDomainOrderNumber(shortId), GetStartTime(record.GetTimestamps()), GetTotalTimeMs(record.GetTimestamps()), GetVDiskTimeMs(record.GetTimestamps()), GetTotalTimeMs(record.GetTimestamps()) - GetVDiskTimeMs(record.GetTimestamps()), NKikimrBlobStorage::EPutHandleClass_Name(PutImpl.GetPutHandleClass()), - NKikimrProto::EReplyStatus_Name(status)); + NKikimrProto::EReplyStatus_Name(status)); if (RootCauseTrack.IsOn) { RootCauseTrack.OnReply(cookie.GetCauseIdx(), GetTotalTimeMs(record.GetTimestamps()) - 
GetVDiskTimeMs(record.GetTimestamps()), GetVDiskTimeMs(record.GetTimestamps())); } - + TDeque<std::unique_ptr<TEvBlobStorage::TEvVPut>> vPuts; TPutImpl::TPutResultVec putResults; PutImpl.OnVPutEventResult(LogCtx, ev->Sender, *ev->Get(), vPuts, putResults); @@ -446,7 +446,7 @@ public: const TIntrusivePtr<TGroupQueues> &state, const TActorId &source, const TIntrusivePtr<TBlobStorageGroupProxyMon> &mon, TEvBlobStorage::TEvPut *ev, ui64 cookie, NWilson::TTraceId traceId, bool timeStatsEnabled, - TDiskResponsivenessTracker::TPerDiskStatsPtr stats, + TDiskResponsivenessTracker::TPerDiskStatsPtr stats, TMaybe<TGroupStat::EKind> latencyQueueKind, TInstant now, TIntrusivePtr<TStoragePoolCounters> &storagePoolCounters, bool enableRequestMod3x3ForMinLatecy) @@ -475,7 +475,7 @@ public: ReportBytes(ItemsInfo[0].Buffer.capacity() + sizeof(*this)); RequestBytes = ev->Buffer.size(); - RequestHandleClass = HandleClassToHandleClass(HandleClass); + RequestHandleClass = HandleClassToHandleClass(HandleClass); MaxSaneRequests = info->Type.TotalPartCount() * (1ull + info->Type.Handoff()) * 2; } @@ -724,7 +724,7 @@ IActor* CreateBlobStorageGroupPutRequest(const TIntrusivePtr<TBlobStorageGroupIn const TIntrusivePtr<TGroupQueues> &state, const TActorId &source, const TIntrusivePtr<TBlobStorageGroupProxyMon> &mon, TEvBlobStorage::TEvPut *ev, ui64 cookie, NWilson::TTraceId traceId, bool timeStatsEnabled, - TDiskResponsivenessTracker::TPerDiskStatsPtr stats, + TDiskResponsivenessTracker::TPerDiskStatsPtr stats, TMaybe<TGroupStat::EKind> latencyQueueKind, TInstant now, TIntrusivePtr<TStoragePoolCounters> &storagePoolCounters, bool enableRequestMod3x3ForMinLatecy) { diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h b/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h index 8d432b08a7..34f675032b 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h @@ -38,7 +38,7 @@ private: ui32 VPutResponses = 0; ui32 VMultiPutRequests = 0; 
ui32 VMultiPutResponses = 0; - bool AtLeastOneResponseWasNotOk = false; + bool AtLeastOneResponseWasNotOk = false; bool EnableRequestMod3x3ForMinLatecy = false; ui64 DoneBlobs = 0; diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut.cpp index 7cf2ff3b5c..6a8fbff0e2 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut.cpp @@ -64,7 +64,7 @@ Y_UNIT_TEST_SUITE(TBsProxyFaultToleranceTest) { //ERASURE_TEST(Erasure4Plus2Stripe) //ERASURE_TEST(Erasure3Plus2Stripe) //ERASURE_TEST(ErasureMirror3Plus2) - ERASURE_TEST(ErasureMirror3dc) + ERASURE_TEST(ErasureMirror3dc) ERASURE_TEST(ErasureMirror3of4) Y_UNIT_TEST(CheckGetHardenedErasureMirror3dcCount4Idx0) { RunTest<TGetHardenedFaultToleranceTest>(TBlobStorageGroupType::ErasureMirror3dc, true, 4, 0); } diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_base.h b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_base.h index e4347b9314..c4b5c76f39 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_base.h +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_base.h @@ -66,10 +66,10 @@ public: } } - NKikimrProto::EReplyStatus PutWithResult(const TLogoBlobID& id, const TString& buffer, TEvBlobStorage::TEvPut::ETactic tactic - = TEvBlobStorage::TEvPut::TacticDefault) { - SendToBSProxy(GetActorContext(), Info->GroupID, new TEvBlobStorage::TEvPut(id, buffer, TInstant::Max(), - NKikimrBlobStorage::TabletLog, tactic)); + NKikimrProto::EReplyStatus PutWithResult(const TLogoBlobID& id, const TString& buffer, TEvBlobStorage::TEvPut::ETactic tactic + = TEvBlobStorage::TEvPut::TacticDefault) { + SendToBSProxy(GetActorContext(), Info->GroupID, new TEvBlobStorage::TEvPut(id, buffer, TInstant::Max(), + NKikimrBlobStorage::TabletLog, tactic)); auto resp = WaitForSpecificEvent<TEvBlobStorage::TEvPutResult>(); CTEST << 
(TStringBuilder() << "PutResult: " << resp->Get()->ToString() << Endl); if (resp->Get()->Status == NKikimrProto::OK && Info->Type.GetErasure() == TBlobStorageGroupType::ErasureMirror3of4) { diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_range.h b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_range.h index d2f3db7740..c055d68eab 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_range.h +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_range.h @@ -13,11 +13,11 @@ class TRangeFaultToleranceTest : public TFaultToleranceTestBase<TRangeFaultToler public: using TFaultToleranceTestBase::TFaultToleranceTestBase; - void Check(ui64 tabletId, const TBlobStorageGroupInfo::TGroupVDisks& disks, - NKikimrProto::EReplyStatus defaultExpectedStatus = NKikimrProto::OK) { + void Check(ui64 tabletId, const TBlobStorageGroupInfo::TGroupVDisks& disks, + NKikimrProto::EReplyStatus defaultExpectedStatus = NKikimrProto::OK) { // Cerr << (TStringBuilder() << "]] " << disks.ToString() << Endl); for (ui32 generation = 1; generation <= 4; ++generation) { - NKikimrProto::EReplyStatus expectedStatus = defaultExpectedStatus; + NKikimrProto::EReplyStatus expectedStatus = defaultExpectedStatus; TVector<TEvBlobStorage::TEvRangeResult::TResponse> expectedResponse; ui32 statusMap = generation - 1; @@ -32,11 +32,11 @@ public: } buffer += b; TLogoBlobID id(tabletId, generation, step, 0 /*channel*/, buffer.size(), 0); - if (defaultExpectedStatus == NKikimrProto::OK) { - UNIT_ASSERT_VALUES_EQUAL(NKikimrProto::OK, PutWithResult(id, buffer, TEvBlobStorage::TEvPut::TacticMaxThroughput)); - } else { - PutWithResult(id, buffer, TEvBlobStorage::TEvPut::TacticMaxThroughput); - } + if (defaultExpectedStatus == NKikimrProto::OK) { + UNIT_ASSERT_VALUES_EQUAL(NKikimrProto::OK, PutWithResult(id, buffer, TEvBlobStorage::TEvPut::TacticMaxThroughput)); + } else { + PutWithResult(id, buffer, TEvBlobStorage::TEvPut::TacticMaxThroughput); + } 
TBlobStorageGroupInfo::TVDiskIds vdisks; TBlobStorageGroupInfo::TServiceIds services; @@ -152,10 +152,10 @@ public: SetFailedDisks(disks); Check(tabletId++, TBlobStorageGroupInfo::TGroupVDisks(&Info->GetTopology())); } - for (const auto& disks : FaultsExceedingFailModel) { - SetFailedDisks(disks); - Check(tabletId++, TBlobStorageGroupInfo::TGroupVDisks(&Info->GetTopology()), NKikimrProto::ERROR); - } + for (const auto& disks : FaultsExceedingFailModel) { + SetFailedDisks(disks); + Check(tabletId++, TBlobStorageGroupInfo::TGroupVDisks(&Info->GetTopology()), NKikimrProto::ERROR); + } } }; diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_runtime.h b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_runtime.h index 3984e2fa16..bc33af4b86 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_runtime.h +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_fault_tolerance_ut_runtime.h @@ -82,7 +82,7 @@ public: TActorSetupCmd(CreateVDiskMockActor(pair.first, shared, GroupInfo->PickTopology()), TMailboxType::Simple, 0)); } - TIntrusivePtr<TDsProxyNodeMon> nodeMon(new TDsProxyNodeMon(Counters, true)); + TIntrusivePtr<TDsProxyNodeMon> nodeMon(new TDsProxyNodeMon(Counters, true)); TDsProxyPerPoolCounters perPoolCounters(Counters); TIntrusivePtr<TStoragePoolCounters> storagePoolCounters = perPoolCounters.GetPoolCounters("pool_name"); TControlWrapper enablePutBatching(DefaultEnablePutBatching, false, true); diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp index d1799a030a..85e16c2aef 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp @@ -10,25 +10,25 @@ #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/testing/unittest/registar.h> -#include <util/random/shuffle.h> -#include <util/stream/str.h> +#include <util/random/shuffle.h> +#include <util/stream/str.h> namespace 
NKikimr { namespace NDSProxyGetTest { constexpr bool IsVerbose = false; - -#define VERBOSE(str) \ -do { \ - if (IsVerbose) { \ - Cout << str << Endl; \ - } \ -} while (false) \ - + +#define VERBOSE(str) \ +do { \ + if (IsVerbose) { \ + Cout << str << Endl; \ + } \ +} while (false) \ + Y_UNIT_TEST_SUITE(TDSProxyGetTest) { -void TestIntervalsAndCrcAllOk(TErasureType::EErasureSpecies erasureSpecies, bool isVerboseNoDataEnabled, bool checkCrc) { +void TestIntervalsAndCrcAllOk(TErasureType::EErasureSpecies erasureSpecies, bool isVerboseNoDataEnabled, bool checkCrc) { TActorSystemStub actorSystemStub; TBlobStorageGroupType groupType(erasureSpecies); @@ -42,13 +42,13 @@ void TestIntervalsAndCrcAllOk(TErasureType::EErasureSpecies erasureSpecies, bool const ui64 maxQueryCount = 32; TBlobTestSet blobSet; - if (checkCrc) { - blobSet.GenerateSet(2, maxQueryCount); - } else { - blobSet.GenerateSet(0, maxQueryCount); - } + if (checkCrc) { + blobSet.GenerateSet(2, maxQueryCount); + } else { + blobSet.GenerateSet(0, maxQueryCount); + } group.PutBlobSet(blobSet); - + for (ui64 queryCount = 1; queryCount <= maxQueryCount; ++queryCount) { CTEST << "queryCount# " << queryCount << Endl; for (ui64 blobCount = 1; blobCount <= maxQueryCount; ++blobCount) { @@ -57,18 +57,18 @@ void TestIntervalsAndCrcAllOk(TErasureType::EErasureSpecies erasureSpecies, bool for (ui64 queryIdx = 0; queryIdx < queryCount; ++queryIdx) { TEvBlobStorage::TEvGet::TQuery &q = queriesA[queryIdx]; q.Id = blobSet.Get(queryIdx % blobCount).Id; - if (checkCrc) { - q.Shift = (queryIdx % groupType.DataParts()) * groupType.PartUserSize(q.Id.BlobSize()); - q.Shift = q.Shift <= q.Id.BlobSize() ? 
q.Shift : 0; - q.Size = Max((ui64)16, (ui64)(queryIdx * 177) % (q.Id.BlobSize() - q.Shift)); - } else { - q.Shift = (queryIdx * 177) % q.Id.BlobSize(); - q.Size = Min((ui64)70, (ui64)q.Id.BlobSize() - (ui64)q.Shift); - } + if (checkCrc) { + q.Shift = (queryIdx % groupType.DataParts()) * groupType.PartUserSize(q.Id.BlobSize()); + q.Shift = q.Shift <= q.Id.BlobSize() ? q.Shift : 0; + q.Size = Max((ui64)16, (ui64)(queryIdx * 177) % (q.Id.BlobSize() - q.Shift)); + } else { + q.Shift = (queryIdx * 177) % q.Id.BlobSize(); + q.Size = Min((ui64)70, (ui64)q.Id.BlobSize() - (ui64)q.Shift); + } queriesB[queryIdx] = q; CTEST << "query# " << queryIdx << " shift# " << q.Shift << " size# " << q.Size << Endl; } - TEvBlobStorage::TEvGet ev(queriesA, queryCount, TInstant::Max(), + TEvBlobStorage::TEvGet ev(queriesA, queryCount, TInstant::Max(), NKikimrBlobStorage::EGetHandleClass::FastRead, false, false); ev.IsVerboseNoDataEnabled = isVerboseNoDataEnabled; TGetImpl getImpl(group.GetInfo(), groupQueues, &ev, nullptr); @@ -127,32 +127,32 @@ void TestIntervalsAndCrcAllOk(TErasureType::EErasureSpecies erasureSpecies, bool return; } -// Without CRC +// Without CRC Y_UNIT_TEST(TestBlock42GetIntervalsAllOk) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, false); + TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, false); } Y_UNIT_TEST(TestBlock42GetIntervalsAllOkVerbose) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, false); + TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, false); } Y_UNIT_TEST(TestMirror32GetIntervalsAllOk) { - TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, false); + TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, false); +} + +// With CRC +Y_UNIT_TEST(TestBlock42GetBlobCrcCheck) { + TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, true); +} + +Y_UNIT_TEST(TestBlock42GetBlobCrcCheckVerbose) { + 
TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, true); +} + +Y_UNIT_TEST(TestMirror32GetBlobCrcCheck) { + TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, true); } -// With CRC -Y_UNIT_TEST(TestBlock42GetBlobCrcCheck) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, true); -} - -Y_UNIT_TEST(TestBlock42GetBlobCrcCheckVerbose) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, true); -} - -Y_UNIT_TEST(TestMirror32GetBlobCrcCheck) { - TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, true); -} - class TTestWipedAllOkStep { enum ETVPutEventKind { TVPEK_VPUT, @@ -1332,7 +1332,7 @@ void SpecificTest(ui32 badA, ui32 badB, ui32 blobSize, TMap<i64, i64> sizeForOff } } } -} +} Y_UNIT_TEST(TestBlock42GetSpecific) { TMap<i64, i64> sizeForOffset; @@ -1357,217 +1357,217 @@ Y_UNIT_TEST(TestBlock42GetSpecific3) { } Y_UNIT_TEST_SUITE(TDSProxyLooksLikeLostTheBlob) { - -class TTestPossibleBlobLost { - TErasureType::EErasureSpecies ErasureSpecies; - TBlobStorageGroupType GroupType; - const ui32 DomainCount; - const ui64 MaxQueryCount; - const ui64 BlobSize; - TGroupMock Group; - TVector<NKikimrProto::EReplyStatus> ErroneousVDisks; - TBlobTestSet BlobSet; - ui32 InitialRequestsSize; - TVector<ui64> RequestsOrder; + +class TTestPossibleBlobLost { + TErasureType::EErasureSpecies ErasureSpecies; + TBlobStorageGroupType GroupType; + const ui32 DomainCount; + const ui64 MaxQueryCount; + const ui64 BlobSize; + TGroupMock Group; + TVector<NKikimrProto::EReplyStatus> ErroneousVDisks; + TBlobTestSet BlobSet; + ui32 InitialRequestsSize; + TVector<ui64> RequestsOrder; TActorSystemStub actorSystemStub; - - static constexpr ui64 RunTestStep = 100; - const ui64 SeedDays = TInstant::Now().Days(); - ui64 TestIteration = SeedDays; - - ui32 CalculateInitialRequestsSize() { - TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesA(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); - 
TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesB(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); - for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { - TEvBlobStorage::TEvGet::TQuery &q = queriesA[queryIdx]; - q.Id = BlobSet.Get(queryIdx).Id; - q.Shift = 0; - q.Size = BlobSize; - queriesB[queryIdx] = q; - VERBOSE("query# " << queryIdx << " shift# " << q.Shift << " size# " << q.Size); - } + + static constexpr ui64 RunTestStep = 100; + const ui64 SeedDays = TInstant::Now().Days(); + ui64 TestIteration = SeedDays; + + ui32 CalculateInitialRequestsSize() { + TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesA(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); + TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesB(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); + for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { + TEvBlobStorage::TEvGet::TQuery &q = queriesA[queryIdx]; + q.Id = BlobSet.Get(queryIdx).Id; + q.Shift = 0; + q.Size = BlobSize; + queriesB[queryIdx] = q; + VERBOSE("query# " << queryIdx << " shift# " << q.Shift << " size# " << q.Size); + } TIntrusivePtr<TGroupQueues> groupQueues = Group.MakeGroupQueues(); - TLogContext logCtx(NKikimrServices::BS_PROXY_GET, false); - logCtx.LogAcc.IsLogEnabled = false; - TEvBlobStorage::TEvGet ev(queriesA, MaxQueryCount, TInstant::Max(), - NKikimrBlobStorage::EGetHandleClass::Discover, true, false); - ev.IsVerboseNoDataEnabled = false; + TLogContext logCtx(NKikimrServices::BS_PROXY_GET, false); + logCtx.LogAcc.IsLogEnabled = false; + TEvBlobStorage::TEvGet ev(queriesA, MaxQueryCount, TInstant::Max(), + NKikimrBlobStorage::EGetHandleClass::Discover, true, false); + ev.IsVerboseNoDataEnabled = false; TGetImpl getImpl(Group.GetInfo(), groupQueues, &ev, nullptr); TDeque<std::unique_ptr<TEvBlobStorage::TEvVGet>> vGets; getImpl.GenerateInitialRequests(logCtx, vGets); - return vGets.size(); - } - - -public: - TTestPossibleBlobLost(TErasureType::EErasureSpecies erasure, const ui64 blobSize, 
const ui64 maxQueryCount, - TVector<NKikimrProto::EReplyStatus> injectErrors) - : ErasureSpecies(erasure) - , GroupType(erasure) - , DomainCount(GroupType.BlobSubgroupSize()) - , MaxQueryCount(maxQueryCount) - , BlobSize(blobSize) - , Group(0, ErasureSpecies, DomainCount, 1) - , ErroneousVDisks(Group.GetInfo()->Type.BlobSubgroupSize(), NKikimrProto::OK) - { - BlobSet.GenerateSet(0, MaxQueryCount, BlobSize); + return vGets.size(); + } + + +public: + TTestPossibleBlobLost(TErasureType::EErasureSpecies erasure, const ui64 blobSize, const ui64 maxQueryCount, + TVector<NKikimrProto::EReplyStatus> injectErrors) + : ErasureSpecies(erasure) + , GroupType(erasure) + , DomainCount(GroupType.BlobSubgroupSize()) + , MaxQueryCount(maxQueryCount) + , BlobSize(blobSize) + , Group(0, ErasureSpecies, DomainCount, 1) + , ErroneousVDisks(Group.GetInfo()->Type.BlobSubgroupSize(), NKikimrProto::OK) + { + BlobSet.GenerateSet(0, MaxQueryCount, BlobSize); Group.PutBlobSet(BlobSet); - - UNIT_ASSERT(injectErrors.size() <= ErroneousVDisks.size()); - Copy(injectErrors.begin(), injectErrors.end(), ErroneousVDisks.begin()); - - InitialRequestsSize = CalculateInitialRequestsSize(); - RequestsOrder.resize(InitialRequestsSize); - for (ui32 i = 0; i < RequestsOrder.size(); ++i) { - RequestsOrder[i] = i; - } - Sort(ErroneousVDisks.begin(), ErroneousVDisks.end()); - } - - void TestStep(NKikimrProto::EReplyStatus &gotResultPrevStatus, - TVector<NKikimrProto::EReplyStatus> &gotResultPrevBlobStatus, bool isFirstIteraion) { - TStringStream currentTestState; - TAutoPtr<TEvBlobStorage::TEvGetResult> getResult; - currentTestState << "VDisk's errors mask# { "; - for (const NKikimrProto::EReplyStatus error : ErroneousVDisks) { - currentTestState << error << ", "; - } - currentTestState << "} "; - currentTestState << "Requests's order # { "; - for (const ui32 idx : RequestsOrder) { - currentTestState << idx << ", "; - } - currentTestState << "} "; - currentTestState << "SeedDays# " << SeedDays << " "; - 
VERBOSE(currentTestState.Str()); - TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesA(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); - TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesB(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); - for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { - TEvBlobStorage::TEvGet::TQuery &q = queriesA[queryIdx]; - q.Id = BlobSet.Get(queryIdx).Id; - q.Shift = 0; - q.Size = BlobSize; - queriesB[queryIdx] = q; - VERBOSE("query# " << queryIdx << " shift# " << q.Shift << " size# " << q.Size); - } - + + UNIT_ASSERT(injectErrors.size() <= ErroneousVDisks.size()); + Copy(injectErrors.begin(), injectErrors.end(), ErroneousVDisks.begin()); + + InitialRequestsSize = CalculateInitialRequestsSize(); + RequestsOrder.resize(InitialRequestsSize); + for (ui32 i = 0; i < RequestsOrder.size(); ++i) { + RequestsOrder[i] = i; + } + Sort(ErroneousVDisks.begin(), ErroneousVDisks.end()); + } + + void TestStep(NKikimrProto::EReplyStatus &gotResultPrevStatus, + TVector<NKikimrProto::EReplyStatus> &gotResultPrevBlobStatus, bool isFirstIteraion) { + TStringStream currentTestState; + TAutoPtr<TEvBlobStorage::TEvGetResult> getResult; + currentTestState << "VDisk's errors mask# { "; + for (const NKikimrProto::EReplyStatus error : ErroneousVDisks) { + currentTestState << error << ", "; + } + currentTestState << "} "; + currentTestState << "Requests's order # { "; + for (const ui32 idx : RequestsOrder) { + currentTestState << idx << ", "; + } + currentTestState << "} "; + currentTestState << "SeedDays# " << SeedDays << " "; + VERBOSE(currentTestState.Str()); + TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesA(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); + TArrayHolder<TEvBlobStorage::TEvGet::TQuery> queriesB(new TEvBlobStorage::TEvGet::TQuery[MaxQueryCount]); + for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { + TEvBlobStorage::TEvGet::TQuery &q = queriesA[queryIdx]; + q.Id = BlobSet.Get(queryIdx).Id; + q.Shift = 
0; + q.Size = BlobSize; + queriesB[queryIdx] = q; + VERBOSE("query# " << queryIdx << " shift# " << q.Shift << " size# " << q.Size); + } + TIntrusivePtr<TGroupQueues> groupQueues = Group.MakeGroupQueues(); - TLogContext logCtx(NKikimrServices::BS_PROXY_GET, false); - logCtx.LogAcc.IsLogEnabled = false; - TEvBlobStorage::TEvGet ev(queriesA, MaxQueryCount, TInstant::Max(), - NKikimrBlobStorage::EGetHandleClass::Discover, true, false); - ev.IsVerboseNoDataEnabled = false; + TLogContext logCtx(NKikimrServices::BS_PROXY_GET, false); + logCtx.LogAcc.IsLogEnabled = false; + TEvBlobStorage::TEvGet ev(queriesA, MaxQueryCount, TInstant::Max(), + NKikimrBlobStorage::EGetHandleClass::Discover, true, false); + ev.IsVerboseNoDataEnabled = false; TGetImpl getImpl(Group.GetInfo(), groupQueues, &ev, nullptr); TDeque<std::unique_ptr<TEvBlobStorage::TEvVGet>> vGets; TDeque<std::unique_ptr<TEvBlobStorage::TEvVPut>> vPuts; getImpl.GenerateInitialRequests(logCtx, vGets); - for (ui32 i = 0; i < ErroneousVDisks.size(); ++i) { - if (ErroneousVDisks[i] == NKikimrProto::OK) { - Group.UnsetError(i); - } else { - Group.SetError(i, ErroneousVDisks[i]); - } - } - - Y_VERIFY(RequestsOrder.size() == vGets.size()); - for (ui64 vDIdx = 0; vDIdx < RequestsOrder.size(); ++vDIdx) { - const ui64 vGetIdx = RequestsOrder[vDIdx]; - auto &request = vGets[vGetIdx]->Record; - VERBOSE("vGetIdx# " << vGetIdx); - VERBOSE("Send TEvVGet to VDiskID# " << VDiskIDFromVDiskID(request.GetVDiskID())); - Y_VERIFY(request.HasCookie()); - //ui64 messageCookie = request->Record.GetCookie(); - TEvBlobStorage::TEvVGetResult vGetResult; - Group.OnVGet(*vGets[vGetIdx], vGetResult); - VERBOSE("vGetResult.ToString()# " << vGetResult.ToString()); - + for (ui32 i = 0; i < ErroneousVDisks.size(); ++i) { + if (ErroneousVDisks[i] == NKikimrProto::OK) { + Group.UnsetError(i); + } else { + Group.SetError(i, ErroneousVDisks[i]); + } + } + + Y_VERIFY(RequestsOrder.size() == vGets.size()); + for (ui64 vDIdx = 0; vDIdx < 
RequestsOrder.size(); ++vDIdx) { + const ui64 vGetIdx = RequestsOrder[vDIdx]; + auto &request = vGets[vGetIdx]->Record; + VERBOSE("vGetIdx# " << vGetIdx); + VERBOSE("Send TEvVGet to VDiskID# " << VDiskIDFromVDiskID(request.GetVDiskID())); + Y_VERIFY(request.HasCookie()); + //ui64 messageCookie = request->Record.GetCookie(); + TEvBlobStorage::TEvVGetResult vGetResult; + Group.OnVGet(*vGets[vGetIdx], vGetResult); + VERBOSE("vGetResult.ToString()# " << vGetResult.ToString()); + TDeque<std::unique_ptr<TEvBlobStorage::TEvVGet>> nextVGets; TDeque<std::unique_ptr<TEvBlobStorage::TEvVPut>> nextVPuts; getImpl.OnVGetResult(logCtx, vGetResult, nextVGets, nextVPuts, getResult); - for (ui64 i = 0; i < nextVPuts.size(); ++i) { - auto &vDiskID = nextVPuts[i]->Record.GetVDiskID(); - VERBOSE("Additional TEvVPut to VDiskID# " << VDiskIDFromVDiskID(vDiskID)); + for (ui64 i = 0; i < nextVPuts.size(); ++i) { + auto &vDiskID = nextVPuts[i]->Record.GetVDiskID(); + VERBOSE("Additional TEvVPut to VDiskID# " << VDiskIDFromVDiskID(vDiskID)); vPuts.push_back(std::move(nextVPuts[i])); - } - for (ui64 vPutIdx = 0; vPutIdx < vPuts.size(); ++vPutIdx) { - auto &putRequest = vPuts[vPutIdx]->Record; - Y_VERIFY(putRequest.HasCookie()); - TEvBlobStorage::TEvVPutResult vPutResult; + } + for (ui64 vPutIdx = 0; vPutIdx < vPuts.size(); ++vPutIdx) { + auto &putRequest = vPuts[vPutIdx]->Record; + Y_VERIFY(putRequest.HasCookie()); + TEvBlobStorage::TEvVPutResult vPutResult; vPutResult.MakeError(NKikimrProto::OK, TString(), putRequest); - + TDeque<std::unique_ptr<TEvBlobStorage::TEvVGet>> nextVGets; TDeque<std::unique_ptr<TEvBlobStorage::TEvVPut>> nextVPuts; getImpl.OnVPutResult(logCtx, vPutResult, nextVGets, nextVPuts, getResult); - UNIT_ASSERT_C(nextVGets.empty(), currentTestState.Str()); + UNIT_ASSERT_C(nextVGets.empty(), currentTestState.Str()); std::move(nextVPuts.begin(), nextVPuts.end(), std::back_inserter(vPuts)); - if (getResult) { - break; - } - } - for (ui64 i = 0; i < nextVGets.size(); ++i) { - 
auto &vDiskID = nextVGets[i]->Record.GetVDiskID(); - VERBOSE("Additional TEvVGet to VDiskID# " << VDiskIDFromVDiskID(vDiskID)); + if (getResult) { + break; + } + } + for (ui64 i = 0; i < nextVGets.size(); ++i) { + auto &vDiskID = nextVGets[i]->Record.GetVDiskID(); + VERBOSE("Additional TEvVGet to VDiskID# " << VDiskIDFromVDiskID(vDiskID)); vGets.push_back(std::move(nextVGets[i])); - RequestsOrder.push_back(RequestsOrder.size()); - } - if (getResult) { - break; - } - } - UNIT_ASSERT_C(getResult, currentTestState.Str()); - currentTestState << "getResult # " << getResult->Print(false); - if (isFirstIteraion) { - gotResultPrevStatus = getResult->Status; - for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { - gotResultPrevBlobStatus[queryIdx] = getResult->Responses[queryIdx].Status; - } - } else { - UNIT_ASSERT_C(gotResultPrevStatus == getResult->Status, currentTestState.Str()); - for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { - UNIT_ASSERT_C(gotResultPrevBlobStatus[queryIdx] == getResult->Responses[queryIdx].Status, - currentTestState.Str()); - } - } - for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { - if (getResult->Responses[queryIdx].Status == NKikimrProto::OK) { - TEvBlobStorage::TEvGetResult::TResponse &a = getResult->Responses[queryIdx]; - TEvBlobStorage::TEvGet::TQuery &q = queriesB[queryIdx]; - UNIT_ASSERT_VALUES_EQUAL_C(q.Id, a.Id, currentTestState.Str()); - UNIT_ASSERT_VALUES_EQUAL_C(a.Status, NKikimrProto::OK, currentTestState.Str()); - UNIT_ASSERT_VALUES_EQUAL_C(q.Shift, a.Shift, currentTestState.Str()); - UNIT_ASSERT_VALUES_EQUAL_C(q.Size, a.RequestedSize, currentTestState.Str()); - BlobSet.Check(queryIdx, q.Id, q.Shift, q.Size, a.Buffer); - } - } - RequestsOrder.resize(InitialRequestsSize); - } - - void Run() { - do { // while(std::next_permutation(ErroneousVDisks.begin(), ErroneousVDisks.end())); - NKikimrProto::EReplyStatus gotResultPrevStatus; - TVector<NKikimrProto::EReplyStatus> 
gotResultPrevBlobStatus(MaxQueryCount); - bool isFirstIteraion = true; - Sort(RequestsOrder.begin(), RequestsOrder.end()); - do { // while(std::next_permutation(RequestsOrder.begin(), RequestsOrder.end())); - ++TestIteration; - if (TestIteration % RunTestStep != 0) { - continue; // Comment this statement to run all tests - } - TestStep(gotResultPrevStatus, gotResultPrevBlobStatus, isFirstIteraion); - isFirstIteraion = false; - } while(std::next_permutation(RequestsOrder.begin(), RequestsOrder.end())); - } while(std::next_permutation(ErroneousVDisks.begin(), ErroneousVDisks.end())); - } -}; - + RequestsOrder.push_back(RequestsOrder.size()); + } + if (getResult) { + break; + } + } + UNIT_ASSERT_C(getResult, currentTestState.Str()); + currentTestState << "getResult # " << getResult->Print(false); + if (isFirstIteraion) { + gotResultPrevStatus = getResult->Status; + for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { + gotResultPrevBlobStatus[queryIdx] = getResult->Responses[queryIdx].Status; + } + } else { + UNIT_ASSERT_C(gotResultPrevStatus == getResult->Status, currentTestState.Str()); + for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { + UNIT_ASSERT_C(gotResultPrevBlobStatus[queryIdx] == getResult->Responses[queryIdx].Status, + currentTestState.Str()); + } + } + for (ui64 queryIdx = 0; queryIdx < MaxQueryCount; ++queryIdx) { + if (getResult->Responses[queryIdx].Status == NKikimrProto::OK) { + TEvBlobStorage::TEvGetResult::TResponse &a = getResult->Responses[queryIdx]; + TEvBlobStorage::TEvGet::TQuery &q = queriesB[queryIdx]; + UNIT_ASSERT_VALUES_EQUAL_C(q.Id, a.Id, currentTestState.Str()); + UNIT_ASSERT_VALUES_EQUAL_C(a.Status, NKikimrProto::OK, currentTestState.Str()); + UNIT_ASSERT_VALUES_EQUAL_C(q.Shift, a.Shift, currentTestState.Str()); + UNIT_ASSERT_VALUES_EQUAL_C(q.Size, a.RequestedSize, currentTestState.Str()); + BlobSet.Check(queryIdx, q.Id, q.Shift, q.Size, a.Buffer); + } + } + RequestsOrder.resize(InitialRequestsSize); + } + + 
void Run() { + do { // while(std::next_permutation(ErroneousVDisks.begin(), ErroneousVDisks.end())); + NKikimrProto::EReplyStatus gotResultPrevStatus; + TVector<NKikimrProto::EReplyStatus> gotResultPrevBlobStatus(MaxQueryCount); + bool isFirstIteraion = true; + Sort(RequestsOrder.begin(), RequestsOrder.end()); + do { // while(std::next_permutation(RequestsOrder.begin(), RequestsOrder.end())); + ++TestIteration; + if (TestIteration % RunTestStep != 0) { + continue; // Comment this statement to run all tests + } + TestStep(gotResultPrevStatus, gotResultPrevBlobStatus, isFirstIteraion); + isFirstIteraion = false; + } while(std::next_permutation(RequestsOrder.begin(), RequestsOrder.end())); + } while(std::next_permutation(ErroneousVDisks.begin(), ErroneousVDisks.end())); + } +}; + Y_UNIT_TEST(TDSProxyLooksLikeLostTheBlobBlock42) { - const ui64 blobSize = 128; - for (ui32 i = 1; i < 3; ++i) { - TTestPossibleBlobLost test(TErasureType::Erasure4Plus2Block, blobSize, i, {NKikimrProto::ERROR, NKikimrProto::ERROR}); - test.Run(); - } -} - + const ui64 blobSize = 128; + for (ui32 i = 1; i < 3; ++i) { + TTestPossibleBlobLost test(TErasureType::Erasure4Plus2Block, blobSize, i, {NKikimrProto::ERROR, NKikimrProto::ERROR}); + test.Run(); + } +} + diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp index 5f889dd0e7..d2d39a3550 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp @@ -39,7 +39,7 @@ void TestPutMaxPartCountOnHandoff(TErasureType::EErasureSpecies erasureSpecies) TIntrusivePtr<TGroupQueues> groupQueues = group.MakeGroupQueues(); TIntrusivePtr<NMonitoring::TDynamicCounters> counters(new NMonitoring::TDynamicCounters()); - TIntrusivePtr<TDsProxyNodeMon> nodeMon(new TDsProxyNodeMon(counters, true)); + TIntrusivePtr<TDsProxyNodeMon> nodeMon(new TDsProxyNodeMon(counters, true)); TIntrusivePtr<TBlobStorageGroupProxyMon> mon(new 
TBlobStorageGroupProxyMon(counters, counters, counters, group.GetInfo(), nodeMon, false)); diff --git a/ydb/core/blobstorage/dsproxy/ut_fat/dsproxy_ut.cpp b/ydb/core/blobstorage/dsproxy/ut_fat/dsproxy_ut.cpp index d6d7bdc8c9..13b1bec356 100644 --- a/ydb/core/blobstorage/dsproxy/ut_fat/dsproxy_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut_fat/dsproxy_ut.cpp @@ -4179,13 +4179,13 @@ public: TAppData appData(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); appData.Counters = counters; - auto ioContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); - appData.IoContextFactory = ioContext.get(); + auto ioContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); + appData.IoContextFactory = ioContext.get(); THolder<TActorSystemSetup> setup1 = BuildActorSystemSetup(1, *counters, nameserverTable, interconnect); THolder<TActorSystemSetup> setup2 = BuildActorSystemSetup(2, *counters, nameserverTable, interconnect); - TIntrusivePtr<TDsProxyNodeMon> dsProxyNodeMon(new TDsProxyNodeMon(counters, true)); + TIntrusivePtr<TDsProxyNodeMon> dsProxyNodeMon(new TDsProxyNodeMon(counters, true)); TDsProxyPerPoolCounters perPoolCounters(counters); TIntrusivePtr<TStoragePoolCounters> storagePoolCounters = perPoolCounters.GetPoolCounters("pool_name"); std::unique_ptr<IActor> proxyActor{CreateBlobStorageGroupProxyConfigured(TIntrusivePtr(bsInfo), false, @@ -4225,7 +4225,7 @@ public: pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->SectorMap = SectorMapByPath[filePath]; - pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; + pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; TActorSetupCmd pDiskSetup( CreatePDisk(pDiskConfig.Get(), mainKey, counters), diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp index baee27bcf3..054d5be665 
100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp @@ -457,21 +457,21 @@ IBlobToDiskMapper *TBlobStorageGroupInfo::TTopology::CreateMapper(TBlobStorageGr case TBlobStorageGroupType::Erasure4Plus2Stripe: case TBlobStorageGroupType::Erasure3Plus2Stripe: case TBlobStorageGroupType::ErasureMirror3Plus2: - case TBlobStorageGroupType::Erasure4Plus3Block: - case TBlobStorageGroupType::Erasure4Plus3Stripe: - case TBlobStorageGroupType::Erasure3Plus3Block: - case TBlobStorageGroupType::Erasure3Plus3Stripe: - case TBlobStorageGroupType::Erasure2Plus3Block: - case TBlobStorageGroupType::Erasure2Plus3Stripe: - case TBlobStorageGroupType::Erasure2Plus2Block: - case TBlobStorageGroupType::Erasure2Plus2Stripe: + case TBlobStorageGroupType::Erasure4Plus3Block: + case TBlobStorageGroupType::Erasure4Plus3Stripe: + case TBlobStorageGroupType::Erasure3Plus3Block: + case TBlobStorageGroupType::Erasure3Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus3Block: + case TBlobStorageGroupType::Erasure2Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus2Block: + case TBlobStorageGroupType::Erasure2Plus2Stripe: case TBlobStorageGroupType::ErasureMirror3of4: return IBlobToDiskMapper::CreateBasicMapper(topology); case TBlobStorageGroupType::ErasureMirror3dc: return IBlobToDiskMapper::CreateMirror3dcMapper(topology); - default: + default: Y_FAIL("unexpected erasure type 0x%08" PRIx32, static_cast<ui32>(gtype.GetErasure())); } @@ -489,14 +489,14 @@ TBlobStorageGroupInfo::IQuorumChecker *TBlobStorageGroupInfo::TTopology::CreateQ case TBlobStorageGroupType::Erasure4Plus2Stripe: case TBlobStorageGroupType::Erasure3Plus2Stripe: case TBlobStorageGroupType::ErasureMirror3Plus2: - case TBlobStorageGroupType::Erasure4Plus3Block: - case TBlobStorageGroupType::Erasure4Plus3Stripe: - case TBlobStorageGroupType::Erasure3Plus3Block: - case TBlobStorageGroupType::Erasure3Plus3Stripe: - case 
TBlobStorageGroupType::Erasure2Plus3Block: - case TBlobStorageGroupType::Erasure2Plus3Stripe: - case TBlobStorageGroupType::Erasure2Plus2Block: - case TBlobStorageGroupType::Erasure2Plus2Stripe: + case TBlobStorageGroupType::Erasure4Plus3Block: + case TBlobStorageGroupType::Erasure4Plus3Stripe: + case TBlobStorageGroupType::Erasure3Plus3Block: + case TBlobStorageGroupType::Erasure3Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus3Block: + case TBlobStorageGroupType::Erasure2Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus2Block: + case TBlobStorageGroupType::Erasure2Plus2Stripe: return new TQuorumCheckerOrdinary(topology); case TBlobStorageGroupType::ErasureMirror3dc: @@ -505,7 +505,7 @@ TBlobStorageGroupInfo::IQuorumChecker *TBlobStorageGroupInfo::TTopology::CreateQ case TBlobStorageGroupType::ErasureMirror3of4: return new TQuorumCheckerMirror3of4(topology); - default: + default: Y_FAIL("unexpected erasure type 0x%08" PRIx32, static_cast<ui32>(topology->GType.GetErasure())); } @@ -593,7 +593,7 @@ TBlobStorageGroupInfo::TBlobStorageGroupInfo(std::shared_ptr<TTopology> topology , Dynamic(std::move(dyn)) , AcceptedScope(acceptedScope) , StoragePoolName(std::move(storagePoolName)) - , DeviceType(deviceType) + , DeviceType(deviceType) {} TBlobStorageGroupInfo::TBlobStorageGroupInfo(TTopology&& topology, TDynamicInfo&& dyn, TString storagePoolName, @@ -748,10 +748,10 @@ const TBlobStorageGroupInfo::IQuorumChecker& TBlobStorageGroupInfo::GetQuorumChe return Topology->GetQuorumChecker(); } -TVDiskID TBlobStorageGroupInfo::CreateVDiskID(const TVDiskIdShort &id) const { - return TVDiskID(GroupID, GroupGeneration, id.FailRealm, id.FailDomain, id.VDisk); -} - +TVDiskID TBlobStorageGroupInfo::CreateVDiskID(const TVDiskIdShort &id) const { + return TVDiskID(GroupID, GroupGeneration, id.FailRealm, id.FailDomain, id.VDisk); +} + TString TBlobStorageGroupInfo::BlobStateToString(EBlobState state) { switch (state) { case EBS_DISINTEGRATED: diff --git 
a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h index f8418ff456..e2d1445be4 100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h @@ -29,8 +29,8 @@ namespace NBlobMapper { } // NBlobMapper static constexpr ui8 MaxHandoffNodes = 6; -static constexpr ui8 MaxNodesPerBlob = 10; -static constexpr ui8 MaxTotalPartCount = 7; +static constexpr ui8 MaxNodesPerBlob = 10; +static constexpr ui8 MaxTotalPartCount = 7; static constexpr ui8 MaxVDisksInGroup = 32; // mapper interface forward declaration @@ -312,7 +312,7 @@ public: TBlobStorageGroupInfo(const TIntrusivePtr<TBlobStorageGroupInfo>& info, const TVDiskID& vdiskId, const TActorId& actorId); ~TBlobStorageGroupInfo(); - + static TIntrusivePtr<TBlobStorageGroupInfo> Parse(const NKikimrBlobStorage::TGroupInfo& group, const TEncryptionKey *key, IOutputStream *err); @@ -324,12 +324,12 @@ public: return StoragePoolName ? 
StoragePoolName : "static"; } - TPDiskCategory::EDeviceType GetDeviceType() const { - return DeviceType; - } - - TVDiskID CreateVDiskID(const TVDiskIdShort &id) const; - + TPDiskCategory::EDeviceType GetDeviceType() const { + return DeviceType; + } + + TVDiskID CreateVDiskID(const TVDiskIdShort &id) const; + static TString BlobStateToString(EBlobState); EBlobState BlobState(ui32 effectiveReplicas, ui32 errorDomains) const; void PickSubgroup(ui32 hash, TVDiskIds *outVDisk, TServiceIds *outServiceIds) const; @@ -428,7 +428,7 @@ private: // access control TMaybe<TKikimrScopeId> AcceptedScope; TString StoragePoolName; - TPDiskCategory::EDeviceType DeviceType = TPDiskCategory::DEVICE_TYPE_UNKNOWN; + TPDiskCategory::EDeviceType DeviceType = TPDiskCategory::DEVICE_TYPE_UNKNOWN; }; // physical fail domain description @@ -465,44 +465,44 @@ struct TFailDomain { template<> inline void Out<NKikimr::TBlobStorageGroupInfo::EEncryptionMode>(IOutputStream& o, NKikimr::TBlobStorageGroupInfo::EEncryptionMode e) { - using E = NKikimr::TBlobStorageGroupInfo::EEncryptionMode; - switch (e) { - case E::EEM_NONE: - o << "NONE"; - break; - case E::EEM_ENC_V1: - o << "ENC_V1"; - break; - } -} - + using E = NKikimr::TBlobStorageGroupInfo::EEncryptionMode; + switch (e) { + case E::EEM_NONE: + o << "NONE"; + break; + case E::EEM_ENC_V1: + o << "ENC_V1"; + break; + } +} + template<> inline void Out<NKikimr::TBlobStorageGroupInfo::ELifeCyclePhase>(IOutputStream& o, NKikimr::TBlobStorageGroupInfo::ELifeCyclePhase e) { - using E = NKikimr::TBlobStorageGroupInfo::ELifeCyclePhase; - switch (e) { - case E::ELCP_INITIAL: - o << "INITIAL"; - break; - case E::ELCP_PROPOSE: - o << "PROPOSE"; - break; - case E::ELCP_IN_TRANSITION: - o << "IN_TRANSITION"; - break; - case E::ELCP_IN_USE: - o << "IN_USE"; - break; - case E::ELCP_KEY_CRC_ERROR: - o << "KEY_CRC_ERROR"; - break; - case E::ELCP_KEY_VERSION_ERROR: - o << "KEY_VERSION_ERROR"; - break; - case E::ELCP_KEY_ID_ERROR: - o << "KEY_ID_ERROR"; - break; + 
using E = NKikimr::TBlobStorageGroupInfo::ELifeCyclePhase; + switch (e) { + case E::ELCP_INITIAL: + o << "INITIAL"; + break; + case E::ELCP_PROPOSE: + o << "PROPOSE"; + break; + case E::ELCP_IN_TRANSITION: + o << "IN_TRANSITION"; + break; + case E::ELCP_IN_USE: + o << "IN_USE"; + break; + case E::ELCP_KEY_CRC_ERROR: + o << "KEY_CRC_ERROR"; + break; + case E::ELCP_KEY_VERSION_ERROR: + o << "KEY_VERSION_ERROR"; + break; + case E::ELCP_KEY_ID_ERROR: + o << "KEY_ID_ERROR"; + break; case E::ELCP_KEY_NOT_LOADED: o << "KEY_NOT_LOADED"; break; - } -} + } +} diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_sets.h b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_sets.h index 37862537cb..132e0cce85 100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_sets.h +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_sets.h @@ -158,7 +158,7 @@ namespace NKikimr { // combine two sets and return the result friend TDerived operator |(const TDerived& x, const TDerived& y) { TDerived res(x); - return res |= y; + return res |= y; } // union of two nonintersecting subsets diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_ut.cpp b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_ut.cpp index 4f071b631d..e93d36aaa2 100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_ut.cpp +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_ut.cpp @@ -16,9 +16,9 @@ Y_UNIT_TEST_SUITE(TBlobStorageGroupInfoTest) { continue; } - const auto erasureType = TErasureType::EErasureSpecies(species); - const ui32 numFailDomains = TBlobStorageGroupType(erasureType).BlobSubgroupSize(); - TBlobStorageGroupInfo info(erasureType, disks, numFailDomains); + const auto erasureType = TErasureType::EErasureSpecies(species); + const ui32 numFailDomains = TBlobStorageGroupType(erasureType).BlobSubgroupSize(); + TBlobStorageGroupInfo info(erasureType, disks, numFailDomains); for (ui32 hashIdx = 0; hashIdx < 1000; ++hashIdx) { ui32 hash = 640480 + 13 * 
hashIdx; @@ -70,9 +70,9 @@ Y_UNIT_TEST_SUITE(TBlobStorageGroupInfoTest) { continue; } - const auto erasureType = TErasureType::EErasureSpecies(species); - const ui32 numFailDomains = TBlobStorageGroupType(erasureType).BlobSubgroupSize(); - TBlobStorageGroupInfo info(erasureType, 1, numFailDomains); + const auto erasureType = TErasureType::EErasureSpecies(species); + const ui32 numFailDomains = TBlobStorageGroupType(erasureType).BlobSubgroupSize(); + TBlobStorageGroupInfo info(erasureType, 1, numFailDomains); TBlobStorageGroupInfo::TVDiskIds ids; info.PickSubgroup(id.Hash(), &ids, nullptr); diff --git a/ydb/core/blobstorage/incrhuge/incrhuge_keeper.cpp b/ydb/core/blobstorage/incrhuge/incrhuge_keeper.cpp index 0b50693860..05aafde94d 100644 --- a/ydb/core/blobstorage/incrhuge/incrhuge_keeper.cpp +++ b/ydb/core/blobstorage/incrhuge/incrhuge_keeper.cpp @@ -70,8 +70,8 @@ namespace NKikimr { " BlocksInIndexSection# %" PRIu32, State.BlockSize, State.BlocksInChunk, State.BlocksInMinBlob, State.MaxBlobsPerChunk, State.BlocksInDataSection, State.BlocksInIndexSection); - auto chunksIt = msg->StartingPoints.find(TLogSignature::SignatureIncrHugeChunks); - auto deletesIt = msg->StartingPoints.find(TLogSignature::SignatureIncrHugeDeletes); + auto chunksIt = msg->StartingPoints.find(TLogSignature::SignatureIncrHugeChunks); + auto deletesIt = msg->StartingPoints.find(TLogSignature::SignatureIncrHugeDeletes); auto end = msg->StartingPoints.end(); Recovery.ApplyYardInit(msg->Status, chunksIt != end ? &chunksIt->second : nullptr, deletesIt != end ? 
&deletesIt->second : nullptr, ctx); diff --git a/ydb/core/blobstorage/incrhuge/incrhuge_keeper_log.cpp b/ydb/core/blobstorage/incrhuge/incrhuge_keeper_log.cpp index 5919a734dc..62da8c0f7e 100644 --- a/ydb/core/blobstorage/incrhuge/incrhuge_keeper_log.cpp +++ b/ydb/core/blobstorage/incrhuge/incrhuge_keeper_log.cpp @@ -442,7 +442,7 @@ namespace NKikimr { // issue log record TLsnSeg seg(item.Lsn, item.Lsn); ctx.Send(Keeper.State.Settings.PDiskActorId, new NPDisk::TEvLog(Keeper.State.PDiskParams->Owner, - Keeper.State.PDiskParams->OwnerRound, TLogSignature::SignatureIncrHugeChunks, commit, data, + Keeper.State.PDiskParams->OwnerRound, TLogSignature::SignatureIncrHugeChunks, commit, data, seg, Keeper.RegisterYardCallback(MakeCallback(std::move(callback))))); if (item.Entrypoint) { @@ -660,7 +660,7 @@ namespace NKikimr { // send record to logger TLsnSeg seg(item.Lsn, item.Lsn); ctx.Send(Keeper.State.Settings.PDiskActorId, new NPDisk::TEvLog(Keeper.State.PDiskParams->Owner, - Keeper.State.PDiskParams->OwnerRound, TLogSignature::SignatureIncrHugeDeletes, commit, data, + Keeper.State.PDiskParams->OwnerRound, TLogSignature::SignatureIncrHugeDeletes, commit, data, seg, Keeper.RegisterYardCallback(MakeCallback(std::move(callback))))); } diff --git a/ydb/core/blobstorage/incrhuge/incrhuge_keeper_recovery_read_log.cpp b/ydb/core/blobstorage/incrhuge/incrhuge_keeper_recovery_read_log.cpp index 297c6be4bb..b5499213ca 100644 --- a/ydb/core/blobstorage/incrhuge/incrhuge_keeper_recovery_read_log.cpp +++ b/ydb/core/blobstorage/incrhuge/incrhuge_keeper_recovery_read_log.cpp @@ -37,7 +37,7 @@ namespace NKikimr { void Handle(TEvents::TEvBootstrap::TPtr& ev, const TActorContext& ctx) { KeeperActorId = ev->Sender; - ctx.Send(PDiskActorId, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(PDiskActorId, new NPDisk::TEvReadLog(Owner, OwnerRound)); } void Handle(NPDisk::TEvReadLogResult::TPtr& ev, const TActorContext& ctx) { @@ -61,7 +61,7 @@ namespace NKikimr { maxLsn = item.Lsn; switch 
(item.Signature) { - case TLogSignature::SignatureIncrHugeChunks: + case TLogSignature::SignatureIncrHugeChunks: if (!ChunksEntrypointLsn || item.Lsn >= *ChunksEntrypointLsn) { NKikimrVDiskData::TIncrHugeChunks record; bool status = record.ParseFromString(item.Data); @@ -70,7 +70,7 @@ namespace NKikimr { } break; - case TLogSignature::SignatureIncrHugeDeletes: + case TLogSignature::SignatureIncrHugeDeletes: if (!DeletesEntrypointLsn || item.Lsn >= *DeletesEntrypointLsn) { NKikimrVDiskData::TIncrHugeDelete record; bool status = record.ParseFromString(item.Data); @@ -104,7 +104,7 @@ namespace NKikimr { break; default: - Y_FAIL_S("unexpected log record " << item.Signature.ToString()); + Y_FAIL_S("unexpected log record " << item.Signature.ToString()); } } diff --git a/ydb/core/blobstorage/incrhuge/ut/incrhuge_basic_ut.cpp b/ydb/core/blobstorage/incrhuge/ut/incrhuge_basic_ut.cpp index 9dceb8d91f..59d3cb999c 100644 --- a/ydb/core/blobstorage/incrhuge/ut/incrhuge_basic_ut.cpp +++ b/ydb/core/blobstorage/incrhuge/ut/incrhuge_basic_ut.cpp @@ -34,7 +34,7 @@ public: std::unique_ptr<TActorSystem> ActorSystem; TTempDir TempDir; std::unique_ptr<TAppData> AppData; - std::shared_ptr<NPDisk::IIoContextFactory> IoContext; + std::shared_ptr<NPDisk::IIoContextFactory> IoContext; void Setup(bool format = true, ui32 counter = 0, TManualEvent *event = nullptr, ui32 numChunks = 1000, ui32 chunkSize = 16 << 20) { @@ -115,8 +115,8 @@ public: NActors::TActorSetupCmd loggerActorCmd{loggerActor, NActors::TMailboxType::Simple, 2}; setup->LocalServices.emplace_back(loggerActorId, loggerActorCmd); AppData.reset(new TAppData(0, 1, 2, 1, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr)); - IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); - AppData->IoContextFactory = IoContext.get(); + IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); + AppData->IoContextFactory = IoContext.get(); ActorSystem.reset(new TActorSystem{setup, AppData.get(), logSettings}); } diff --git 
a/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.cpp b/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.cpp index ae6fe8ef35..0f6a440f57 100644 --- a/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.cpp +++ b/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.cpp @@ -26,26 +26,26 @@ void TRequestTypeField::ToString(ui32 value, TString *out) { } }} - -namespace NKikimr { - -void TBlobPutTactics::ToString(ui64 value, TString *out) { - *out = TEvBlobStorage::TEvPut::TacticName(TEvBlobStorage::TEvPut::ETactic(value)); -} - -void TEventTypeField::ToString(ui64 value, TString* out) { -#define CASE(EVENT) case TEvBlobStorage::EVENT: *out = #EVENT; break; - switch(TEvBlobStorage::EEv(value)) { + +namespace NKikimr { + +void TBlobPutTactics::ToString(ui64 value, TString *out) { + *out = TEvBlobStorage::TEvPut::TacticName(TEvBlobStorage::TEvPut::ETactic(value)); +} + +void TEventTypeField::ToString(ui64 value, TString* out) { +#define CASE(EVENT) case TEvBlobStorage::EVENT: *out = #EVENT; break; + switch(TEvBlobStorage::EEv(value)) { CASE(EvPatch); - CASE(EvPut); - CASE(EvVPut); - CASE(EvGet); - CASE(EvVGet); - CASE(EvDiscover); - default: *out = "Unknown"; - } - *out += "(" + ::ToString(value) + ")"; -#undef CASE -} - -} + CASE(EvPut); + CASE(EvVPut); + CASE(EvGet); + CASE(EvVGet); + CASE(EvDiscover); + default: *out = "Unknown"; + } + *out += "(" + ::ToString(value) + ")"; +#undef CASE +} + +} diff --git a/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h b/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h index 3a05831c29..7e20a7bd81 100644 --- a/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h +++ b/ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h @@ -1,7 +1,7 @@ #pragma once //#include <ydb/core/protos/base.pb.h> - + #include <library/cpp/lwtrace/all.h> // Helper class for printing pdisk id in the same was as it done for counters @@ -31,30 +31,30 @@ struct TRequestTypeField { }} -namespace NKikimr { - -struct 
TBlobPutTactics { - typedef ui64 TStoreType; +namespace NKikimr { + +struct TBlobPutTactics { + typedef ui64 TStoreType; typedef ui64 TFuncParam; - static void ToString(ui64 value, TString* out); + static void ToString(ui64 value, TString* out); static ui64 ToStoreType(ui64 value) { return value; } -}; - -struct TEventTypeField { - typedef ui64 TStoreType; +}; + +struct TEventTypeField { + typedef ui64 TStoreType; typedef ui64 TFuncParam; - static void ToString(ui64 value, TString* out); + static void ToString(ui64 value, TString* out); static ui64 ToStoreType(ui64 value) { return value; } -}; - -} - +}; + +} + #define BLOBSTORAGE_PROVIDER(PROBE, EVENT, GROUPS, TYPES, NAMES) \ PROBE(DSProxyBatchedPutRequest, GROUPS("DSProxy"), \ TYPES(ui64, ui32), \ @@ -65,12 +65,12 @@ struct TEventTypeField { PROBE(ProxyPutBootstrapPart, GROUPS("Durations"), \ TYPES(ui64, double, double, ui64, double), \ NAMES("size", "waitMs", "splitMs", "splitCount", "splitElapsedMs")) \ - PROBE(DSProxyRequestDuration, GROUPS("DSProxyRequest", "DSProxy"), \ + PROBE(DSProxyRequestDuration, GROUPS("DSProxyRequest", "DSProxy"), \ TYPES(NKikimr::TEventTypeField, ui64, double, ui64, ui32, ui32, TString, bool), \ - NAMES("type", "size", "durationMs", "tabletId", "groupId", "channel", "handleClass", "isOk")) \ - PROBE(DSProxyVDiskRequestDuration, GROUPS("VDisk", "DSProxy"), \ + NAMES("type", "size", "durationMs", "tabletId", "groupId", "channel", "handleClass", "isOk")) \ + PROBE(DSProxyVDiskRequestDuration, GROUPS("VDisk", "DSProxy"), \ TYPES(NKikimr::TEventTypeField, ui64, ui64, ui32, ui32, ui32, double, double, double, double, TString, TString), \ - NAMES("type", "size", "tabletId", "groupId", "channel", "vdiskOrderNum", "startTime", "totalDurationMs", \ + NAMES("type", "size", "tabletId", "groupId", "channel", "vdiskOrderNum", "startTime", "totalDurationMs", \ "vdiskDurationMs", "transferDurationMs", "handleClass", "status")) \ PROBE(VDiskSkeletonFrontVMovedPatchRecieved, GROUPS("VDisk", 
"DSProxy"), \ TYPES(ui32, ui32, ui32, ui64, ui64), \ @@ -84,48 +84,48 @@ struct TEventTypeField { PROBE(VDiskSkeletonFrontVPatchXorDiffRecieved, GROUPS("VDisk", "DSProxy"), \ TYPES(ui32, ui32, ui32, ui64, ui64), \ NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ - PROBE(VDiskSkeletonFrontVPutRecieved, GROUPS("VDisk", "DSProxy"), \ - TYPES(ui32, ui32, ui32, ui64, ui64), \ - NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ + PROBE(VDiskSkeletonFrontVPutRecieved, GROUPS("VDisk", "DSProxy"), \ + TYPES(ui32, ui32, ui32, ui64, ui64), \ + NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ PROBE(VDiskSkeletonFrontVMultiPutRecieved, GROUPS("VDisk", "DSProxy"), \ TYPES(ui32, ui32, ui32, ui64, ui64), \ NAMES("nodeId", "groupId", "vdiskOrderNum", "count", "size")) \ - PROBE(VDiskSkeletonVPutRecieved, GROUPS("VDisk", "DSProxy"), \ - TYPES(ui32, ui32, ui32, ui64, ui64), \ - NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ + PROBE(VDiskSkeletonVPutRecieved, GROUPS("VDisk", "DSProxy"), \ + TYPES(ui32, ui32, ui32, ui64, ui64), \ + NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ PROBE(VDiskSkeletonVMultiPutRecieved, GROUPS("VDisk", "DSProxy"), \ TYPES(ui32, ui32, ui32, ui64, ui64), \ NAMES("nodeId", "groupId", "vdiskOrderNum", "tabletId", "size")) \ - PROBE(VDiskRecoveryLogWriterVPutIsRecieved, GROUPS("VDisk"), \ - TYPES(ui32, ui64), \ - NAMES("owner", "lsn")) \ - PROBE(VDiskRecoveryLogWriterVPutIsSent, GROUPS("VDisk"), \ - TYPES(ui32, ui64), \ - NAMES("owner", "lsn")) \ + PROBE(VDiskRecoveryLogWriterVPutIsRecieved, GROUPS("VDisk"), \ + TYPES(ui32, ui64), \ + NAMES("owner", "lsn")) \ + PROBE(VDiskRecoveryLogWriterVPutIsSent, GROUPS("VDisk"), \ + TYPES(ui32, ui64), \ + NAMES("owner", "lsn")) \ PROBE(VDiskSkeletonRecordLogged, GROUPS("VDisk"), \ - TYPES(ui64), \ - NAMES("lsn")) \ - PROBE(DSProxyBlobPutTactics, GROUPS("DSProxyRequest", "DSProxy"), \ - TYPES(ui64, ui32, TString, 
NKikimr::TBlobPutTactics, TString), \ - NAMES("tabletId", "groupId", "blob", "tactics", "handleClass")) \ + TYPES(ui64), \ + NAMES("lsn")) \ + PROBE(DSProxyBlobPutTactics, GROUPS("DSProxyRequest", "DSProxy"), \ + TYPES(ui64, ui32, TString, NKikimr::TBlobPutTactics, TString), \ + NAMES("tabletId", "groupId", "blob", "tactics", "handleClass")) \ PROBE(DSProxyPutVPut, GROUPS("DSProxyRequest", "DSProxy"), \ TYPES(ui64, ui32, ui32, ui32, TString, NKikimr::TBlobPutTactics, TString, ui32, ui32, \ ui32, TString, double), \ NAMES("tabletId", "groupId", "channel", "partId", "blob", "tactics", "handleClass", "blobSize", "partSize", \ "vdiskOrderNum", "queueId", "predictedMs")) \ - PROBE(DSProxyPutVPutIsSent, GROUPS("DSProxyRequest", "DSProxy", "LWTrackStart"), \ - TYPES(ui64, ui32, ui32, ui32, TString, ui32), \ - NAMES("vdiskOrderNum", "groupId", "channel", "partId", "blob", "blobSize")) \ - PROBE(DSQueueVPutIsQueued, GROUPS("DSQueueRequest", "DSQueue"), \ - TYPES(ui32, TString, ui32, ui32, ui32), \ - NAMES("groupId", "blob", "channel", "partId", "blobSize")) \ - PROBE(DSQueueVPutIsSent, GROUPS("DSQueueRequest", "DSQueue"), \ - TYPES(double), \ - NAMES("inQueueMs")) \ - PROBE(DSQueueVPutResultRecieved, GROUPS("DSQueueRequest", "DSQueue"), \ - TYPES(double, ui32, bool), \ - NAMES("processingTimeMs", "size", "isDiscarded")) \ - PROBE(PDiskNewRequest, GROUPS("PDisk", "PDiskRequest", "LWTrackStart"), \ + PROBE(DSProxyPutVPutIsSent, GROUPS("DSProxyRequest", "DSProxy", "LWTrackStart"), \ + TYPES(ui64, ui32, ui32, ui32, TString, ui32), \ + NAMES("vdiskOrderNum", "groupId", "channel", "partId", "blob", "blobSize")) \ + PROBE(DSQueueVPutIsQueued, GROUPS("DSQueueRequest", "DSQueue"), \ + TYPES(ui32, TString, ui32, ui32, ui32), \ + NAMES("groupId", "blob", "channel", "partId", "blobSize")) \ + PROBE(DSQueueVPutIsSent, GROUPS("DSQueueRequest", "DSQueue"), \ + TYPES(double), \ + NAMES("inQueueMs")) \ + PROBE(DSQueueVPutResultRecieved, GROUPS("DSQueueRequest", "DSQueue"), \ + TYPES(double, 
ui32, bool), \ + NAMES("processingTimeMs", "size", "isDiscarded")) \ + PROBE(PDiskNewRequest, GROUPS("PDisk", "PDiskRequest", "LWTrackStart"), \ TYPES(TPDiskIdField, ui64, double, double, bool, bool, ui64, ui64, NKikimr::NPDisk::TRequestTypeField), \ NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "isSensitive", "isFast", "owner", "priorityClass", "type")) \ PROBE(PDiskFairSchedulerPush, GROUPS("PDisk", "PDiskRequest"), \ @@ -138,18 +138,18 @@ struct TEventTypeField { TYPES(TPDiskIdField, ui64, double), \ NAMES("pdisk", "reqId", "deadlineSec")) \ PROBE(PDiskInputRequest, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, double, double, ui64, double, double, ui64, bool, ui64, ui64), \ - NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "inputQLA", "inputQCA", "deadlineSec", "owner", "isFast", \ - "priorityClass", "inputQueueSize")) \ - PROBE(PDiskAddToScheduler, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64), \ - NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass")) \ - PROBE(PDiskRouteRequest, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64), \ - NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass")) \ - PROBE(PDiskLogWriteFlush, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, double, double, double, ui64, bool, ui64), \ - NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "deadlineSec", "owner", "isFast", "priorityClass")) \ + TYPES(TPDiskIdField, ui64, double, double, ui64, double, double, ui64, bool, ui64, ui64), \ + NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "inputQLA", "inputQCA", "deadlineSec", "owner", "isFast", \ + "priorityClass", "inputQueueSize")) \ + PROBE(PDiskAddToScheduler, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64), \ + NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass")) \ + 
PROBE(PDiskRouteRequest, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64), \ + NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass")) \ + PROBE(PDiskLogWriteFlush, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, double, double, double, ui64, bool, ui64), \ + NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "deadlineSec", "owner", "isFast", "priorityClass")) \ PROBE(PDiskBurst, GROUPS("PDisk", "PDiskRequest"), \ TYPES(TPDiskIdField, ui64, double, bool, double, double), \ NAMES("pdisk", "reqId", "creationTimeSec", "isSensitive", "costMs", "burstMs")) \ @@ -190,24 +190,24 @@ struct TEventTypeField { PROBE(PDiskSchedulerSubStep, GROUPS("PDisk", "PDiskSchedulerStep", "PDiskSchedulerSubStep"), \ TYPES(TPDiskIdField, ui64, ui64, double, double), \ NAMES("pdisk", "schedStep", "schedSubStep", "subStepCost", "subStepCount")) \ - PROBE(PDiskSchedulerStartStep, GROUPS("PDisk", "PDiskSchedulerStep", "LWTrackStart"), \ + PROBE(PDiskSchedulerStartStep, GROUPS("PDisk", "PDiskSchedulerStep", "LWTrackStart"), \ TYPES(TPDiskIdField, ui64), \ NAMES("pdisk", "schedStep")) \ - PROBE(PDiskChunkReadPieceAddToScheduler, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui32, ui64, ui64), \ - NAMES("pdisk", "pieceIdx", "size", "offset")) \ - PROBE(PDiskChunkReadPieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64), \ - NAMES("pdisk", "size")) \ - PROBE(PDiskChunkReadPieceComplete, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, ui64), \ - NAMES("pdisk", "size", "relativeOffset")) \ - PROBE(PDiskAddWritePieceToScheduler, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64, ui64), \ - NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass", "size")) \ - PROBE(PDiskChunkWritePieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \ - TYPES(TPDiskIdField, ui64, ui64, ui64, ui64), \ - NAMES("pdisk", 
"owner", "chunkIdx", "pieceOffset", "pieceSize")) \ + PROBE(PDiskChunkReadPieceAddToScheduler, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui32, ui64, ui64), \ + NAMES("pdisk", "pieceIdx", "size", "offset")) \ + PROBE(PDiskChunkReadPieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64), \ + NAMES("pdisk", "size")) \ + PROBE(PDiskChunkReadPieceComplete, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, ui64), \ + NAMES("pdisk", "size", "relativeOffset")) \ + PROBE(PDiskAddWritePieceToScheduler, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64, ui64), \ + NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass", "size")) \ + PROBE(PDiskChunkWritePieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \ + TYPES(TPDiskIdField, ui64, ui64, ui64, ui64), \ + NAMES("pdisk", "owner", "chunkIdx", "pieceOffset", "pieceSize")) \ PROBE(PDiskLogWriteComplete, GROUPS("PDisk", "PDiskRequest"), \ TYPES(TPDiskIdField, ui64, double, double, double, double, double, double), \ NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "responseTimeMs", "inputTimeMs", "scheduleTimeMs", "deviceTotalTimeMs")) \ @@ -218,17 +218,17 @@ struct TEventTypeField { TYPES(TPDiskIdField, ui64, double, ui64), \ NAMES("pdisk", "reqId", "responseTimeMs", "sizeBytes")) \ PROBE(PDiskDeviceReadDuration, GROUPS("PDisk"), \ - TYPES(TPDiskIdField, double, ui64), \ - NAMES("pdisk", "deviceTimeMs", "size")) \ + TYPES(TPDiskIdField, double, ui64), \ + NAMES("pdisk", "deviceTimeMs", "size")) \ PROBE(PDiskDeviceWriteDuration, GROUPS("PDisk"), \ - TYPES(TPDiskIdField, double, ui64), \ - NAMES("pdisk", "deviceTimeMs", "size")) \ + TYPES(TPDiskIdField, double, ui64), \ + NAMES("pdisk", "deviceTimeMs", "size")) \ PROBE(PDiskDeviceTrimDuration, GROUPS("PDisk"), \ TYPES(TPDiskIdField, double, ui64), \ NAMES("pdisk", "trimTimeMs", "trimOffset")) \ - PROBE(PDiskDeviceOperationSizeAndType, GROUPS("PDisk"), \ - 
TYPES(TPDiskIdField, ui64, ui64), \ - NAMES("pdisk", "operationSize", "operationType")) \ + PROBE(PDiskDeviceOperationSizeAndType, GROUPS("PDisk"), \ + TYPES(TPDiskIdField, ui64, ui64), \ + NAMES("pdisk", "operationSize", "operationType")) \ PROBE(PDiskMilliBatchSize, GROUPS("PDisk"), \ TYPES(TPDiskIdField, ui64, ui64, ui64, ui64), \ NAMES("pdisk", "milliBatchLogCost", "milliBatchNonLogCost", "milliBatchLogReqs", "milliBatchNonLogReqs")) \ @@ -239,11 +239,11 @@ struct TEventTypeField { TYPES(TPDiskIdField, ui64, ui64, ui64, ui64, ui64, ui64, ui64, ui64, ui64), \ NAMES("pdisk", "realTimeNs", "uncorrectedForsetiTimeNs", "correctedForsetiTimeNs", "timeCorrectionNs", \ "realDurationNs", "virtualDurationNs", "newForsetiTimeNs", "totalCostNs", "virtualDeadlineNs")) \ - PROBE(LoadActorEvChunkReadCreated, GROUPS("LoadActor", "PDiskEvent"), \ - TYPES(ui32, ui64, ui64), \ - NAMES("chunkIdx", "size", "offset")) \ - PROBE(PDiskUpdateCycleDetails, GROUPS("PDisk"), \ - TYPES(float, float, float, float, float), \ + PROBE(LoadActorEvChunkReadCreated, GROUPS("LoadActor", "PDiskEvent"), \ + TYPES(ui32, ui64, ui64), \ + NAMES("chunkIdx", "size", "offset")) \ + PROBE(PDiskUpdateCycleDetails, GROUPS("PDisk"), \ + TYPES(float, float, float, float, float), \ NAMES("entireUpdateMs", "inputQueueMs", "schedulingMs", "processingMs", "waitingMs")) \ PROBE(DSProxyGetEnqueue, GROUPS("DSProxy", "LWTrackStart"), TYPES(), NAMES()) \ PROBE(DSProxyGetBootstrap, GROUPS("DSProxy"), TYPES(), NAMES()) \ diff --git a/ydb/core/blobstorage/lwtrace_probes/ya.make b/ydb/core/blobstorage/lwtrace_probes/ya.make index 2273b610a5..412fe7a0d3 100644 --- a/ydb/core/blobstorage/lwtrace_probes/ya.make +++ b/ydb/core/blobstorage/lwtrace_probes/ya.make @@ -1,20 +1,20 @@ -LIBRARY() - -OWNER( - cthulhu - va-kuznecov - g:kikimr -) - -SRCS( - blobstorage_probes.cpp -) - -PEERDIR( - contrib/libs/grpc +LIBRARY() + +OWNER( + cthulhu + va-kuznecov + g:kikimr +) + +SRCS( + blobstorage_probes.cpp +) + +PEERDIR( + 
contrib/libs/grpc library/cpp/lwtrace/protos ydb/core/base ydb/core/protos -) - -END() +) + +END() diff --git a/ydb/core/blobstorage/nodewarden/blobstorage_node_warden_ut.cpp b/ydb/core/blobstorage/nodewarden/blobstorage_node_warden_ut.cpp index e352418c00..e3c74cce7b 100644 --- a/ydb/core/blobstorage/nodewarden/blobstorage_node_warden_ut.cpp +++ b/ydb/core/blobstorage/nodewarden/blobstorage_node_warden_ut.cpp @@ -83,7 +83,7 @@ void FormatPDiskRandomKeys(TString path, ui32 diskSize, ui32 chunkSize, ui64 gui NKikimr::FormatPDisk(path, diskSize, 4 << 10, chunkSize, guid, chunkKey, logKey, sysLogKey, NPDisk::YdbDefaultPDiskSequence, "Test", - false, false, sectorMap); + false, false, sectorMap); } void SetupLogging(TTestActorRuntime& runtime) { @@ -216,7 +216,7 @@ void SetupServices(TTestActorRuntime &runtime, TString extraPath, TIntrusivePtr< sectorMap->ForceSize(64ull << 30ull); - TString pDiskPath0 = TStringBuilder() << "SectorMap:" << baseDir << "pdisk_map"; + TString pDiskPath0 = TStringBuilder() << "SectorMap:" << baseDir << "pdisk_map"; nodeWardenConfig->ServiceSet.MutablePDisks(0)->SetPath(pDiskPath0); nodeWardenConfig->SectorMaps[pDiskPath0] = sectorMap; @@ -228,7 +228,7 @@ void SetupServices(TTestActorRuntime &runtime, TString extraPath, TIntrusivePtr< NPDisk::YdbDefaultPDiskSequence, "", false, false, sectorMap); - // Magic path from testlib, do not change it + // Magic path from testlib, do not change it TString pDiskPath1 = TStringBuilder() << baseDir << "pdisk_1.dat"; TIntrusivePtr<NPDisk::TSectorMap> sectorMap1(new NPDisk::TSectorMap()); sectorMap1->ForceSize(64ull << 30ull); @@ -645,7 +645,7 @@ Y_UNIT_TEST_SUITE(TBlobStorageWardenTest) { TTempDir tempDir; TTestBasicRuntime runtime(2, false); TIntrusivePtr<NPDisk::TSectorMap> sectorMap(new NPDisk::TSectorMap(32ull << 30ull)); - Setup(runtime, "SectorMap:new_pdisk", sectorMap); + Setup(runtime, "SectorMap:new_pdisk", sectorMap); TActorId sender0 = runtime.AllocateEdgeActor(0); // TActorId sender1 = 
runtime.AllocateEdgeActor(1); @@ -673,32 +673,32 @@ Y_UNIT_TEST_SUITE(TBlobStorageWardenTest) { TAutoPtr<IEventHandle> handle; if (auto initResult = runtime.GrabEdgeEventRethrow<NPDisk::TEvYardInitResult>(handle, TDuration::Seconds(1))) { UNIT_ASSERT(initResult); - UNIT_ASSERT(initResult->Status == NKikimrProto::CORRUPTED); + UNIT_ASSERT(initResult->Status == NKikimrProto::CORRUPTED); break; } } } - void TestHttpMonForPath(const TString& path) { - TTestBasicRuntime runtime(1, false); - Setup(runtime, "", nullptr); - auto edge = runtime.AllocateEdgeActor(0); - TActorId nodeWarden = MakeBlobStorageNodeWardenID(edge.NodeId()); - THttpRequest HttpRequest; - NMonitoring::TMonService2HttpRequest monService2HttpRequest(nullptr, &HttpRequest, nullptr, nullptr, path, - nullptr); - runtime.Send(new IEventHandle(nodeWarden, edge, new NMon::TEvHttpInfo(monService2HttpRequest)), 0); - auto httpInfoRes = runtime.GrabEdgeEventRethrow<NMon::TEvHttpInfoRes>(edge, TDuration::Seconds(1)); - UNIT_ASSERT(httpInfoRes && httpInfoRes->Get()); - TStringStream out; - httpInfoRes->Get()->Output(out); - UNIT_ASSERT(out.Size()); - } - - CUSTOM_UNIT_TEST(TestHttpMonPage) { - TestHttpMonForPath(""); - TestHttpMonForPath("/json/groups"); - } + void TestHttpMonForPath(const TString& path) { + TTestBasicRuntime runtime(1, false); + Setup(runtime, "", nullptr); + auto edge = runtime.AllocateEdgeActor(0); + TActorId nodeWarden = MakeBlobStorageNodeWardenID(edge.NodeId()); + THttpRequest HttpRequest; + NMonitoring::TMonService2HttpRequest monService2HttpRequest(nullptr, &HttpRequest, nullptr, nullptr, path, + nullptr); + runtime.Send(new IEventHandle(nodeWarden, edge, new NMon::TEvHttpInfo(monService2HttpRequest)), 0); + auto httpInfoRes = runtime.GrabEdgeEventRethrow<NMon::TEvHttpInfoRes>(edge, TDuration::Seconds(1)); + UNIT_ASSERT(httpInfoRes && httpInfoRes->Get()); + TStringStream out; + httpInfoRes->Get()->Output(out); + UNIT_ASSERT(out.Size()); + } + + CUSTOM_UNIT_TEST(TestHttpMonPage) { + 
TestHttpMonForPath(""); + TestHttpMonForPath("/json/groups"); + } } } // namespace NBlobStorageNodeWardenTest diff --git a/ydb/core/blobstorage/nodewarden/node_warden.h b/ydb/core/blobstorage/nodewarden/node_warden.h index 1317686bac..4d91550869 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden.h +++ b/ydb/core/blobstorage/nodewarden/node_warden.h @@ -6,7 +6,7 @@ #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_factory.h> #include <ydb/core/protos/config.pb.h> #include <ydb/library/pdisk_io/sector_map.h> - + #include <util/folder/path.h> namespace NKikimr { @@ -28,7 +28,7 @@ namespace NKikimr { std::unique_ptr<ICacheAccessor> CacheAccessor; TEncryptionKey TenantKey; TEncryptionKey StaticKey; - TEncryptionKey PDiskKey; + TEncryptionKey PDiskKey; bool CachePDisks = false; bool CacheVDisks = false; bool EnableVDiskCooldownTimeout = false; @@ -41,17 +41,17 @@ namespace NKikimr { , AllVDiskKinds(new TAllVDiskKinds) , AllDriveModels(new NPDisk::TDriveModelDb) {} - - NPDisk::TKey CreatePDiskKey() const { - if (PDiskKey) { - const ui8 *key; - ui32 keySize; - PDiskKey.Key.GetKeyBytes(&key, &keySize); - return *(ui64*)key; - } else { + + NPDisk::TKey CreatePDiskKey() const { + if (PDiskKey) { + const ui8 *key; + ui32 keySize; + PDiskKey.Key.GetKeyBytes(&key, &keySize); + return *(ui64*)key; + } else { return NPDisk::YdbDefaultPDiskSequence; - } - } + } + } bool IsCacheEnabled() const { return static_cast<bool>(CacheAccessor); @@ -62,7 +62,7 @@ namespace NKikimr { bool ObtainTenantKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig); bool ObtainStaticKey(TEncryptionKey *key); - bool ObtainPDiskKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig); + bool ObtainPDiskKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig); std::unique_ptr<ICacheAccessor> CreateFileCacheAccessor(const TFsPath& cacheFilePath); diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp 
b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp index ec53bf9e3e..47093a108d 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp @@ -1,33 +1,33 @@ #include "node_warden_impl.h" #include <ydb/library/pdisk_io/file_params.h> - + using namespace NKikimr; using namespace NStorage; -TVector<NPDisk::TDriveData> TNodeWarden::ListLocalDrives() { - TVector<NPDisk::TDriveData> drives = ListDevicesWithPartlabel(); - - try { - TString raw = TFileInput(MockDevicesPath).ReadAll(); - if (google::protobuf::TextFormat::ParseFromString(raw, &MockDevicesConfig)) { - for (const auto& device : MockDevicesConfig.GetDevices()) { - drives.emplace_back(device); - } - } else { +TVector<NPDisk::TDriveData> TNodeWarden::ListLocalDrives() { + TVector<NPDisk::TDriveData> drives = ListDevicesWithPartlabel(); + + try { + TString raw = TFileInput(MockDevicesPath).ReadAll(); + if (google::protobuf::TextFormat::ParseFromString(raw, &MockDevicesConfig)) { + for (const auto& device : MockDevicesConfig.GetDevices()) { + drives.emplace_back(device); + } + } else { STLOG(PRI_WARN, BS_NODE, NW01, "Error parsing mock devices protobuf from file", (Path, MockDevicesPath)); - } - } catch (...) { - STLOG(PRI_INFO, BS_NODE, NW90, "Unable to find mock devices file", (Path, MockDevicesPath)); - } - - std::sort(drives.begin(), drives.end(), [] (const auto& lhs, const auto& rhs) { - return lhs.Path < rhs.Path; - }); - - return drives; -} - + } + } catch (...) 
{ + STLOG(PRI_INFO, BS_NODE, NW90, "Unable to find mock devices file", (Path, MockDevicesPath)); + } + + std::sort(drives.begin(), drives.end(), [] (const auto& lhs, const auto& rhs) { + return lhs.Path < rhs.Path; + }); + + return drives; +} + void TNodeWarden::StartInvalidGroupProxy() { const ui32 groupId = Max<ui32>(); STLOG(PRI_DEBUG, BS_NODE, NW11, "StartInvalidGroupProxy", (GroupId, groupId)); @@ -56,20 +56,20 @@ void TNodeWarden::Bootstrap() { WhiteboardId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(LocalNodeId); Become(&TThis::StateOnline, TDuration::Seconds(10), new TEvPrivate::TEvSendDiskMetrics()); - Schedule(TDuration::Seconds(10), new TEvPrivate::TEvUpdateNodeDrives()); + Schedule(TDuration::Seconds(10), new TEvPrivate::TEvUpdateNodeDrives()); NLwTraceMonPage::ProbeRegistry().AddProbesList(LWTRACE_GET_PROBES(BLOBSTORAGE_PROVIDER)); - TActorSystem *actorSystem = TlsActivationContext->ExecutorThread.ActorSystem; - if (auto mon = AppData()->Mon) { - - TString name = "NodeWarden"; - TString path = ::to_lower(name); - NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); - - mon->RegisterActorPage(actorsMonPage, path, name, false, actorSystem, SelfId()); - } - + TActorSystem *actorSystem = TlsActivationContext->ExecutorThread.ActorSystem; + if (auto mon = AppData()->Mon) { + + TString name = "NodeWarden"; + TString path = ::to_lower(name); + NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); + + mon->RegisterActorPage(actorsMonPage, path, name, false, actorSystem, SelfId()); + } + DsProxyNodeMon = new TDsProxyNodeMon(AppData()->Counters, true); DsProxyNodeMonActor = Register(CreateDsProxyNodeMon(DsProxyNodeMon)); DsProxyPerPoolCounters = new TDsProxyPerPoolCounters(AppData()->Counters); @@ -268,17 +268,17 @@ void TNodeWarden::SendVDiskReport(TVSlotId vslotId, const TVDiskID &vDiskId, NKi SendToController(std::move(report)); } -void 
TNodeWarden::Handle(TEvBlobStorage::TEvAskRestartPDisk::TPtr ev) { - const auto id = ev->Get()->PDiskId; +void TNodeWarden::Handle(TEvBlobStorage::TEvAskRestartPDisk::TPtr ev) { + const auto id = ev->Get()->PDiskId; if (auto it = LocalPDisks.find(TPDiskKey{LocalNodeId, id}); it != LocalPDisks.end()) { - RestartLocalPDiskStart(id, CreatePDiskConfig(it->second.Record)); - } -} - -void TNodeWarden::Handle(TEvBlobStorage::TEvRestartPDiskResult::TPtr ev) { - RestartLocalPDiskFinish(ev->Get()->PDiskId, ev->Get()->Status); -} - + RestartLocalPDiskStart(id, CreatePDiskConfig(it->second.Record)); + } +} + +void TNodeWarden::Handle(TEvBlobStorage::TEvRestartPDiskResult::TPtr ev) { + RestartLocalPDiskFinish(ev->Get()->PDiskId, ev->Get()->Status); +} + void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev) { STLOG(PRI_TRACE, BS_NODE, NW38, "Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus)"); @@ -310,7 +310,7 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev) vdisk.VDiskMetrics.emplace(m); VDisksWithUnreportedMetrics.PushBack(&vdisk); } - } + } } for (const NKikimrBlobStorage::TPDiskMetrics& m : record.GetPDisksMetrics()) { @@ -327,7 +327,7 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev) pdisk.PDiskMetrics.emplace(m); PDisksWithUnreportedMetrics.PushBack(&pdisk); } - } + } } } @@ -338,22 +338,22 @@ void TNodeWarden::Handle(TEvPrivate::TEvSendDiskMetrics::TPtr&) { Schedule(TDuration::Seconds(10), new TEvPrivate::TEvSendDiskMetrics()); } -void TNodeWarden::Handle(TEvPrivate::TEvUpdateNodeDrives::TPtr&) { +void TNodeWarden::Handle(TEvPrivate::TEvUpdateNodeDrives::TPtr&) { STLOG(PRI_TRACE, BS_NODE, NW88, "Handle(TEvPrivate::UpdateNodeDrives)"); - EnqueueSyncOp([this] (const TActorContext&) { - auto drives = ListLocalDrives(); - - return [this, drives = std::move(drives)] () { - if (drives != WorkingLocalDrives) { + EnqueueSyncOp([this] (const TActorContext&) { + auto drives = 
ListLocalDrives(); + + return [this, drives = std::move(drives)] () { + if (drives != WorkingLocalDrives) { SendToController(std::make_unique<TEvBlobStorage::TEvControllerUpdateNodeDrives>(LocalNodeId, drives)); WorkingLocalDrives = std::move(drives); - } - }; - }); - Schedule(TDuration::Seconds(10), new TEvPrivate::TEvUpdateNodeDrives()); -} - - + } + }; + }); + Schedule(TDuration::Seconds(10), new TEvPrivate::TEvUpdateNodeDrives()); +} + + void TNodeWarden::SendDiskMetrics(bool reportMetrics) { STLOG(PRI_TRACE, BS_NODE, NW45, "SendDiskMetrics", (ReportMetrics, reportMetrics)); @@ -406,79 +406,79 @@ void TNodeWarden::FillInVDiskStatus(google::protobuf::RepeatedPtrField<NKikimrBl } } -bool ObtainKey(TEncryptionKey *key, const NKikimrProto::TKeyRecord& record) { - TString containerPath = record.GetContainerPath(); - TString pin = record.GetPin(); - TString keyId = record.GetId(); - ui64 version = record.GetVersion(); - - TFileHandle containerFile(containerPath, OpenExisting | RdOnly); - if (!containerFile.IsOpen()) { - Cerr << "Can't open key container file# \"" << EscapeC(containerPath) << "\", make sure the file actually exists." << Endl; - return false; - } - ui64 length = containerFile.GetLength(); - if (length == 0) { - Cerr << "Key container file# \"" << EscapeC(containerPath) << "\" size is 0, make sure the file actually contains the key!" << Endl; - return false; - } - TString data = TString::Uninitialized(length); - size_t bytesRead = containerFile.Read(data.Detach(), length); - if (bytesRead != length) { - Cerr << "Key container file# \"" << EscapeC(containerPath) << "\" could not be read! Expected length# " << length - << " bytesRead# " << bytesRead << ", make sure the file stays put!" 
<< Endl; - return false; - } - THashCalculator hasher; - if (pin.size() == 0) { - pin = "EmptyPin"; - } - - ui8 *keyBytes = 0; - ui32 keySize = 0; - key->Key.MutableKeyBytes(&keyBytes, &keySize); - Y_VERIFY(keySize == 4 * sizeof(ui64)); - ui64 *p = (ui64*)keyBytes; - - hasher.SetKey((const ui8*)pin.data(), pin.size()); - hasher.Hash(data.Detach(), data.size()); - p[0] = hasher.GetHashResult(&p[1]); - hasher.Clear(); - hasher.SetKey((const ui8*)pin.data(), pin.size()); - TString saltBefore = "SaltBefore"; - TString saltAfter = "SaltAfter"; - hasher.Hash(saltBefore.data(), saltBefore.size()); - hasher.Hash(data.Detach(), data.size()); - hasher.Hash(saltAfter.data(), saltAfter.size()); - p[2] = hasher.GetHashResult(&p[3]); - - key->Version = version; - key->Id = keyId; - return true; -} - +bool ObtainKey(TEncryptionKey *key, const NKikimrProto::TKeyRecord& record) { + TString containerPath = record.GetContainerPath(); + TString pin = record.GetPin(); + TString keyId = record.GetId(); + ui64 version = record.GetVersion(); + + TFileHandle containerFile(containerPath, OpenExisting | RdOnly); + if (!containerFile.IsOpen()) { + Cerr << "Can't open key container file# \"" << EscapeC(containerPath) << "\", make sure the file actually exists." << Endl; + return false; + } + ui64 length = containerFile.GetLength(); + if (length == 0) { + Cerr << "Key container file# \"" << EscapeC(containerPath) << "\" size is 0, make sure the file actually contains the key!" << Endl; + return false; + } + TString data = TString::Uninitialized(length); + size_t bytesRead = containerFile.Read(data.Detach(), length); + if (bytesRead != length) { + Cerr << "Key container file# \"" << EscapeC(containerPath) << "\" could not be read! Expected length# " << length + << " bytesRead# " << bytesRead << ", make sure the file stays put!" 
<< Endl; + return false; + } + THashCalculator hasher; + if (pin.size() == 0) { + pin = "EmptyPin"; + } + + ui8 *keyBytes = 0; + ui32 keySize = 0; + key->Key.MutableKeyBytes(&keyBytes, &keySize); + Y_VERIFY(keySize == 4 * sizeof(ui64)); + ui64 *p = (ui64*)keyBytes; + + hasher.SetKey((const ui8*)pin.data(), pin.size()); + hasher.Hash(data.Detach(), data.size()); + p[0] = hasher.GetHashResult(&p[1]); + hasher.Clear(); + hasher.SetKey((const ui8*)pin.data(), pin.size()); + TString saltBefore = "SaltBefore"; + TString saltAfter = "SaltAfter"; + hasher.Hash(saltBefore.data(), saltBefore.size()); + hasher.Hash(data.Detach(), data.size()); + hasher.Hash(saltAfter.data(), saltAfter.size()); + p[2] = hasher.GetHashResult(&p[3]); + + key->Version = version; + key->Id = keyId; + return true; +} + bool NKikimr::ObtainTenantKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig) { if (keyConfig.KeysSize()) { // TODO(cthulhu): process muliple keys here. auto &record = keyConfig.GetKeys(0); - return ObtainKey(key, record); - } else { - Cerr << "No Keys in KeyConfig! Encrypted group DsProxies will not start" << Endl; - return false; - } -} - -bool NKikimr::ObtainPDiskKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig) { - if (keyConfig.KeysSize()) { - auto &record = keyConfig.GetKeys(0); - return ObtainKey(key, record); + return ObtainKey(key, record); + } else { + Cerr << "No Keys in KeyConfig! Encrypted group DsProxies will not start" << Endl; + return false; + } +} + +bool NKikimr::ObtainPDiskKey(TEncryptionKey *key, const NKikimrProto::TKeyConfig& keyConfig) { + if (keyConfig.KeysSize()) { + auto &record = keyConfig.GetKeys(0); + return ObtainKey(key, record); } else { - Cerr << "No Keys in PDiskKeyConfig! Encrypted pdisks will not start" << Endl; + Cerr << "No Keys in PDiskKeyConfig! 
Encrypted pdisks will not start" << Endl; return false; } } - + bool NKikimr::ObtainStaticKey(TEncryptionKey *key) { // TODO(cthulhu): Replace this with real data key->Key.SetKey((ui8*)"TestStaticKey", 13); diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h index 7680aeb1ef..be67e5ef67 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h +++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h @@ -11,7 +11,7 @@ namespace NKikimr::NStorage { constexpr ui32 ProxyConfigurationTimeoutMilliseconds = 200; constexpr TDuration BackoffMin = TDuration::MilliSeconds(20); constexpr TDuration BackoffMax = TDuration::Seconds(5); - constexpr const char *MockDevicesPath = "/Berkanavt/kikimr/testing/mock_devices.txt"; + constexpr const char *MockDevicesPath = "/Berkanavt/kikimr/testing/mock_devices.txt"; template<typename T, typename TPred> T *FindOrCreateProtoItem(google::protobuf::RepeatedPtrField<T> *collection, TPred&& pred) { @@ -51,15 +51,15 @@ namespace NKikimr::NStorage { struct TPDiskRecord : TIntrusiveListItem<TPDiskRecord, TUnreportedMetricTag> { - NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk Record; - + NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk Record; + std::optional<NKikimrBlobStorage::TPDiskMetrics> PDiskMetrics; TReplQuoter::TPtr ReplPDiskReadQuoter; TReplQuoter::TPtr ReplPDiskWriteQuoter; TPDiskRecord(NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk record) - : Record(std::move(record)) + : Record(std::move(record)) {} }; @@ -76,7 +76,7 @@ namespace NKikimr::NStorage { std::map<TPDiskKey, TPDiskRecord> LocalPDisks; TIntrusiveList<TPDiskRecord, TUnreportedMetricTag> PDisksWithUnreportedMetrics; - std::set<TPDiskKey> InFlightRestartedPDisks; // for sanity checks only + std::set<TPDiskKey> InFlightRestartedPDisks; // for sanity checks only ui64 LastScrubCookie = RandomNumber<ui64>(); @@ -84,25 +84,25 @@ namespace NKikimr::NStorage { std::optional<TString> InstanceId; // instance 
ID of BS_CONTROLLER running this node TActorId PipeClientId; - TVector<NPDisk::TDriveData> WorkingLocalDrives; - + TVector<NPDisk::TDriveData> WorkingLocalDrives; + NPDisk::TOwnerRound LocalPDiskInitOwnerRound = 1; bool IgnoreCache = false; bool EnableProxyMock = false; - NKikimrBlobStorage::TMockDevicesConfig MockDevicesConfig; + NKikimrBlobStorage::TMockDevicesConfig MockDevicesConfig; struct TEvPrivate { enum EEv { EvSendDiskMetrics = EventSpaceBegin(TEvents::ES_PRIVATE), - EvUpdateNodeDrives, + EvUpdateNodeDrives, EvReadCache, EvGetGroup, }; struct TEvSendDiskMetrics : TEventLocal<TEvSendDiskMetrics, EvSendDiskMetrics> {}; - struct TEvUpdateNodeDrives : TEventLocal<TEvUpdateNodeDrives, EvUpdateNodeDrives> {}; + struct TEvUpdateNodeDrives : TEventLocal<TEvUpdateNodeDrives, EvUpdateNodeDrives> {}; }; TControlWrapper EnablePutBatching; @@ -133,10 +133,10 @@ namespace NKikimr::NStorage { return LocalPDiskInitOwnerRound; } - TIntrusivePtr<TPDiskConfig> CreatePDiskConfig(const NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk& pdisk); + TIntrusivePtr<TPDiskConfig> CreatePDiskConfig(const NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk& pdisk); void StartLocalPDisk(const NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk& pdisk); - void RestartLocalPDiskStart(ui32 pdiskId, TIntrusivePtr<TPDiskConfig> pdiskConfig); - void RestartLocalPDiskFinish(ui32 pdiskId, NKikimrProto::EReplyStatus status); + void RestartLocalPDiskStart(ui32 pdiskId, TIntrusivePtr<TPDiskConfig> pdiskConfig); + void RestartLocalPDiskFinish(ui32 pdiskId, NKikimrProto::EReplyStatus status); void DestroyLocalPDisk(ui32 pdiskId); void ApplyServiceSetPDisks(const NKikimrBlobStorage::TNodeWardenServiceSet& serviceSet); @@ -154,8 +154,8 @@ namespace NKikimr::NStorage { void StartLocalProxy(ui32 groupId); void StartStaticProxies(); - TVector<NPDisk::TDriveData> ListLocalDrives(); - + TVector<NPDisk::TDriveData> ListLocalDrives(); + 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Pipe management @@ -403,19 +403,19 @@ namespace NKikimr::NStorage { void Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev); void Handle(TEvPrivate::TEvSendDiskMetrics::TPtr&); - void Handle(TEvPrivate::TEvUpdateNodeDrives ::TPtr&); - void Handle(NMon::TEvHttpInfo::TPtr&); - void RenderJsonGroupInfo(IOutputStream& out, const std::set<ui32>& groupIds); - void RenderWholePage(IOutputStream&); - void RenderLocalDrives(IOutputStream&); - void RenderDSProxies(IOutputStream& out); + void Handle(TEvPrivate::TEvUpdateNodeDrives ::TPtr&); + void Handle(NMon::TEvHttpInfo::TPtr&); + void RenderJsonGroupInfo(IOutputStream& out, const std::set<ui32>& groupIds); + void RenderWholePage(IOutputStream&); + void RenderLocalDrives(IOutputStream&); + void RenderDSProxies(IOutputStream& out); void SendDiskMetrics(bool reportMetrics); void Handle(TEvStatusUpdate::TPtr ev); void Handle(TEvBlobStorage::TEvDropDonor::TPtr ev); - void Handle(TEvBlobStorage::TEvAskRestartPDisk::TPtr ev); - void Handle(TEvBlobStorage::TEvRestartPDiskResult::TPtr ev); + void Handle(TEvBlobStorage::TEvAskRestartPDisk::TPtr ev); + void Handle(TEvBlobStorage::TEvRestartPDiskResult::TPtr ev); void FillInVDiskStatus(google::protobuf::RepeatedPtrField<NKikimrBlobStorage::TVDiskStatus> *pb, bool initial); @@ -470,8 +470,8 @@ namespace NKikimr::NStorage { hFunc(TEvStatusUpdate, Handle); hFunc(TEvBlobStorage::TEvDropDonor, Handle); - hFunc(TEvBlobStorage::TEvAskRestartPDisk, Handle); - hFunc(TEvBlobStorage::TEvRestartPDiskResult, Handle); + hFunc(TEvBlobStorage::TEvAskRestartPDisk, Handle); + hFunc(TEvBlobStorage::TEvRestartPDiskResult, Handle); hFunc(TEvGroupStatReport, Handle); @@ -479,8 +479,8 @@ namespace NKikimr::NStorage { hFunc(TEvBlobStorage::TEvUpdateGroupInfo, Handle); hFunc(TEvBlobStorage::TEvControllerUpdateDiskStatus, Handle); hFunc(TEvPrivate::TEvSendDiskMetrics, Handle); - 
hFunc(TEvPrivate::TEvUpdateNodeDrives, Handle); - hFunc(NMon::TEvHttpInfo, Handle); + hFunc(TEvPrivate::TEvUpdateNodeDrives, Handle); + hFunc(NMon::TEvHttpInfo, Handle); cFunc(NActors::TEvents::TSystem::Poison, PassAway); hFunc(TEvBlobStorage::TEvControllerScrubQueryStartQuantum, Handle); diff --git a/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp b/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp index 8e4cc03d9d..79568dbe87 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_mon.cpp @@ -1,56 +1,56 @@ -#include "node_warden_impl.h" - +#include "node_warden_impl.h" + #include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h> - + #include <ydb/library/pdisk_io/file_params.h> -#include <library/cpp/json/json_writer.h> - -#include <util/string/split.h> -#include <util/string/strip.h> - -using namespace NKikimr; -using namespace NStorage; - -void TNodeWarden::Handle(NMon::TEvHttpInfo::TPtr &ev) { - const TCgiParameters &cgi = ev->Get()->Request.GetParams(); - TStringBuf pathInfo = ev->Get()->Request.GetPathInfo(); - TStringStream out; - std::unique_ptr<NMon::TEvHttpInfoRes> result; - if (pathInfo.StartsWith("/json")) { - if (pathInfo.EndsWith("all")) { - } else if (pathInfo.EndsWith("groups")) { - std::set<ui32> groupsIds; - if (cgi.Has("ids")) { - StringSplitter(cgi.Get("ids")).SplitBySet(" ,;\n").SkipEmpty().Consume([&](TStringBuf token) { - i64 groupId = FromStringWithDefault<i64>(StripString(token), -1); - if (groupId != -1) { - groupsIds.emplace(groupId); - } - }); - } - RenderJsonGroupInfo(out, groupsIds); - } - result = std::make_unique<NMon::TEvHttpInfoRes>(NMonitoring::HTTPOKJSON + out.Str(), 0, - NMon::IEvHttpInfoRes::EContentType::Custom); - } else { - RenderWholePage(out); - result = std::make_unique<NMon::TEvHttpInfoRes>(out.Str()); - } - Send(ev->Sender, result.release()); -} - -void TNodeWarden::RenderJsonGroupInfo(IOutputStream& out, const std::set<ui32>& groupIds) { - 
std::set<ui32> allGroups; - if (groupIds.empty()) { +#include <library/cpp/json/json_writer.h> + +#include <util/string/split.h> +#include <util/string/strip.h> + +using namespace NKikimr; +using namespace NStorage; + +void TNodeWarden::Handle(NMon::TEvHttpInfo::TPtr &ev) { + const TCgiParameters &cgi = ev->Get()->Request.GetParams(); + TStringBuf pathInfo = ev->Get()->Request.GetPathInfo(); + TStringStream out; + std::unique_ptr<NMon::TEvHttpInfoRes> result; + if (pathInfo.StartsWith("/json")) { + if (pathInfo.EndsWith("all")) { + } else if (pathInfo.EndsWith("groups")) { + std::set<ui32> groupsIds; + if (cgi.Has("ids")) { + StringSplitter(cgi.Get("ids")).SplitBySet(" ,;\n").SkipEmpty().Consume([&](TStringBuf token) { + i64 groupId = FromStringWithDefault<i64>(StripString(token), -1); + if (groupId != -1) { + groupsIds.emplace(groupId); + } + }); + } + RenderJsonGroupInfo(out, groupsIds); + } + result = std::make_unique<NMon::TEvHttpInfoRes>(NMonitoring::HTTPOKJSON + out.Str(), 0, + NMon::IEvHttpInfoRes::EContentType::Custom); + } else { + RenderWholePage(out); + result = std::make_unique<NMon::TEvHttpInfoRes>(out.Str()); + } + Send(ev->Sender, result.release()); +} + +void TNodeWarden::RenderJsonGroupInfo(IOutputStream& out, const std::set<ui32>& groupIds) { + std::set<ui32> allGroups; + if (groupIds.empty()) { allGroups.insert(EjectedGroups.begin(), EjectedGroups.end()); for (const auto& [groupId, _] : Groups) { - allGroups.emplace(groupId); - } - } - - NJson::TJsonArray array; - for (auto& groupId : (!groupIds.empty() ? groupIds : allGroups)) { - NJson::TJsonValue groupInfo; + allGroups.emplace(groupId); + } + } + + NJson::TJsonArray array; + for (auto& groupId : (!groupIds.empty() ? groupIds : allGroups)) { + NJson::TJsonValue groupInfo; groupInfo["GroupId"] = groupId; @@ -59,241 +59,241 @@ void TNodeWarden::RenderJsonGroupInfo(IOutputStream& out, const std::set<ui32>& } else { TGroupRecord& group = Groups[groupId]; groupInfo["Status"] = group.ProxyRunning ? 
"started" : "stopped"; - + if (const auto& info = group.Info) { groupInfo["Generation"] = info->GroupGeneration; groupInfo["ErasureType"] = info->Type.ToString(); groupInfo["EncryptionMode"] = TStringBuilder() << info->GetEncryptionMode(); groupInfo["LifeCyclePhase"] = TStringBuilder() << info->GetLifeCyclePhase(); - + NJson::TJsonArray vdisks; for (ui32 i = 0; i < info->GetTotalVDisksNum(); ++i) { vdisks.AppendValue(info->GetVDiskId(i).ToString()); } groupInfo["VDisks"] = std::move(vdisks); } - } - - array.AppendValue(std::move(groupInfo)); - } - NJson::WriteJson(&out, &array); -} - -void TNodeWarden::RenderWholePage(IOutputStream& out) { - HTML (out) { - out << R"__( - <style> - table.oddgray > tbody > tr:nth-child(odd) { - background-color: #f0f0f0; - } - </style> - )__"; - + } + + array.AppendValue(std::move(groupInfo)); + } + NJson::WriteJson(&out, &array); +} + +void TNodeWarden::RenderWholePage(IOutputStream& out) { + HTML (out) { + out << R"__( + <style> + table.oddgray > tbody > tr:nth-child(odd) { + background-color: #f0f0f0; + } + </style> + )__"; + H2() { out << "NodeWarden on node " << LocalNodeId; } - RenderLocalDrives(out); - - H3() { out << "PDisks"; } - TABLE_CLASS("table oddgray") { - TABLEHEAD() { - TABLER() { - TABLEH() { out << "Id (NodeId, PDiskId)"; } - TABLEH() { out << "Path"; } - TABLEH() { out << "Guid"; } - TABLEH() { out << "Category"; } - } - } - TABLEBODY() { - for (auto& [key, value] : LocalPDisks) { - TABLER() { - TABLED() { out << "(" << key.NodeId << "," << key.PDiskId << ")"; } - TABLED() { out << value.Record.GetPath(); } - TABLED() { out << value.Record.GetPDiskGuid(); } - TABLED() { out << value.Record.GetPDiskCategory(); } - } - } - } - } - - H3() { out << "VDisks"; } - TABLE_CLASS("table oddgray") { - TABLEHEAD() { - TABLER() { - TABLEH() { out << "Location (NodeId, PDiskId, VSlotId)"; } - TABLEH() { out << "VDiskId"; } + RenderLocalDrives(out); + + H3() { out << "PDisks"; } + TABLE_CLASS("table oddgray") { + TABLEHEAD() { + 
TABLER() { + TABLEH() { out << "Id (NodeId, PDiskId)"; } + TABLEH() { out << "Path"; } + TABLEH() { out << "Guid"; } + TABLEH() { out << "Category"; } + } + } + TABLEBODY() { + for (auto& [key, value] : LocalPDisks) { + TABLER() { + TABLED() { out << "(" << key.NodeId << "," << key.PDiskId << ")"; } + TABLED() { out << value.Record.GetPath(); } + TABLED() { out << value.Record.GetPDiskGuid(); } + TABLED() { out << value.Record.GetPDiskCategory(); } + } + } + } + } + + H3() { out << "VDisks"; } + TABLE_CLASS("table oddgray") { + TABLEHEAD() { + TABLER() { + TABLEH() { out << "Location (NodeId, PDiskId, VSlotId)"; } + TABLEH() { out << "VDiskId"; } TABLEH() { out << "Running"; } - TABLEH() { out << "StoragePoolName"; } - TABLEH() { out << "DonorMode"; } - TABLEH() { out << "CurrentStatus"; } - TABLEH() { out << "ReportedVDiskStatus"; } - } - } - TABLEBODY() { - for (auto& [key, value] : LocalVDisks) { - TABLER() { - TABLED() { out << "(" << key.NodeId << "," << key.PDiskId << "," << key.VDiskSlotId << ")"; } + TABLEH() { out << "StoragePoolName"; } + TABLEH() { out << "DonorMode"; } + TABLEH() { out << "CurrentStatus"; } + TABLEH() { out << "ReportedVDiskStatus"; } + } + } + TABLEBODY() { + for (auto& [key, value] : LocalVDisks) { + TABLER() { + TABLED() { out << "(" << key.NodeId << "," << key.PDiskId << "," << key.VDiskSlotId << ")"; } TABLED() { out << value.GetVDiskId(); } TABLED() { out << (value.RuntimeData ? "true" : "false"); } TABLED() { out << value.Config.GetStoragePoolName(); } TABLED() { out << (value.Config.HasDonorMode() ? 
"true" : "false"); } - TABLED() { + TABLED() { out << value.Status; - } - TABLED() { - if (value.ReportedVDiskStatus) { - out << *value.ReportedVDiskStatus; - } else { - out << "(unknown)"; - } - } - } - } - } - } - - RenderDSProxies(out); - } -} - -void TNodeWarden::RenderDSProxies(IOutputStream& out) { - HTML(out) { - out << R"_( - <script> - function loadGroups(status) { - $.ajax({ - url: document.URL + "/json/groups", - success: printGroupTable.bind(this, status) - }); - } - - function getOrEmpty(val) { - return val !== undefined ? val : "(empty)"; - } - - function printGroupTable(status, result) { - var tbody = document.getElementById(status + 'DSProxiesTBody'); - tbody.innerHTML = ""; - for (var i = 0; i < result.length; ++i) { - var html = ''; - var group = result[i]; - if (group['Status'] == status) { - html += "<td>" + getOrEmpty(group['GroupId']) + "</td>"; - html += "<td>" + getOrEmpty(group['Generation']) + "</td>"; - html += "<td>" + getOrEmpty(group['ErasureType']) + "</td>"; - html += "<td>" + getOrEmpty(group['EncryptionMode']) + "</td>"; - html += "<td>" + getOrEmpty(group['LifeCyclePhase']) + "</td>"; - html += "<td>"; - if (group['VDisks'] !== undefined) { - html += group['VDisks'].join('<br>'); - } else { - html += "(empty)"; - } - html += "</td>"; - } - tbody.insertRow(-1).innerHTML = html; - } - - //document.getElementById(status + 'DSProxiesButton').remove(); - } - </script> - )_"; - - auto createTable = [&](IOutputStream& out, const TString& status, const ui64 rows) { - out << "<table class='table oddgray'>"; - out << R"_( <thead> - <tr> - <th>GroupId</th> - <th>Generation</th> - <th>ErasureType</th> - <th>EncryptionMode</th> - <th>LifeCyclePhase</th> - <th>VDisks</th> - </tr> - </thead> - )_"; - out << "<tbody id='" << status << "DSProxiesTBody'><tr><td colspan=6 style='text-align:center'>"; - out << "<button style='margin-top:30px;margin-bottom:30px' onclick='loadGroups(\"" << status << "\");'>"; - out << "Load, approx. 
rows " << rows; - out << "</button>"; - out << "</td></tr></tbody>"; - out << "</table>"; - }; - + } + TABLED() { + if (value.ReportedVDiskStatus) { + out << *value.ReportedVDiskStatus; + } else { + out << "(unknown)"; + } + } + } + } + } + } + + RenderDSProxies(out); + } +} + +void TNodeWarden::RenderDSProxies(IOutputStream& out) { + HTML(out) { + out << R"_( + <script> + function loadGroups(status) { + $.ajax({ + url: document.URL + "/json/groups", + success: printGroupTable.bind(this, status) + }); + } + + function getOrEmpty(val) { + return val !== undefined ? val : "(empty)"; + } + + function printGroupTable(status, result) { + var tbody = document.getElementById(status + 'DSProxiesTBody'); + tbody.innerHTML = ""; + for (var i = 0; i < result.length; ++i) { + var html = ''; + var group = result[i]; + if (group['Status'] == status) { + html += "<td>" + getOrEmpty(group['GroupId']) + "</td>"; + html += "<td>" + getOrEmpty(group['Generation']) + "</td>"; + html += "<td>" + getOrEmpty(group['ErasureType']) + "</td>"; + html += "<td>" + getOrEmpty(group['EncryptionMode']) + "</td>"; + html += "<td>" + getOrEmpty(group['LifeCyclePhase']) + "</td>"; + html += "<td>"; + if (group['VDisks'] !== undefined) { + html += group['VDisks'].join('<br>'); + } else { + html += "(empty)"; + } + html += "</td>"; + } + tbody.insertRow(-1).innerHTML = html; + } + + //document.getElementById(status + 'DSProxiesButton').remove(); + } + </script> + )_"; + + auto createTable = [&](IOutputStream& out, const TString& status, const ui64 rows) { + out << "<table class='table oddgray'>"; + out << R"_( <thead> + <tr> + <th>GroupId</th> + <th>Generation</th> + <th>ErasureType</th> + <th>EncryptionMode</th> + <th>LifeCyclePhase</th> + <th>VDisks</th> + </tr> + </thead> + )_"; + out << "<tbody id='" << status << "DSProxiesTBody'><tr><td colspan=6 style='text-align:center'>"; + out << "<button style='margin-top:30px;margin-bottom:30px' onclick='loadGroups(\"" << status << "\");'>"; + out << 
"Load, approx. rows " << rows; + out << "</button>"; + out << "</td></tr></tbody>"; + out << "</table>"; + }; + ui32 numStarted = 0, numEjected = EjectedGroups.size(); for (const auto& [groupId, group] : Groups) { numStarted += group.ProxyRunning; } - H3() { out << "Started DSProxies"; } + H3() { out << "Started DSProxies"; } createTable(out, "started", numStarted); - - H3() { out << "Ejected DSProxies"; } + + H3() { out << "Ejected DSProxies"; } createTable(out, "ejected", numEjected); - } -} - -void TNodeWarden::RenderLocalDrives(IOutputStream& out) { - TVector<NPDisk::TDriveData> onlineLocalDrives = ListLocalDrives(); - - HTML(out) { - H3() { out << "LocalDrives"; } - out << "\n"; - TABLE_CLASS("table oddgray") { - TABLEHEAD() { - TABLER() { - TABLEH() { out << "SentToBSC"; } - TABLEH() { out << "Online"; } - TABLEH() { out << "Path"; } - TABLEH() { out << "Serial"; } - TABLEH() { out << "DeviceType"; } - } - } - out << "\n"; - TABLEBODY() { - auto initialIt = WorkingLocalDrives.begin(); - auto onlineIt = onlineLocalDrives.begin(); - while (initialIt != WorkingLocalDrives.end() || onlineIt != onlineLocalDrives.end()) { - TABLER() { - NPDisk::TDriveData *initialData = nullptr; - NPDisk::TDriveData *onlineData = nullptr; - if (initialIt == WorkingLocalDrives.end()) { - onlineData = &*onlineIt; - ++onlineIt; - } else if (onlineIt == onlineLocalDrives.end()) { - initialData = &*initialIt; - ++initialIt; - } else { - if (initialIt->Path < onlineIt->Path) { - initialData = &*initialIt; - ++initialIt; - } else if (initialIt->Path > onlineIt->Path) { - onlineData = &*onlineIt; - ++onlineIt; - } else { - if (initialIt->SerialNumber == onlineIt->SerialNumber) { - initialData = &*initialIt; - ++initialIt; - onlineData = &*onlineIt; - ++onlineIt; - } else { - initialData = &*initialIt; - ++initialIt; - } - } - } - TABLED() { out << (initialData ? "true" : "<b style='color: red'>false</b>"); } - TABLED() { out << (onlineData ? 
"true" : "<b style='color: red'>false</b>"); } - NPDisk::TDriveData *data = initialData ? initialData : onlineData ? onlineData : nullptr; - Y_VERIFY(data); - TABLED() { out << data->Path; } - TABLED() { out << data->SerialNumber.Quote(); } - TABLED() { - out << TPDiskCategory::DeviceTypeStr(data->DeviceType, true); - out << (data->IsMock ? "(mock)" : ""); - } - } - out << "\n"; - } - } - } - } -} + } +} + +void TNodeWarden::RenderLocalDrives(IOutputStream& out) { + TVector<NPDisk::TDriveData> onlineLocalDrives = ListLocalDrives(); + + HTML(out) { + H3() { out << "LocalDrives"; } + out << "\n"; + TABLE_CLASS("table oddgray") { + TABLEHEAD() { + TABLER() { + TABLEH() { out << "SentToBSC"; } + TABLEH() { out << "Online"; } + TABLEH() { out << "Path"; } + TABLEH() { out << "Serial"; } + TABLEH() { out << "DeviceType"; } + } + } + out << "\n"; + TABLEBODY() { + auto initialIt = WorkingLocalDrives.begin(); + auto onlineIt = onlineLocalDrives.begin(); + while (initialIt != WorkingLocalDrives.end() || onlineIt != onlineLocalDrives.end()) { + TABLER() { + NPDisk::TDriveData *initialData = nullptr; + NPDisk::TDriveData *onlineData = nullptr; + if (initialIt == WorkingLocalDrives.end()) { + onlineData = &*onlineIt; + ++onlineIt; + } else if (onlineIt == onlineLocalDrives.end()) { + initialData = &*initialIt; + ++initialIt; + } else { + if (initialIt->Path < onlineIt->Path) { + initialData = &*initialIt; + ++initialIt; + } else if (initialIt->Path > onlineIt->Path) { + onlineData = &*onlineIt; + ++onlineIt; + } else { + if (initialIt->SerialNumber == onlineIt->SerialNumber) { + initialData = &*initialIt; + ++initialIt; + onlineData = &*onlineIt; + ++onlineIt; + } else { + initialData = &*initialIt; + ++initialIt; + } + } + } + TABLED() { out << (initialData ? "true" : "<b style='color: red'>false</b>"); } + TABLED() { out << (onlineData ? "true" : "<b style='color: red'>false</b>"); } + NPDisk::TDriveData *data = initialData ? initialData : onlineData ? 
onlineData : nullptr; + Y_VERIFY(data); + TABLED() { out << data->Path; } + TABLED() { out << data->SerialNumber.Quote(); } + TABLED() { + out << TPDiskCategory::DeviceTypeStr(data->DeviceType, true); + out << (data->IsMock ? "(mock)" : ""); + } + } + out << "\n"; + } + } + } + } +} diff --git a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp index 7dcc1d9136..07adc41be7 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp @@ -199,7 +199,7 @@ namespace NKikimr::NStorage { STLOG(PRI_NOTICE, BS_NODE, NW69, "RestartLocalPDisk is started", (PDiskId, pdiskId)); } - void TNodeWarden::RestartLocalPDiskFinish(ui32 pdiskId, NKikimrProto::EReplyStatus status) { + void TNodeWarden::RestartLocalPDiskFinish(ui32 pdiskId, NKikimrProto::EReplyStatus status) { const TPDiskKey pdiskKey(LocalNodeId, pdiskId); size_t erasedCount = InFlightRestartedPDisks.erase(pdiskKey); @@ -208,27 +208,27 @@ namespace NKikimr::NStorage { const TVSlotId from(pdiskKey.NodeId, pdiskKey.PDiskId, 0); const TVSlotId to(pdiskKey.NodeId, pdiskKey.PDiskId, Max<ui32>()); - if (status == NKikimrProto::EReplyStatus::OK) { - TStringStream vdisks; - bool first = true; - vdisks << "{"; + if (status == NKikimrProto::EReplyStatus::OK) { + TStringStream vdisks; + bool first = true; + vdisks << "{"; for (auto it = LocalVDisks.lower_bound(from); it != LocalVDisks.end() && it->first <= to; ++it) { - auto& [key, value] = *it; - - PoisonLocalVDisk(value); - vdisks << (std::exchange(first, false) ? 
"" : ", ") << value.GetVDiskId().ToString(); - if (value.SlayInFlight) { - Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(), - NextLocalPDiskInitOwnerRound(), key.PDiskId, key.VDiskSlotId)); - } else { - StartLocalVDiskActor(value, TDuration::Zero()); - } + auto& [key, value] = *it; + + PoisonLocalVDisk(value); + vdisks << (std::exchange(first, false) ? "" : ", ") << value.GetVDiskId().ToString(); + if (value.SlayInFlight) { + Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(), + NextLocalPDiskInitOwnerRound(), key.PDiskId, key.VDiskSlotId)); + } else { + StartLocalVDiskActor(value, TDuration::Zero()); + } } - SendDiskMetrics(false); - - vdisks << "}"; - STLOG(PRI_NOTICE, BS_NODE, NW74, "RestartLocalPDisk has finished", - (PDiskId, pdiskId), (VDiskIds, vdisks.Str())); + SendDiskMetrics(false); + + vdisks << "}"; + STLOG(PRI_NOTICE, BS_NODE, NW74, "RestartLocalPDisk has finished", + (PDiskId, pdiskId), (VDiskIds, vdisks.Str())); } else { for (auto it = LocalVDisks.lower_bound(from); it != LocalVDisks.end() && it->first <= to; ++it) { auto& [key, value] = *it; diff --git a/ydb/core/blobstorage/nodewarden/node_warden_resource.cpp b/ydb/core/blobstorage/nodewarden/node_warden_resource.cpp index d9441c8655..d8179b7af8 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_resource.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_resource.cpp @@ -2,11 +2,11 @@ #include <ydb/core/blobstorage/crypto/default.h> #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h> - + #include <ydb/library/pdisk_io/file_params.h> - -#include <util/string/split.h> - + +#include <util/string/split.h> + using namespace NKikimr; using namespace NStorage; @@ -98,7 +98,7 @@ void TNodeWarden::HandleIncrHugeInit(NIncrHuge::TEvIncrHugeInit::TPtr ev) { NIncrHuge::TKeeperSettings settings{ it->first.PDiskId, MakeBlobStoragePDiskID(it->first.NodeId, it->first.PDiskId), - 
it->second.Record.GetPDiskGuid(), + it->second.Record.GetPDiskGuid(), config.GetMinHugeBlobInBytes(), config.GetMinCleanChunks(), config.GetMinAllocationBatch(), diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp index b4e15ad683..096ac7ee90 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp @@ -149,7 +149,7 @@ namespace NKikimr::NStorage { donorDiskIds.emplace_back(VDiskIDFromVDiskID(donor.GetVDiskId()), donorSlot.GetVDiskServiceId()); } - TVDiskConfig::TBaseInfo baseInfo(vdiskId, pdiskServiceId, pdiskGuid, vslotId.PDiskId, deviceType, + TVDiskConfig::TBaseInfo baseInfo(vdiskId, pdiskServiceId, pdiskGuid, vslotId.PDiskId, deviceType, vslotId.VDiskSlotId, kind, NextLocalPDiskInitOwnerRound(), groupInfo->GetStoragePoolName(), donorMode, donorDiskIds, scrubCookie, whiteboardInstanceGuid); diff --git a/ydb/core/blobstorage/nodewarden/ut_sequence/dsproxy_config_retrieval.cpp b/ydb/core/blobstorage/nodewarden/ut_sequence/dsproxy_config_retrieval.cpp index 4ac7a3d764..040411e06c 100644 --- a/ydb/core/blobstorage/nodewarden/ut_sequence/dsproxy_config_retrieval.cpp +++ b/ydb/core/blobstorage/nodewarden/ut_sequence/dsproxy_config_retrieval.cpp @@ -25,7 +25,7 @@ void SetupServices(TTestBasicRuntime& runtime) { app.AddDomain(dom.Release()); TTempDir temp; - TString path = "SectorMap:" + temp() + "static.dat"; + TString path = "SectorMap:" + temp() + "static.dat"; ui64 pdiskSize = 32ULL << 30; ui64 chunkSize = 32ULL << 20; ui64 guid = RandomNumber<ui64>(); diff --git a/ydb/core/blobstorage/nodewarden/ya.make b/ydb/core/blobstorage/nodewarden/ya.make index db450ca225..b43e61e02d 100644 --- a/ydb/core/blobstorage/nodewarden/ya.make +++ b/ydb/core/blobstorage/nodewarden/ya.make @@ -12,7 +12,7 @@ SRCS( node_warden_group.cpp node_warden_group_resolver.cpp node_warden_impl.cpp - node_warden_mon.cpp + node_warden_mon.cpp 
node_warden_pdisk.cpp node_warden_pipe.cpp node_warden_proxy.cpp @@ -23,7 +23,7 @@ SRCS( ) PEERDIR( - library/cpp/json + library/cpp/json ydb/core/base ydb/core/blobstorage/groupinfo ydb/core/blobstorage/pdisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h index 53b02459cf..8e722ce42a 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk.h @@ -1,10 +1,10 @@ #pragma once #include "defs.h" - + #include "blobstorage_pdisk_defs.h" #include "blobstorage_pdisk_params.h" -#include "blobstorage_pdisk_config.h" - +#include "blobstorage_pdisk_config.h" + #include <ydb/core/blobstorage/base/vdisk_lsn.h> #include <ydb/core/blobstorage/base/blobstorage_vdiskid.h> #include <ydb/core/blobstorage/base/bufferwithgaps.h> @@ -95,11 +95,11 @@ protected: class TLogRecord { public: - TLogSignature Signature; + TLogSignature Signature; TString Data; ui64 Lsn; - TLogRecord(TLogSignature signature, const TString &data, ui64 lsn) + TLogRecord(TLogSignature signature, const TString &data, ui64 lsn) : Signature(signature) , Data(data) , Lsn(lsn) @@ -118,7 +118,7 @@ public: TString ToString() const { TStringStream str; - str << "{TLogRecord Signature# " << Signature.ToString(); + str << "{TLogRecord Signature# " << Signature.ToString(); str << " Data.Size()# " << Data.size(); str << " Lsn# " << Lsn; str << "}"; @@ -167,7 +167,7 @@ struct TEvYardInit : public TEventLocal<TEvYardInit, TEvBlobStorage::EvYardInit> struct TEvYardInitResult : public TEventLocal<TEvYardInitResult, TEvBlobStorage::EvYardInitResult> { NKikimrProto::EReplyStatus Status; - TMap<TLogSignature, TLogRecord> StartingPoints; + TMap<TLogSignature, TLogRecord> StartingPoints; TStatusFlags StatusFlags; TIntrusivePtr<TPDiskParams> PDiskParams; TVector<TChunkIdx> OwnedChunks; // Sorted vector of owned chunk identifiers. 
@@ -245,12 +245,12 @@ struct TEvLogResult; struct TEvLog : public TEventLocal<TEvLog, TEvBlobStorage::EvLog> { struct ICallback { virtual ~ICallback() = default; - virtual void operator ()(TActorSystem *actorSystem, const TEvLogResult &ev) = 0; + virtual void operator ()(TActorSystem *actorSystem, const TEvLogResult &ev) = 0; }; using TCallback = std::unique_ptr<ICallback>; - explicit TEvLog(TOwner owner, TOwnerRound ownerRound, TLogSignature signature, + explicit TEvLog(TOwner owner, TOwnerRound ownerRound, TLogSignature signature, const TString &data, TLsnSeg seg, void *cookie, TCallback &&cb = TCallback()) : Owner(owner) , OwnerRound(ownerRound) @@ -270,12 +270,12 @@ struct TEvLog : public TEventLocal<TEvLog, TEvBlobStorage::EvLog> { REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&cookie, sizeof(cookie)); } - explicit TEvLog(TOwner owner, TOwnerRound ownerRound, TLogSignature signature, + explicit TEvLog(TOwner owner, TOwnerRound ownerRound, TLogSignature signature, const TCommitRecord &commitRecord, const TString &data, TLsnSeg seg, void *cookie, TCallback &&cb = TCallback()) : Owner(owner) , OwnerRound(ownerRound) - , Signature(signature, /*commitRecord*/ true) + , Signature(signature, /*commitRecord*/ true) , Data(data) , LsnSegmentStart(seg.First) , Lsn(seg.Last) @@ -304,7 +304,7 @@ struct TEvLog : public TEventLocal<TEvLog, TEvBlobStorage::EvLog> { str << " Lsn# " << (ui64)record.Lsn; str << " LsnSegmentStart# " << (ui32)record.LsnSegmentStart; str << " Cookie# " << (ui64)record.Cookie; - if (record.Signature.HasCommitRecord()) { + if (record.Signature.HasCommitRecord()) { str << record.CommitRecord.ToString(); } str << "}"; @@ -317,7 +317,7 @@ struct TEvLog : public TEventLocal<TEvLog, TEvBlobStorage::EvLog> { TOwner Owner; TOwnerRound OwnerRound; - TLogSignature Signature; + TLogSignature Signature; TString Data; ui64 LsnSegmentStart; // we may write a log record for diapason of lsns [LsnSegmentStart, Lsn]; // usually LsnSegmentStart=Lsn and this diapason is a 
single point @@ -325,8 +325,8 @@ struct TEvLog : public TEventLocal<TEvLog, TEvBlobStorage::EvLog> { void *Cookie; TCallback LogCallback; TCommitRecord CommitRecord; - - mutable NLWTrace::TOrbit Orbit; + + mutable NLWTrace::TOrbit Orbit; }; struct TEvMultiLog : public TEventLocal<TEvMultiLog, TEvBlobStorage::EvMultiLog> { @@ -364,7 +364,7 @@ struct TEvLogResult : public TEventLocal<TEvLogResult, TEvBlobStorage::EvLogResu struct TRecord { ui64 Lsn; void *Cookie; - mutable NLWTrace::TOrbit Orbit; + mutable NLWTrace::TOrbit Orbit; TRecord(ui64 lsn, void *cookie) : Lsn(lsn) @@ -412,10 +412,10 @@ struct TEvLogResult : public TEventLocal<TEvLogResult, TEvBlobStorage::EvLogResu struct TEvReadLog : public TEventLocal<TEvReadLog, TEvBlobStorage::EvReadLog> { TOwner Owner; TOwnerRound OwnerRound; - TLogPosition Position; + TLogPosition Position; ui64 SizeLimit; - TEvReadLog(TOwner owner, TOwnerRound ownerRound, TLogPosition position = TLogPosition{0, 0}, ui64 sizeLimit = 16 << 20) + TEvReadLog(TOwner owner, TOwnerRound ownerRound, TLogPosition position = TLogPosition{0, 0}, ui64 sizeLimit = 16 << 20) : Owner(owner) , OwnerRound(ownerRound) , Position(position) @@ -441,22 +441,22 @@ struct TEvReadLogResult : public TEventLocal<TEvReadLogResult, TEvBlobStorage::E typedef TVector<TLogRecord> TResults; TResults Results; NKikimrProto::EReplyStatus Status; - TLogPosition Position; - TLogPosition NextPosition; + TLogPosition Position; + TLogPosition NextPosition; bool IsEndOfLog; TStatusFlags StatusFlags; TString ErrorReason; - TOwner Owner; + TOwner Owner; - TEvReadLogResult(NKikimrProto::EReplyStatus status, TLogPosition position, TLogPosition nextPosition, - bool isEndOfLog, TStatusFlags statusFlags, const TString &errorReason, TOwner owner) + TEvReadLogResult(NKikimrProto::EReplyStatus status, TLogPosition position, TLogPosition nextPosition, + bool isEndOfLog, TStatusFlags statusFlags, const TString &errorReason, TOwner owner) : Status(status) , Position(position) , 
NextPosition(nextPosition) , IsEndOfLog(isEndOfLog) , StatusFlags(statusFlags) , ErrorReason(errorReason) - , Owner(owner) + , Owner(owner) {} TString ToString() const { @@ -932,12 +932,12 @@ struct TEvChunkWrite : public TEventLocal<TEvChunkWrite, TEvBlobStorage::EvChunk return str.Str(); } - void Validate() const { - const ui32 count = PartsPtr ? PartsPtr->Size() : 0; - for (ui32 idx = 0; idx < count; ++idx) { + void Validate() const { + const ui32 count = PartsPtr ? PartsPtr->Size() : 0; + for (ui32 idx = 0; idx < count; ++idx) { Y_VERIFY((*PartsPtr)[idx].second); - if ((*PartsPtr)[idx].first) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED((*PartsPtr)[idx].first, (*PartsPtr)[idx].second); + if ((*PartsPtr)[idx].first) { + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED((*PartsPtr)[idx].first, (*PartsPtr)[idx].second); } } } @@ -1166,7 +1166,7 @@ struct TEvConfigureScheduler : public TEventLocal<TEvConfigureScheduler, TEvBlob TOwner Owner; TOwnerRound OwnerRound; - TPDiskSchedulerConfig SchedulerCfg; + TPDiskSchedulerConfig SchedulerCfg; TEvConfigureScheduler(TOwner owner, TOwnerRound ownerRound) : Owner(owner) @@ -1177,7 +1177,7 @@ struct TEvConfigureScheduler : public TEventLocal<TEvConfigureScheduler, TEvBlob TStringStream str; str << "{TEvConfigureScheduler ownerId# " << (ui32)Owner; str << " ownerRound# " << OwnerRound; - str << " SchedulerCfg# " << SchedulerCfg.ToString(false); + str << " SchedulerCfg# " << SchedulerCfg.ToString(false); str << "}"; return str.Str(); } @@ -1210,13 +1210,13 @@ struct TEvYardControl : public TEventLocal<TEvYardControl, TEvBlobStorage::EvYar ActionPause = 0, ActionStep = 1, ActionResume = 2, - Brake = 3, - PDiskStop = 4, - // If pdisk is working now successfull responce will be sent immediately - // Else responce will be sent only when PDisk is fully initialized or come in error state - PDiskStart = 5, - // Return pointer to TPDisk instance in TEvYardControlResult::Cookie - GetPDiskPointer = 6, + Brake = 3, + PDiskStop = 4, + // If pdisk is 
working now successfull responce will be sent immediately + // Else responce will be sent only when PDisk is fully initialized or come in error state + PDiskStart = 5, + // Return pointer to TPDisk instance in TEvYardControlResult::Cookie + GetPDiskPointer = 6, }; ui32 Action; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp index af1b2fb693..d92b19c6cc 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp @@ -3,15 +3,15 @@ #include "blobstorage_pdisk_crypto.h" #include "blobstorage_pdisk_data.h" #include "blobstorage_pdisk_factory.h" -#include "blobstorage_pdisk_impl.h" +#include "blobstorage_pdisk_impl.h" #include "blobstorage_pdisk_mon.h" #include "blobstorage_pdisk_requestimpl.h" #include "blobstorage_pdisk_state.h" #include "blobstorage_pdisk_thread.h" -#include "blobstorage_pdisk_tools.h" +#include "blobstorage_pdisk_tools.h" #include "blobstorage_pdisk_util_countedqueueoneone.h" #include "blobstorage_pdisk_util_cputimer.h" -#include "blobstorage_pdisk_writer.h" +#include "blobstorage_pdisk_writer.h" #include <ydb/core/base/appdata.h> #include <ydb/core/base/counters.h> @@ -31,8 +31,8 @@ #include <library/cpp/monlib/service/pages/templates.h> #include <util/generic/algorithm.h> -#include <util/random/entropy.h> -#include <util/string/split.h> +#include <util/random/entropy.h> +#include <util/string/split.h> #include <util/system/sanitizers.h> namespace NKikimr { @@ -70,19 +70,19 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> { TIntrusivePtr<TPDiskConfig> Cfg; TKey MainKey; TList<TInitQueueItem> InitQueue; - const TIntrusivePtr<NMonitoring::TDynamicCounters> PDiskCounters; - TIntrusivePtr<TPDisk> PDisk; - bool IsMagicAlreadyChecked = false; - - THolder<TThread> FormattingThread; - bool IsFormattingNow = false; - std::function<void()> PendingRestartResponse; - + const 
TIntrusivePtr<NMonitoring::TDynamicCounters> PDiskCounters; + TIntrusivePtr<TPDisk> PDisk; + bool IsMagicAlreadyChecked = false; + + THolder<TThread> FormattingThread; + bool IsFormattingNow = false; + std::function<void()> PendingRestartResponse; + TActorId NodeWhiteboardServiceId; TActorId NodeWardenServiceId; - THolder<IEventHandle> ControledStartResult; - + THolder<IEventHandle> ControledStartResult; + class TWhiteboardFlag { private: class TSource { @@ -144,16 +144,16 @@ class TPDiskActor : public TActorBootstrapped<TPDiskActor> { }; private: TVector<TSource> Sources; - NKikimrWhiteboard::EFlag LastFlag = NKikimrWhiteboard::Grey; + NKikimrWhiteboard::EFlag LastFlag = NKikimrWhiteboard::Grey; public: void AddSource(const TLightBase& light) { Sources.emplace_back(light); } - void RemoveSources() { - Sources.clear(); - } - + void RemoveSources() { + Sources.clear(); + } + ui64 GetRedMsPs() { ui64 redMsPs = 0; for (TSource& source : Sources) { @@ -198,11 +198,11 @@ public: TPDiskActor(const TIntrusivePtr<TPDiskConfig>& cfg, const NPDisk::TKey &mainKey, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) - : Cfg(cfg) + : Cfg(cfg) , MainKey(mainKey) - , PDiskCounters(GetServiceCounters(counters, "pdisks") - ->GetSubgroup("pdisk", Sprintf("%09" PRIu32, (ui32)cfg->PDiskId)) - ->GetSubgroup("media", to_lower(cfg->PDiskCategory.TypeStrShort()))) + , PDiskCounters(GetServiceCounters(counters, "pdisks") + ->GetSubgroup("pdisk", Sprintf("%09" PRIu32, (ui32)cfg->PDiskId)) + ->GetSubgroup("media", to_lower(cfg->PDiskCategory.TypeStrShort()))) { } @@ -213,7 +213,7 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Bootstrap state void Bootstrap(const TActorContext &ctx) { - auto mon = AppData()->Mon; + auto mon = AppData()->Mon; if (mon) { NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); NMonitoring::TIndexMonPage *pdisksMonPage = 
actorsMonPage->RegisterIndexPage("pdisks", "PDisks"); @@ -221,283 +221,283 @@ public: TString path = Sprintf("pdisk%09" PRIu32, (ui32)Cfg->PDiskId); TString name = Sprintf("PDisk%09" PRIu32, (ui32)Cfg->PDiskId); mon->RegisterActorPage(pdisksMonPage, path, name, false, ctx.ExecutorThread.ActorSystem, - SelfId()); + SelfId()); } - NodeWhiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(SelfId().NodeId()); - NodeWardenServiceId = MakeBlobStorageNodeWardenID(SelfId().NodeId()); - - Schedule(TDuration::MilliSeconds(Cfg->StatisticsUpdateIntervalMs), new TEvents::TEvWakeup()); - - StartPDiskThread(); - } - - void StartPDiskThread() { - PDisk = new TPDisk(Cfg, PDiskCounters); - - RealtimeFlag.RemoveSources(); - DeviceFlag.RemoveSources(); + NodeWhiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(SelfId().NodeId()); + NodeWardenServiceId = MakeBlobStorageNodeWardenID(SelfId().NodeId()); + + Schedule(TDuration::MilliSeconds(Cfg->StatisticsUpdateIntervalMs), new TEvents::TEvWakeup()); + + StartPDiskThread(); + } + + void StartPDiskThread() { + PDisk = new TPDisk(Cfg, PDiskCounters); + + RealtimeFlag.RemoveSources(); + DeviceFlag.RemoveSources(); DeviceFlag.AddSource(PDisk->Mon.L6); - bool isOk = PDisk->Initialize(TlsActivationContext->ActorSystem(), SelfId()); + bool isOk = PDisk->Initialize(TlsActivationContext->ActorSystem(), SelfId()); if (!isOk) { TStringStream str; str << "PDiskId# " << (ui32)PDisk->PDiskId - << " bootstrapped to the StateError, reason# " << PDisk->ErrorStr - << " Can not be initialized"; - InitError(str.Str()); - str << " Config: " << Cfg->ToString(); - LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - } else { - PDisk->InitiateReadSysLog(SelfId()); - StateErrorReason = - "PDisk is in StateInit, wait for PDisk to read sys log. Did you ckeck EvYardInit result? 
Marker# BSY09"; - Become(&TThis::StateInit); + << " bootstrapped to the StateError, reason# " << PDisk->ErrorStr + << " Can not be initialized"; + InitError(str.Str()); + str << " Config: " << Cfg->ToString(); + LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + } else { + PDisk->InitiateReadSysLog(SelfId()); + StateErrorReason = + "PDisk is in StateInit, wait for PDisk to read sys log. Did you ckeck EvYardInit result? Marker# BSY09"; + Become(&TThis::StateInit); } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Init state - void InitError(const TString &errorReason) { + void InitError(const TString &errorReason) { Become(&TThis::StateError); for (TList<TInitQueueItem>::iterator it = InitQueue.begin(); it != InitQueue.end(); ++it) { - Send(it->Sender, new NPDisk::TEvYardInitResult(NKikimrProto::CORRUPTED, errorReason)); - if (PDisk) { - PDisk->Mon.YardInit.CountResponse(); - } + Send(it->Sender, new NPDisk::TEvYardInitResult(NKikimrProto::CORRUPTED, errorReason)); + if (PDisk) { + PDisk->Mon.YardInit.CountResponse(); + } } InitQueue.clear(); TStringStream str; str << "PDisk is in StateError, reason# " << errorReason; StateErrorReason = str.Str(); - if (PDisk) { - PDisk->ErrorStr = StateErrorReason; - auto* request = PDisk->ReqCreator.CreateFromArgs<TStopDevice>(); - PDisk->InputRequest(request); - } - - if (ControledStartResult) { - auto *ev = ControledStartResult->Get<TEvYardControlResult>(); - ev->Status = NKikimrProto::CORRUPTED; - ev->ErrorReason = StateErrorReason; - TlsActivationContext->Send(ControledStartResult.Release()); - } + if (PDisk) { + PDisk->ErrorStr = StateErrorReason; + auto* request = PDisk->ReqCreator.CreateFromArgs<TStopDevice>(); + PDisk->InputRequest(request); + } + + if (ControledStartResult) { + auto *ev = ControledStartResult->Get<TEvYardControlResult>(); + ev->Status = NKikimrProto::CORRUPTED; + ev->ErrorReason = StateErrorReason; + 
TlsActivationContext->Send(ControledStartResult.Release()); + } + } + + void InitHandle(NMon::TEvHttpInfo::TPtr &ev) { + TStringStream outStr; + outStr.Reserve(512 << 10); + TStringStream deviceFlagStr; + DeviceFlag.Render(deviceFlagStr); + TStringStream realtimeFlagStr; + RealtimeFlag.Render(realtimeFlagStr); + TStringStream fairSchedulerStr; + THolder<THttpInfo> req(PDisk->ReqCreator.CreateFromArgs<THttpInfo>(SelfId(), ev->Sender, outStr, + deviceFlagStr.Str(), realtimeFlagStr.Str(), fairSchedulerStr.Str(), PDisk->ErrorStr, false)); + if (!IsFormattingNow) { + PDisk->InputRequest(req.Release()); + } else { + PDisk->HttpInfo(*req); // Sends TEvHttpInfoResult inside + } } - void InitHandle(NMon::TEvHttpInfo::TPtr &ev) { - TStringStream outStr; - outStr.Reserve(512 << 10); - TStringStream deviceFlagStr; - DeviceFlag.Render(deviceFlagStr); - TStringStream realtimeFlagStr; - RealtimeFlag.Render(realtimeFlagStr); - TStringStream fairSchedulerStr; - THolder<THttpInfo> req(PDisk->ReqCreator.CreateFromArgs<THttpInfo>(SelfId(), ev->Sender, outStr, - deviceFlagStr.Str(), realtimeFlagStr.Str(), fairSchedulerStr.Str(), PDisk->ErrorStr, false)); - if (!IsFormattingNow) { - PDisk->InputRequest(req.Release()); - } else { - PDisk->HttpInfo(*req); // Sends TEvHttpInfoResult inside - } - } - - void InitHandle(TEvPDiskFormattingFinished::TPtr &ev) { - FormattingThread->Join(); - IsFormattingNow = false; - if (ev->Get()->IsSucceed) { - StartPDiskThread(); - LOG_WARN_S(*TlsActivationContext, NKikimrServices::BS_PDISK, - "PDiskId# " << PDisk->PDiskId << " device formatting done"); - } else { - PDisk.Reset(new TPDisk(Cfg, PDiskCounters)); - PDisk->Initialize(TlsActivationContext->ActorSystem(), SelfId()); - Y_VERIFY(PDisk->PDiskThread.Running()); - + void InitHandle(TEvPDiskFormattingFinished::TPtr &ev) { + FormattingThread->Join(); + IsFormattingNow = false; + if (ev->Get()->IsSucceed) { + StartPDiskThread(); + LOG_WARN_S(*TlsActivationContext, NKikimrServices::BS_PDISK, + "PDiskId# " 
<< PDisk->PDiskId << " device formatting done"); + } else { + PDisk.Reset(new TPDisk(Cfg, PDiskCounters)); + PDisk->Initialize(TlsActivationContext->ActorSystem(), SelfId()); + Y_VERIFY(PDisk->PDiskThread.Running()); + *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialFormatReadError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDiskCannotBeFormated; - - PDisk->ErrorStr = ToString("Can not be formated! Reason# ") + ev->Get()->ErrorStr; - - TStringStream str; - str << "PDiskId# " << (ui32)PDisk->PDiskId - << " Can not be formated! Reason# " << ev->Get()->ErrorStr - << " Switching to StateError. Config: " << Cfg->ToString(); - LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - InitError(str.Str()); - } - } - - void CheckMagicSector(ui8 *magicData, ui32 magicDataSize) { - bool isFormatMagicValid = PDisk->IsFormatMagicValid(magicData, magicDataSize); - if (isFormatMagicValid) { - IsMagicAlreadyChecked = true; - IsFormattingNow = true; - // Stop PDiskThread but use PDisk object for creation of http pages - PDisk->Stop(); - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingDeviceFormattingAndTrimming; - PDisk->ErrorStr = "Magic sector is present on disk, now going to format device"; - LOG_WARN_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId << PDisk->ErrorStr); - - // Is used to pass parameters into formatting thread, because TThread can pass only void* + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDiskCannotBeFormated; + + PDisk->ErrorStr = ToString("Can not be formated! Reason# ") + ev->Get()->ErrorStr; + + TStringStream str; + str << "PDiskId# " << (ui32)PDisk->PDiskId + << " Can not be formated! Reason# " << ev->Get()->ErrorStr + << " Switching to StateError. 
Config: " << Cfg->ToString(); + LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + InitError(str.Str()); + } + } + + void CheckMagicSector(ui8 *magicData, ui32 magicDataSize) { + bool isFormatMagicValid = PDisk->IsFormatMagicValid(magicData, magicDataSize); + if (isFormatMagicValid) { + IsMagicAlreadyChecked = true; + IsFormattingNow = true; + // Stop PDiskThread but use PDisk object for creation of http pages + PDisk->Stop(); + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingDeviceFormattingAndTrimming; + PDisk->ErrorStr = "Magic sector is present on disk, now going to format device"; + LOG_WARN_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId << PDisk->ErrorStr); + + // Is used to pass parameters into formatting thread, because TThread can pass only void* using TCookieType = std::tuple<TPDiskActor*, TActorSystem*, TActorId>; - FormattingThread.Reset(new TThread( - [] (void *cookie) -> void* { - auto params = static_cast<TCookieType*>(cookie); - TPDiskActor *actor = std::get<0>(*params); - TActorSystem *actorSystem = std::get<1>(*params); + FormattingThread.Reset(new TThread( + [] (void *cookie) -> void* { + auto params = static_cast<TCookieType*>(cookie); + TPDiskActor *actor = std::get<0>(*params); + TActorSystem *actorSystem = std::get<1>(*params); TActorId pDiskActor = std::get<2>(*params); - delete params; - - NPDisk::TKey chunkKey; - NPDisk::TKey logKey; - NPDisk::TKey sysLogKey; - EntropyPool().Read(&chunkKey, sizeof(NKikimr::NPDisk::TKey)); - EntropyPool().Read(&logKey, sizeof(NKikimr::NPDisk::TKey)); - EntropyPool().Read(&sysLogKey, sizeof(NKikimr::NPDisk::TKey)); - TPDiskConfig *cfg = actor->Cfg.Get(); - - try { - FormatPDisk(cfg->GetDevicePath(), 0, cfg->SectorSize, cfg->ChunkSize, + delete params; + + NPDisk::TKey chunkKey; + NPDisk::TKey logKey; + NPDisk::TKey sysLogKey; + EntropyPool().Read(&chunkKey, sizeof(NKikimr::NPDisk::TKey)); + EntropyPool().Read(&logKey, 
sizeof(NKikimr::NPDisk::TKey)); + EntropyPool().Read(&sysLogKey, sizeof(NKikimr::NPDisk::TKey)); + TPDiskConfig *cfg = actor->Cfg.Get(); + + try { + FormatPDisk(cfg->GetDevicePath(), 0, cfg->SectorSize, cfg->ChunkSize, cfg->PDiskGuid, chunkKey, logKey, sysLogKey, actor->MainKey, TString(), false, - cfg->FeatureFlags.GetTrimEntireDeviceOnStartup(), cfg->SectorMap); - actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(true, "")); - } catch (yexception ex) { - LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, what#" << ex.what()); - actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(false, ex.what())); - } - return nullptr; - }, - new TCookieType(this, TlsActivationContext->ActorSystem(), SelfId()))); - - FormattingThread->Start(); - } else { + cfg->FeatureFlags.GetTrimEntireDeviceOnStartup(), cfg->SectorMap); + actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(true, "")); + } catch (yexception ex) { + LOG_ERROR_S(*actorSystem, NKikimrServices::BS_PDISK, "Formatting error, what#" << ex.what()); + actorSystem->Send(pDiskActor, new TEvPDiskFormattingFinished(false, ex.what())); + } + return nullptr; + }, + new TCookieType(this, TlsActivationContext->ActorSystem(), SelfId()))); + + FormattingThread->Start(); + } else { SecureWipeBuffer((ui8*)&MainKey, sizeof(MainKey)); *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialFormatReadError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorPDiskCannotBeInitialised; - if (!IsMagicAlreadyChecked) { - PDisk->ErrorStr = "Format is incomplete. Magic sector is not present on disk. Maybe wrong PDiskKey"; - } else { - PDisk->ErrorStr = "Format is incomplete. 
Magic sector is present and new format was written"; - } + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorPDiskCannotBeInitialised; + if (!IsMagicAlreadyChecked) { + PDisk->ErrorStr = "Format is incomplete. Magic sector is not present on disk. Maybe wrong PDiskKey"; + } else { + PDisk->ErrorStr = "Format is incomplete. Magic sector is present and new format was written"; + } TStringStream str; - str << "PDiskId# " << PDisk->PDiskId + str << "PDiskId# " << PDisk->PDiskId << " Can not be initialized! " << PDisk->ErrorStr - << " Hash(MainKey)# " << Cfg->HashedMainKey; - InitError(str.Str()); - str << " Config: " << Cfg->ToString(); - LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - } - } - - void InitHandle(TEvReadFormatResult::TPtr &ev) { - ui8 *formatSectors = ev->Get()->FormatSectors.Get(); - ui32 formatSectorsSize = ev->Get()->FormatSectorsSize; - NSan::CheckMemIsInitialized(formatSectors, formatSectorsSize); + << " Hash(MainKey)# " << Cfg->HashedMainKey; + InitError(str.Str()); + str << " Config: " << Cfg->ToString(); + LOG_CRIT_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + } + } + + void InitHandle(TEvReadFormatResult::TPtr &ev) { + ui8 *formatSectors = ev->Get()->FormatSectors.Get(); + ui32 formatSectorsSize = ev->Get()->FormatSectorsSize; + NSan::CheckMemIsInitialized(formatSectors, formatSectorsSize); bool isFormatOk = PDisk->ReadChunk0Format(formatSectors, MainKey); if (!isFormatOk) { - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingFormatMagicChecking; - PDisk->ErrorStr = "Format is not Ok, now checking for proper magic sector on disk"; - CheckMagicSector(formatSectors, formatSectorsSize); + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingFormatMagicChecking; + PDisk->ErrorStr = "Format is not Ok, now checking for proper magic sector on disk"; + CheckMagicSector(formatSectors, formatSectorsSize); } else { 
SecureWipeBuffer((ui8*)&MainKey, sizeof(MainKey)); // Format is read OK - LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId - << " Successfully read format record# " << PDisk->Format.ToString()); - TString info; - if (!PDisk->CheckGuid(&info)) { + LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId + << " Successfully read format record# " << PDisk->Format.ToString()); + TString info; + if (!PDisk->CheckGuid(&info)) { *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialFormatReadError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialFormatReadDueToGuid; - PDisk->ErrorStr = TStringBuilder() << "Can't start due to a guid error " << info; + PDisk->ErrorStr = TStringBuilder() << "Can't start due to a guid error " << info; TStringStream str; - str << "PDiskId# " << PDisk->PDiskId << PDisk->ErrorStr; - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - InitError(str.Str()); + str << "PDiskId# " << PDisk->PDiskId << PDisk->ErrorStr; + LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + InitError(str.Str()); } else if (!PDisk->CheckFormatComplete()) { *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialFormatReadError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialFormatReadIncompleteFormat; - PDisk->ErrorStr = "Can't start due to incomplete format!"; + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialFormatReadIncompleteFormat; + PDisk->ErrorStr = "Can't start due to incomplete format!"; TStringStream str; - str << "PDiskId# " << PDisk->PDiskId << " " << PDisk->ErrorStr << " " + str << "PDiskId# " << PDisk->PDiskId << " " << 
PDisk->ErrorStr << " " << "Please, do not trun off your server or remove your storage device while formatting. " << "We are sure you did this or something even more creative, like killing the formatter."; - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - InitError(str.Str()); + LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + InitError(str.Str()); } else { // PDisk GUID is OK and format is complete *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialSysLogRead; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingSysLogRead; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingSysLogRead; PDisk->Format.InitMagic(); - PDisk->ReadSysLog(SelfId()); + PDisk->ReadSysLog(SelfId()); } } } - void InitHandle(NPDisk::TEvReadLogResult::TPtr &ev) { - auto *request = PDisk->ReqCreator.CreateFromEv<TLogReadResultProcess>(ev, SelfId()); - PDisk->InputRequest(request); - } - - void InitHandle(NPDisk::TEvLogInitResult::TPtr &ev) { - const NPDisk::TEvLogInitResult &evLogInitResult = *ev->Get(); - PDisk->ErrorStr = evLogInitResult.ErrorStr; - if (evLogInitResult.IsInitializedGood) { - InitSuccess(); - } else { + void InitHandle(NPDisk::TEvReadLogResult::TPtr &ev) { + auto *request = PDisk->ReqCreator.CreateFromEv<TLogReadResultProcess>(ev, SelfId()); + PDisk->InputRequest(request); + } + + void InitHandle(NPDisk::TEvLogInitResult::TPtr &ev) { + const NPDisk::TEvLogInitResult &evLogInitResult = *ev->Get(); + PDisk->ErrorStr = evLogInitResult.ErrorStr; + if (evLogInitResult.IsInitializedGood) { + InitSuccess(); + } else { TStringStream str; str << "PDiskId# " << PDisk->PDiskId << " Can't start due to a log processing error! 
ErrorStr# \"" << evLogInitResult.ErrorStr << "\""; - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); - InitError(str.Str()); + LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, str.Str()); + InitError(str.Str()); } } - void InitSuccess() { + void InitSuccess() { Become(&TThis::StateOnline); for (TList<TInitQueueItem>::iterator it = InitQueue.begin(); it != InitQueue.end(); ++it) { NPDisk::TEvYardInit evInit(it->OwnerRound, it->VDisk, it->PDiskGuid, it->CutLogId, it->WhiteboardProxyId, it->SlotId); - auto* request = PDisk->ReqCreator.CreateFromEv<TYardInit>(evInit, it->Sender); + auto* request = PDisk->ReqCreator.CreateFromEv<TYardInit>(evInit, it->Sender); PDisk->InputRequest(request); } InitQueue.clear(); - if (ControledStartResult) { - TlsActivationContext->Send(ControledStartResult.Release()); - } + if (ControledStartResult) { + TlsActivationContext->Send(ControledStartResult.Release()); + } } - void InitHandle(NPDisk::TEvYardInit::TPtr &ev) { + void InitHandle(NPDisk::TEvYardInit::TPtr &ev) { const NPDisk::TEvYardInit &evYardInit = *ev->Get(); InitQueue.emplace_back(evYardInit.OwnerRound, evYardInit.VDisk, evYardInit.PDiskGuid, ev->Sender, evYardInit.CutLogID, evYardInit.WhiteboardProxyId, evYardInit.SlotId); } - void InitHandle(NPDisk::TEvYardControl::TPtr &ev) { - - const NPDisk::TEvYardControl &evControl = *ev->Get(); - switch (evControl.Action) { - case TEvYardControl::PDiskStart: - ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(), - new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); - break; - default: - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, - "Unexpected control action for pdisk in StateInit")); - PDisk->Mon.YardControl.CountResponse(); - break; - } - - } - - void InitHandle(NPDisk::TEvSlay::TPtr &ev) { + void InitHandle(NPDisk::TEvYardControl::TPtr &ev) { + + const NPDisk::TEvYardControl &evControl = *ev->Get(); + switch 
(evControl.Action) { + case TEvYardControl::PDiskStart: + ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(), + new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); + break; + default: + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, + "Unexpected control action for pdisk in StateInit")); + PDisk->Mon.YardControl.CountResponse(); + break; + } + + } + + void InitHandle(NPDisk::TEvSlay::TPtr &ev) { const NPDisk::TEvSlay &evSlay = *ev->Get(); PDisk->Mon.YardSlay.CountRequest(); TStringStream str; str << "PDiskId# " << (ui32)PDisk->PDiskId << " is still initializing, please wait"; - Send(ev->Sender, new NPDisk::TEvSlayResult(NKikimrProto::NOTREADY, 0, + Send(ev->Sender, new NPDisk::TEvSlayResult(NKikimrProto::NOTREADY, 0, evSlay.VDiskId, evSlay.SlayOwnerRound, evSlay.PDiskId, evSlay.VSlotId, str.Str())); PDisk->Mon.YardSlay.CountResponse(); } @@ -506,19 +506,19 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Error state - void ErrorHandle(NPDisk::TEvYardInit::TPtr &ev) { + void ErrorHandle(NPDisk::TEvYardInit::TPtr &ev) { PDisk->Mon.YardInit.CountRequest(); - Send(ev->Sender, new NPDisk::TEvYardInitResult(NKikimrProto::CORRUPTED, StateErrorReason)); + Send(ev->Sender, new NPDisk::TEvYardInitResult(NKikimrProto::CORRUPTED, StateErrorReason)); PDisk->Mon.YardInit.CountResponse(); } - void ErrorHandle(NPDisk::TEvCheckSpace::TPtr &ev) { - PDisk->Mon.CheckSpace.CountRequest(); - Send(ev->Sender, new NPDisk::TEvCheckSpaceResult(NKikimrProto::CORRUPTED, 0, 0, 0, 0, StateErrorReason)); - PDisk->Mon.CheckSpace.CountResponse(); + void ErrorHandle(NPDisk::TEvCheckSpace::TPtr &ev) { + PDisk->Mon.CheckSpace.CountRequest(); + Send(ev->Sender, new NPDisk::TEvCheckSpaceResult(NKikimrProto::CORRUPTED, 0, 0, 0, 0, StateErrorReason)); + PDisk->Mon.CheckSpace.CountResponse(); } - void ErrorHandle(NPDisk::TEvLog::TPtr &ev) { + void 
ErrorHandle(NPDisk::TEvLog::TPtr &ev) { const NPDisk::TEvLog &evLog = *ev->Get(); TStringStream str; str << "PDiskId# " << PDisk->PDiskId; @@ -531,14 +531,14 @@ public: str << "Unknown, something went very wrong in PDisk. Marker# BSY06"; } str << " StateErrorReason# " << StateErrorReason; - THolder<NPDisk::TEvLogResult> result(new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, str.Str())); + THolder<NPDisk::TEvLogResult> result(new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, str.Str())); result->Results.push_back(NPDisk::TEvLogResult::TRecord(evLog.Lsn, evLog.Cookie)); PDisk->Mon.WriteLog.CountRequest(0); - Send(ev->Sender, result.Release()); + Send(ev->Sender, result.Release()); PDisk->Mon.WriteLog.CountResponse(); } - void ErrorHandle(NPDisk::TEvMultiLog::TPtr &ev) { + void ErrorHandle(NPDisk::TEvMultiLog::TPtr &ev) { const NPDisk::TEvMultiLog &evMultiLog = *ev->Get(); TStringStream str; str << "PDiskId# " << PDisk->PDiskId; @@ -556,11 +556,11 @@ public: result->Results.push_back(NPDisk::TEvLogResult::TRecord(log->Lsn, log->Cookie)); } PDisk->Mon.WriteLog.CountRequest(0); - Send(ev->Sender, result.Release()); + Send(ev->Sender, result.Release()); PDisk->Mon.WriteLog.CountResponse(); } - void ErrorHandle(NPDisk::TEvReadLog::TPtr &ev) { + void ErrorHandle(NPDisk::TEvReadLog::TPtr &ev) { const NPDisk::TEvReadLog &evReadLog = *ev->Get(); TStringStream str; str << "PDiskId# " << PDisk->PDiskId; @@ -573,93 +573,93 @@ public: str << "Unknown, something went very wrong in PDisk. 
Marker# BSY03"; } THolder<NPDisk::TEvReadLogResult> result(new NPDisk::TEvReadLogResult( - NKikimrProto::CORRUPTED, evReadLog.Position, evReadLog.Position, true, 0, str.Str(), evReadLog.Owner)); - PDisk->Mon.LogRead.CountRequest(); - Send(ev->Sender, result.Release()); - PDisk->Mon.LogRead.CountResponse(); + NKikimrProto::CORRUPTED, evReadLog.Position, evReadLog.Position, true, 0, str.Str(), evReadLog.Owner)); + PDisk->Mon.LogRead.CountRequest(); + Send(ev->Sender, result.Release()); + PDisk->Mon.LogRead.CountResponse(); } - void ErrorHandle(NPDisk::TEvChunkWrite::TPtr &ev) { + void ErrorHandle(NPDisk::TEvChunkWrite::TPtr &ev) { const NPDisk::TEvChunkWrite &evChunkWrite = *ev->Get(); PDisk->Mon.GetWriteCounter(evChunkWrite.PriorityClass)->CountRequest(0); PDisk->Mon.GetWriteCounter(evChunkWrite.PriorityClass)->CountResponse(); - Send(ev->Sender, new NPDisk::TEvChunkWriteResult(NKikimrProto::CORRUPTED, + Send(ev->Sender, new NPDisk::TEvChunkWriteResult(NKikimrProto::CORRUPTED, evChunkWrite.ChunkIdx, evChunkWrite.Cookie, 0, StateErrorReason)); } - void ErrorHandle(NPDisk::TEvChunkRead::TPtr &ev) { + void ErrorHandle(NPDisk::TEvChunkRead::TPtr &ev) { const NPDisk::TEvChunkRead &evChunkRead = *ev->Get(); PDisk->Mon.GetReadCounter(evChunkRead.PriorityClass)->CountRequest(0); THolder<NPDisk::TEvChunkReadResult> result = MakeHolder<NPDisk::TEvChunkReadResult>(NKikimrProto::CORRUPTED, - evChunkRead.ChunkIdx, evChunkRead.Offset, evChunkRead.Cookie, 0, "PDisk is in error state"); - LOG_DEBUG(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s To: %" PRIu64 " Marker# BSY02", + evChunkRead.ChunkIdx, evChunkRead.Offset, evChunkRead.Cookie, 0, "PDisk is in error state"); + LOG_DEBUG(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s To: %" PRIu64 " Marker# BSY02", (ui32)PDisk->PDiskId, result->ToString().c_str(), (ui64)ev->Sender.LocalId()); - Send(ev->Sender, result.Release()); + Send(ev->Sender, result.Release()); 
PDisk->Mon.GetReadCounter(evChunkRead.PriorityClass)->CountResponse(); } - void ErrorHandle(NPDisk::TEvHarakiri::TPtr &ev) { - PDisk->Mon.Harakiri.CountRequest(); - Send(ev->Sender, new NPDisk::TEvHarakiriResult(NKikimrProto::CORRUPTED, 0, StateErrorReason)); - PDisk->Mon.Harakiri.CountResponse(); + void ErrorHandle(NPDisk::TEvHarakiri::TPtr &ev) { + PDisk->Mon.Harakiri.CountRequest(); + Send(ev->Sender, new NPDisk::TEvHarakiriResult(NKikimrProto::CORRUPTED, 0, StateErrorReason)); + PDisk->Mon.Harakiri.CountResponse(); } - void ErrorHandle(NPDisk::TEvSlay::TPtr &ev) { + void ErrorHandle(NPDisk::TEvSlay::TPtr &ev) { const NPDisk::TEvSlay &evSlay = *ev->Get(); PDisk->Mon.YardSlay.CountRequest(); TStringStream str; str << "PDiskId# " << (ui32)PDisk->PDiskId << " is in error state."; - Send(ev->Sender, new NPDisk::TEvSlayResult(NKikimrProto::CORRUPTED, 0, + Send(ev->Sender, new NPDisk::TEvSlayResult(NKikimrProto::CORRUPTED, 0, evSlay.VDiskId, evSlay.SlayOwnerRound, evSlay.PDiskId, evSlay.VSlotId, str.Str())); PDisk->Mon.YardSlay.CountResponse(); } - void ErrorHandle(NPDisk::TEvChunkReserve::TPtr &ev) { - PDisk->Mon.ChunkReserve.CountRequest(); - Send(ev->Sender, new NPDisk::TEvChunkReserveResult(NKikimrProto::CORRUPTED, 0, StateErrorReason)); - PDisk->Mon.ChunkReserve.CountResponse(); + void ErrorHandle(NPDisk::TEvChunkReserve::TPtr &ev) { + PDisk->Mon.ChunkReserve.CountRequest(); + Send(ev->Sender, new NPDisk::TEvChunkReserveResult(NKikimrProto::CORRUPTED, 0, StateErrorReason)); + PDisk->Mon.ChunkReserve.CountResponse(); } - void ErrorHandle(NPDisk::TEvYardControl::TPtr &ev) { + void ErrorHandle(NPDisk::TEvYardControl::TPtr &ev) { const NPDisk::TEvYardControl &evControl = *ev->Get(); - Y_VERIFY(PDisk); - + Y_VERIFY(PDisk); + PDisk->Mon.YardControl.CountRequest(); - - switch (evControl.Action) { - case TEvYardControl::PDiskStart: - { + + switch (evControl.Action) { + case TEvYardControl::PDiskStart: + { auto *mainKey = static_cast<const 
NPDisk::TKey*>(evControl.Cookie); Y_VERIFY(mainKey); MainKey = *mainKey; - StartPDiskThread(); - ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(), - new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); - break; - } - default: - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason)); - PDisk->Mon.YardControl.CountResponse(); - break; - } + StartPDiskThread(); + ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(), + new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); + break; + } + default: + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason)); + PDisk->Mon.YardControl.CountResponse(); + break; + } } - void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) { + void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) { // Just ignore the event, can't send cut log in this state. Y_UNUSED(ev); } - + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Online state - void Handle(NPDisk::TEvYardInit::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TYardInit>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvYardInit::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TYardInit>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } - void Handle(NPDisk::TEvCheckSpace::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TCheckSpace>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvCheckSpace::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TCheckSpace>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } @@ -668,15 +668,15 @@ public: Y_UNUSED(burstMs); } - void Handle(NPDisk::TEvLog::TPtr &ev) { + void Handle(NPDisk::TEvLog::TPtr &ev) { double burstMs; - TLogWrite* request = PDisk->ReqCreator.CreateLogWrite(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); + TLogWrite* request 
= PDisk->ReqCreator.CreateLogWrite(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); CheckBurst(request->IsSensitive, burstMs); - request->Orbit = std::move(ev->Get()->Orbit); - PDisk->InputRequest(request); + request->Orbit = std::move(ev->Get()->Orbit); + PDisk->InputRequest(request); } - void Handle(NPDisk::TEvMultiLog::TPtr &ev) { + void Handle(NPDisk::TEvMultiLog::TPtr &ev) { for (auto &log : ev->Get()->Logs) { double burstMs; TLogWrite* request = PDisk->ReqCreator.CreateLogWrite(*log, ev->Sender, burstMs, std::move(ev->TraceId)); @@ -686,91 +686,91 @@ public: } } - void Handle(NPDisk::TEvReadLog::TPtr &ev) { - LOG_DEBUG(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Marker# BSY01", + void Handle(NPDisk::TEvReadLog::TPtr &ev) { + LOG_DEBUG(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Marker# BSY01", (ui32)PDisk->PDiskId, ev->Get()->ToString().c_str()); double burstMs; - auto* request = PDisk->ReqCreator.CreateFromEv<TLogRead>(*ev->Get(), ev->Sender, &burstMs); + auto* request = PDisk->ReqCreator.CreateFromEv<TLogRead>(*ev->Get(), ev->Sender, &burstMs); CheckBurst(request->IsSensitive, burstMs); - PDisk->InputRequest(request); + PDisk->InputRequest(request); } - void Handle(NPDisk::TEvChunkWrite::TPtr &ev) { + void Handle(NPDisk::TEvChunkWrite::TPtr &ev) { double burstMs; - TChunkWrite* request = PDisk->ReqCreator.CreateChunkWrite(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); + TChunkWrite* request = PDisk->ReqCreator.CreateChunkWrite(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); CheckBurst(request->IsSensitive, burstMs); - PDisk->InputRequest(request); + PDisk->InputRequest(request); } - void Handle(NPDisk::TEvChunkRead::TPtr &ev) { + void Handle(NPDisk::TEvChunkRead::TPtr &ev) { double burstMs; - TChunkRead* request = PDisk->ReqCreator.CreateChunkRead(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); + TChunkRead* request = 
PDisk->ReqCreator.CreateChunkRead(*ev->Get(), ev->Sender, burstMs, std::move(ev->TraceId)); CheckBurst(request->IsSensitive, burstMs); - PDisk->InputRequest(request); + PDisk->InputRequest(request); + } + + void Handle(NPDisk::TEvHarakiri::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<THarakiri>(*ev->Get(), ev->Sender); + PDisk->InputRequest(request); } - void Handle(NPDisk::TEvHarakiri::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<THarakiri>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvSlay::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TSlay>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } - void Handle(NPDisk::TEvSlay::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TSlay>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvChunkReserve::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TChunkReserve>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } - void Handle(NPDisk::TEvChunkReserve::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TChunkReserve>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvChunksLock::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } - void Handle(NPDisk::TEvChunksLock::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(*ev->Get(), ev->Sender); - PDisk->InputRequest(request); - } - - void Handle(NPDisk::TEvChunksUnlock::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TChunksUnlock>(*ev->Get(), ev->Sender); - PDisk->InputRequest(request); - } - - void Handle(NPDisk::TEvYardControl::TPtr &ev) { + void Handle(NPDisk::TEvChunksUnlock::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TChunksUnlock>(*ev->Get(), ev->Sender); + PDisk->InputRequest(request); + } + + void Handle(NPDisk::TEvYardControl::TPtr &ev) { const NPDisk::TEvYardControl &evControl = *ev->Get(); - switch (evControl.Action) { - case 
TEvYardControl::Brake: - InitError("Received TEvYardControl::Brake"); - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); - break; - case TEvYardControl::PDiskStop: - PDisk->Stop(); - InitError("Received TEvYardControl::PDiskStop"); - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); - break; - case TEvYardControl::GetPDiskPointer: - Y_VERIFY(!evControl.Cookie); - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, PDisk.Get(), {})); - break; - case TEvYardControl::PDiskStart: - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, nullptr, {})); - break; - default: - auto* request = PDisk->ReqCreator.CreateFromEv<TYardControl>(evControl, ev->Sender); + switch (evControl.Action) { + case TEvYardControl::Brake: + InitError("Received TEvYardControl::Brake"); + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); + break; + case TEvYardControl::PDiskStop: + PDisk->Stop(); + InitError("Received TEvYardControl::PDiskStop"); + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); + break; + case TEvYardControl::GetPDiskPointer: + Y_VERIFY(!evControl.Cookie); + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, PDisk.Get(), {})); + break; + case TEvYardControl::PDiskStart: + Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, nullptr, {})); + break; + default: + auto* request = PDisk->ReqCreator.CreateFromEv<TYardControl>(evControl, ev->Sender); PDisk->InputRequest(request); - break; + break; } } - void Handle(NPDisk::TEvAskForCutLog::TPtr &ev) { - auto* request = PDisk->ReqCreator.CreateFromEv<TAskForCutLog>(*ev->Get(), ev->Sender); + void Handle(NPDisk::TEvAskForCutLog::TPtr &ev) { + auto* request = PDisk->ReqCreator.CreateFromEv<TAskForCutLog>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } - void Handle(NPDisk::TEvConfigureScheduler::TPtr 
&ev) { - LOG_INFO_S(*TlsActivationContext, NKikimrServices::BS_PDISK, - "PDiskId# " << (ui32)PDisk->PDiskId << " " << ev->Get()->ToString()); + void Handle(NPDisk::TEvConfigureScheduler::TPtr &ev) { + LOG_INFO_S(*TlsActivationContext, NKikimrServices::BS_PDISK, + "PDiskId# " << (ui32)PDisk->PDiskId << " " << ev->Get()->ToString()); PDisk->Mon.YardConfigureScheduler.CountRequest(); // Configure forseti scheduler weights - auto* request = PDisk->ReqCreator.CreateFromEv<TConfigureScheduler>(*ev->Get(), ev->Sender); + auto* request = PDisk->ReqCreator.CreateFromEv<TConfigureScheduler>(*ev->Get(), ev->Sender); PDisk->InputRequest(request); } @@ -778,152 +778,152 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // All states - void HandlePoison() { - ui32 pdiskId = PDisk->PDiskId; - PDisk.Reset(); - PassAway(); - LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << pdiskId - << " HandlePoison, PDiskThread stopped"); + void HandlePoison() { + ui32 pdiskId = PDisk->PDiskId; + PDisk.Reset(); + PassAway(); + LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << pdiskId + << " HandlePoison, PDiskThread stopped"); } - void HandleWakeup() { - Schedule(TDuration::MilliSeconds(Cfg->StatisticsUpdateIntervalMs), new TEvents::TEvWakeup()); + void HandleWakeup() { + Schedule(TDuration::MilliSeconds(Cfg->StatisticsUpdateIntervalMs), new TEvents::TEvWakeup()); TCpuTimer timer; PDisk->Mon.UpdatePercentileTrackers(); PDisk->Mon.UpdateLights(); - const bool halt = PDisk->Mon.UpdateDeviceHaltCounters(); + const bool halt = PDisk->Mon.UpdateDeviceHaltCounters(); PDisk->Mon.UpdateStats(); ui64 updatePercentileTrackersCycles = timer.Elapsed(); - if (halt) { - Send(SelfId(), new TEvDeviceError("device halt too long")); - } - - TEvWhiteboardReportResult *response = new TEvWhiteboardReportResult(); + if (halt) { + Send(SelfId(), new TEvDeviceError("device halt too 
long")); + } + + TEvWhiteboardReportResult *response = new TEvWhiteboardReportResult(); response->PDiskState = MakeHolder<NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateUpdate>(); - response->VDiskStateVect.reserve(16); // Pessimistic upper estimate of a number of owners - THolder<TWhiteboardReport> request(PDisk->ReqCreator.CreateFromArgs<TWhiteboardReport>(SelfId(), response)); - ui64 whiteboardReportCycles = 0; - ui64 updateSchedulerCycles = 0; - if (!IsFormattingNow && AtomicGet(PDisk->IsStarted)) { + response->VDiskStateVect.reserve(16); // Pessimistic upper estimate of a number of owners + THolder<TWhiteboardReport> request(PDisk->ReqCreator.CreateFromArgs<TWhiteboardReport>(SelfId(), response)); + ui64 whiteboardReportCycles = 0; + ui64 updateSchedulerCycles = 0; + if (!IsFormattingNow && AtomicGet(PDisk->IsStarted)) { PDisk->InputRequest(request.Release()); - // Update the current scheduler - whiteboardReportCycles = timer.Elapsed(); - updateSchedulerCycles = 0; - } else { - PDisk->WhiteboardReport(*request); // Send TEvWhiteboardReportResult inside + // Update the current scheduler + whiteboardReportCycles = timer.Elapsed(); + updateSchedulerCycles = 0; + } else { + PDisk->WhiteboardReport(*request); // Send TEvWhiteboardReportResult inside } - + LWPROBE(PDiskHandleWakeup, PDisk->PDiskId, HPMilliSecondsFloat(updatePercentileTrackersCycles), HPMilliSecondsFloat(whiteboardReportCycles), HPMilliSecondsFloat(updateSchedulerCycles)); } - void Handle(NPDisk::TEvWhiteboardReportResult::TPtr &ev) { - NPDisk::TEvWhiteboardReportResult *result = ev->Get(); - LOG_TRACE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId + void Handle(NPDisk::TEvWhiteboardReportResult::TPtr &ev) { + NPDisk::TEvWhiteboardReportResult *result = ev->Get(); + LOG_TRACE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId << " handle TEvWhiteboardReportResult# " << result->ToString()); - Send(NodeWhiteboardServiceId, 
result->PDiskState.Release()); + Send(NodeWhiteboardServiceId, result->PDiskState.Release()); for (auto& p : result->VDiskStateVect) { - Send(std::get<0>(p), - new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(std::move(std::get<1>(p)))); + Send(std::get<0>(p), + new NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateUpdate(std::move(std::get<1>(p)))); + } + if (result->DiskMetrics) { + Send(NodeWardenServiceId, result->DiskMetrics.Release()); } - if (result->DiskMetrics) { - Send(NodeWardenServiceId, result->DiskMetrics.Release()); - } bool sendFlags = false; RealtimeFlag.Update(sendFlags); DeviceFlag.Update(sendFlags); AtomicSet(PDisk->NonRealTimeMs, RealtimeFlag.GetRedMsPs()); AtomicSet(PDisk->SlowDeviceMs, DeviceFlag.GetRedMsPs()); if (sendFlags) { - Send(NodeWhiteboardServiceId, new NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateUpdate( + Send(NodeWhiteboardServiceId, new NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateUpdate( PDisk->PDiskId, RealtimeFlag.Get(), DeviceFlag.Get())); } } - void Handle(NPDisk::TEvDeviceError::TPtr &ev) { - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, - "Actor recieved device error, info# " << ev->Get()->Info); - *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::DeviceIoError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDeviceIoError; - PDisk->ErrorStr = ev->Get()->Info; - InitError("io error"); - } - - void Handle(TEvBlobStorage::TEvRestartPDisk::TPtr &ev) { - if (CurrentStateFunc() == &TPDiskActor::StateInit - || CurrentStateFunc() == &TPDiskActor::StateOnline && !Cfg->SectorMap) { - Send(ev->Sender, new TEvBlobStorage::TEvRestartPDiskResult(PDisk->PDiskId, - NKikimrProto::EReplyStatus::ERROR)); - return; - } - + void Handle(NPDisk::TEvDeviceError::TPtr &ev) { + LOG_ERROR_S(*TlsActivationContext, NKikimrServices::BS_PDISK, + "Actor recieved device error, info# " << ev->Get()->Info); + *PDisk->Mon.PDiskState = 
NKikimrBlobStorage::TPDiskState::DeviceIoError; + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDeviceIoError; + PDisk->ErrorStr = ev->Get()->Info; + InitError("io error"); + } + + void Handle(TEvBlobStorage::TEvRestartPDisk::TPtr &ev) { + if (CurrentStateFunc() == &TPDiskActor::StateInit + || CurrentStateFunc() == &TPDiskActor::StateOnline && !Cfg->SectorMap) { + Send(ev->Sender, new TEvBlobStorage::TEvRestartPDiskResult(PDisk->PDiskId, + NKikimrProto::EReplyStatus::ERROR)); + return; + } + MainKey = ev->Get()->MainKey; SecureWipeBuffer((ui8*)&ev->Get()->MainKey, sizeof(ev->Get()->MainKey)); - LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId - << " Going to restart PDisk since recieved TEvRestartPDisk"); - PDisk->Stop(); - - auto& newCfg = ev->Get()->Config; - if (newCfg) { - Y_VERIFY_S(Cfg->PDiskId == PDisk->PDiskId, - "New config's PDiskId# " << newCfg->PDiskId << " is not equal to real PDiskId# " << PDisk->PDiskId); - Cfg = std::move(newCfg); - } - StartPDiskThread(); - - Send(ev->Sender, new TEvBlobStorage::TEvRestartPDiskResult(PDisk->PDiskId)); - if (PendingRestartResponse) { - PendingRestartResponse(); - PendingRestartResponse = {}; - } - } - - void Handle(NMon::TEvHttpInfo::TPtr &ev) { + LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId + << " Going to restart PDisk since recieved TEvRestartPDisk"); + PDisk->Stop(); + + auto& newCfg = ev->Get()->Config; + if (newCfg) { + Y_VERIFY_S(Cfg->PDiskId == PDisk->PDiskId, + "New config's PDiskId# " << newCfg->PDiskId << " is not equal to real PDiskId# " << PDisk->PDiskId); + Cfg = std::move(newCfg); + } + StartPDiskThread(); + + Send(ev->Sender, new TEvBlobStorage::TEvRestartPDiskResult(PDisk->PDiskId)); + if (PendingRestartResponse) { + PendingRestartResponse(); + PendingRestartResponse = {}; + } + } + + void Handle(NMon::TEvHttpInfo::TPtr &ev) { const 
TCgiParameters &cgi = ev->Get()->Request.GetPostParams(); - if (cgi.Has("chunksLockByRange")) { - ui32 begin = strtoul(cgi.Get("chunksLockBegin").c_str(), nullptr, 10); - ui32 end = strtoul(cgi.Get("chunksLockEnd").c_str(), nullptr, 10); - TEvChunksLock evLock(true, begin, end, 0); - auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(evLock, ev->Sender); - PDisk->InputRequest(request); - } else if (cgi.Has("chunksLockByCount")) { - ui32 begin = strtoul(cgi.Get("chunksLockBegin").c_str(), nullptr, 10); - ui32 count = strtoul(cgi.Get("chunksLockCount").c_str(), nullptr, 10); - TEvChunksLock evLock(false, begin, 0, count); - auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(evLock , ev->Sender); - PDisk->InputRequest(request); - } else if (cgi.Has("chunksUnlock")) { - auto* request = PDisk->ReqCreator.CreateFromEv<TChunksUnlock>(NPDisk::TEvChunksUnlock(), ev->Sender); - PDisk->InputRequest(request); - } else if (cgi.Has("restartPDisk")) { - if (Cfg->SectorMap || CurrentStateFunc() == &TPDiskActor::StateError) { - Send(NodeWardenServiceId, new TEvBlobStorage::TEvAskRestartPDisk(PDisk->PDiskId)); - // Send responce later when restart command will be received - PendingRestartResponse = [this, actor = ev->Sender] () { - Send(actor, new NMon::TEvHttpInfoRes("")); - }; - return; - } - } else if (cgi.Has("stopPDisk")) { - if (Cfg->SectorMap) { - *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::DeviceIoError; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorFake; - - PDisk->Stop(); - InitError("Received Stop from web UI"); - } - Send(ev->Sender, new NMon::TEvHttpInfoRes("")); - return; - } - + if (cgi.Has("chunksLockByRange")) { + ui32 begin = strtoul(cgi.Get("chunksLockBegin").c_str(), nullptr, 10); + ui32 end = strtoul(cgi.Get("chunksLockEnd").c_str(), nullptr, 10); + TEvChunksLock evLock(true, begin, end, 0); + auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(evLock, 
ev->Sender); + PDisk->InputRequest(request); + } else if (cgi.Has("chunksLockByCount")) { + ui32 begin = strtoul(cgi.Get("chunksLockBegin").c_str(), nullptr, 10); + ui32 count = strtoul(cgi.Get("chunksLockCount").c_str(), nullptr, 10); + TEvChunksLock evLock(false, begin, 0, count); + auto* request = PDisk->ReqCreator.CreateFromEv<TChunksLock>(evLock , ev->Sender); + PDisk->InputRequest(request); + } else if (cgi.Has("chunksUnlock")) { + auto* request = PDisk->ReqCreator.CreateFromEv<TChunksUnlock>(NPDisk::TEvChunksUnlock(), ev->Sender); + PDisk->InputRequest(request); + } else if (cgi.Has("restartPDisk")) { + if (Cfg->SectorMap || CurrentStateFunc() == &TPDiskActor::StateError) { + Send(NodeWardenServiceId, new TEvBlobStorage::TEvAskRestartPDisk(PDisk->PDiskId)); + // Send responce later when restart command will be received + PendingRestartResponse = [this, actor = ev->Sender] () { + Send(actor, new NMon::TEvHttpInfoRes("")); + }; + return; + } + } else if (cgi.Has("stopPDisk")) { + if (Cfg->SectorMap) { + *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::DeviceIoError; + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorFake; + + PDisk->Stop(); + InitError("Received Stop from web UI"); + } + Send(ev->Sender, new NMon::TEvHttpInfoRes("")); + return; + } + bool doGetSchedule = false; const auto& httpRequest = ev->Get()->Request; if (httpRequest.GetMethod() == HTTP_METHOD_GET) { @@ -945,131 +945,131 @@ public: } } - TStringStream deviceFlagStr; - DeviceFlag.Render(deviceFlagStr); - TStringStream realtimeFlagStr; - RealtimeFlag.Render(realtimeFlagStr); - TStringStream fairSchedulerStr; - TStringStream outStr; - outStr.Reserve(512 << 10); - - THolder<THttpInfo> req(PDisk->ReqCreator.CreateFromArgs<THttpInfo>(SelfId(), ev->Sender, outStr, - deviceFlagStr.Str(), realtimeFlagStr.Str(), fairSchedulerStr.Str(), PDisk->ErrorStr, doGetSchedule)); - if (AtomicGet(PDisk->IsStarted)) { - 
PDisk->InputRequest(req.Release()); - } else { - PDisk->HttpInfo(*req); // Sends TEvHttpInfoResult inside - } + TStringStream deviceFlagStr; + DeviceFlag.Render(deviceFlagStr); + TStringStream realtimeFlagStr; + RealtimeFlag.Render(realtimeFlagStr); + TStringStream fairSchedulerStr; + TStringStream outStr; + outStr.Reserve(512 << 10); + + THolder<THttpInfo> req(PDisk->ReqCreator.CreateFromArgs<THttpInfo>(SelfId(), ev->Sender, outStr, + deviceFlagStr.Str(), realtimeFlagStr.Str(), fairSchedulerStr.Str(), PDisk->ErrorStr, doGetSchedule)); + if (AtomicGet(PDisk->IsStarted)) { + PDisk->InputRequest(req.Release()); + } else { + PDisk->HttpInfo(*req); // Sends TEvHttpInfoResult inside + } + } + + void Handle(NPDisk::TEvHttpInfoResult::TPtr &ev) { + NPDisk::TEvHttpInfoResult *result = ev->Get(); + Send(result->EndCustomer, result->HttpInfoRes.Release()); } - void Handle(NPDisk::TEvHttpInfoResult::TPtr &ev) { - NPDisk::TEvHttpInfoResult *result = ev->Get(); - Send(result->EndCustomer, result->HttpInfoRes.Release()); + void Handle(NPDisk::TEvReadLogContinue::TPtr &ev) { + auto *request = PDisk->ReqCreator.CreateFromEv<TLogReadContinue>(*ev->Get(), SelfId()); + PDisk->InputRequest(request); + } + + void Handle(NPDisk::TEvLogSectorRestore::TPtr &ev) { + auto *request = PDisk->ReqCreator.CreateFromEv<TLogSectorRestore>(*ev->Get(), SelfId()); + PDisk->InputRequest(request); + } + + void Handle(TEvents::TEvUndelivered::TPtr &ev) { + auto sender = ev->Sender; + TRequestBase *request = PDisk->ReqCreator.CreateFromEv<TUndelivered>(std::move(ev), sender); + PDisk->InputRequest(request); } - void Handle(NPDisk::TEvReadLogContinue::TPtr &ev) { - auto *request = PDisk->ReqCreator.CreateFromEv<TLogReadContinue>(*ev->Get(), SelfId()); - PDisk->InputRequest(request); - } - - void Handle(NPDisk::TEvLogSectorRestore::TPtr &ev) { - auto *request = PDisk->ReqCreator.CreateFromEv<TLogSectorRestore>(*ev->Get(), SelfId()); - PDisk->InputRequest(request); - } - - void 
Handle(TEvents::TEvUndelivered::TPtr &ev) { - auto sender = ev->Sender; - TRequestBase *request = PDisk->ReqCreator.CreateFromEv<TUndelivered>(std::move(ev), sender); - PDisk->InputRequest(request); - } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Actor state functions //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - STRICT_STFUNC(StateInit, - hFunc(NPDisk::TEvYardInit, InitHandle); - hFunc(NPDisk::TEvCheckSpace, ErrorHandle); - hFunc(NPDisk::TEvLog, ErrorHandle); - hFunc(NPDisk::TEvMultiLog, ErrorHandle); - hFunc(NPDisk::TEvReadLog, ErrorHandle); - hFunc(NPDisk::TEvChunkWrite, ErrorHandle); - hFunc(NPDisk::TEvChunkRead, ErrorHandle); - hFunc(NPDisk::TEvHarakiri, ErrorHandle); - hFunc(NPDisk::TEvSlay, InitHandle); - hFunc(NPDisk::TEvChunkReserve, ErrorHandle); - hFunc(NPDisk::TEvYardControl, InitHandle); - hFunc(NPDisk::TEvAskForCutLog, ErrorHandle); - hFunc(NPDisk::TEvWhiteboardReportResult, Handle); - hFunc(NPDisk::TEvHttpInfoResult, Handle); - hFunc(NPDisk::TEvReadLogContinue, Handle); - hFunc(NPDisk::TEvLogSectorRestore, Handle); - hFunc(NPDisk::TEvLogInitResult, InitHandle); - hFunc(TEvents::TEvUndelivered, Handle); - hFunc(NPDisk::TEvPDiskFormattingFinished, InitHandle); - hFunc(TEvReadFormatResult, InitHandle); - hFunc(NPDisk::TEvReadLogResult, InitHandle); - cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); - hFunc(NMon::TEvHttpInfo, InitHandle); - cFunc(TEvents::TSystem::Wakeup, HandleWakeup); - hFunc(NPDisk::TEvDeviceError, Handle); - hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); - ) - - STRICT_STFUNC(StateOnline, - hFunc(NPDisk::TEvYardInit, Handle); - hFunc(NPDisk::TEvCheckSpace, Handle); - hFunc(NPDisk::TEvLog, Handle); - hFunc(NPDisk::TEvMultiLog, Handle); - hFunc(NPDisk::TEvReadLog, Handle); - hFunc(NPDisk::TEvChunkWrite, Handle); - hFunc(NPDisk::TEvChunkRead, Handle); - 
hFunc(NPDisk::TEvHarakiri, Handle); - hFunc(NPDisk::TEvSlay, Handle); - hFunc(NPDisk::TEvChunkReserve, Handle); - hFunc(NPDisk::TEvChunksLock, Handle); - hFunc(NPDisk::TEvChunksUnlock, Handle); - hFunc(NPDisk::TEvYardControl, Handle); - hFunc(NPDisk::TEvAskForCutLog, Handle); - hFunc(NPDisk::TEvConfigureScheduler, Handle); - hFunc(NPDisk::TEvWhiteboardReportResult, Handle); - hFunc(NPDisk::TEvHttpInfoResult, Handle); - hFunc(NPDisk::TEvReadLogContinue, Handle); - hFunc(NPDisk::TEvLogSectorRestore, Handle); - hFunc(TEvents::TEvUndelivered, Handle); - - cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); - hFunc(NMon::TEvHttpInfo, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleWakeup); - hFunc(NPDisk::TEvDeviceError, Handle); - hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); - ) - - STRICT_STFUNC(StateError, - hFunc(NPDisk::TEvYardInit, ErrorHandle); - hFunc(NPDisk::TEvCheckSpace, ErrorHandle); - hFunc(NPDisk::TEvLog, ErrorHandle); - hFunc(NPDisk::TEvMultiLog, ErrorHandle); - hFunc(NPDisk::TEvReadLog, ErrorHandle); - hFunc(NPDisk::TEvChunkWrite, ErrorHandle); - hFunc(NPDisk::TEvChunkRead, ErrorHandle); - hFunc(NPDisk::TEvHarakiri, ErrorHandle); - hFunc(NPDisk::TEvSlay, ErrorHandle); - hFunc(NPDisk::TEvChunkReserve, ErrorHandle); - hFunc(NPDisk::TEvYardControl, ErrorHandle); - hFunc(NPDisk::TEvAskForCutLog, ErrorHandle); - hFunc(NPDisk::TEvWhiteboardReportResult, Handle); - hFunc(NPDisk::TEvHttpInfoResult, Handle); - hFunc(NPDisk::TEvReadLogContinue, Handle); - hFunc(NPDisk::TEvLogSectorRestore, Handle); - hFunc(TEvents::TEvUndelivered, Handle); - - cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); - hFunc(NMon::TEvHttpInfo, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleWakeup); - hFunc(NPDisk::TEvDeviceError, Handle); - hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); - ) + STRICT_STFUNC(StateInit, + hFunc(NPDisk::TEvYardInit, InitHandle); + hFunc(NPDisk::TEvCheckSpace, ErrorHandle); + hFunc(NPDisk::TEvLog, ErrorHandle); + 
hFunc(NPDisk::TEvMultiLog, ErrorHandle); + hFunc(NPDisk::TEvReadLog, ErrorHandle); + hFunc(NPDisk::TEvChunkWrite, ErrorHandle); + hFunc(NPDisk::TEvChunkRead, ErrorHandle); + hFunc(NPDisk::TEvHarakiri, ErrorHandle); + hFunc(NPDisk::TEvSlay, InitHandle); + hFunc(NPDisk::TEvChunkReserve, ErrorHandle); + hFunc(NPDisk::TEvYardControl, InitHandle); + hFunc(NPDisk::TEvAskForCutLog, ErrorHandle); + hFunc(NPDisk::TEvWhiteboardReportResult, Handle); + hFunc(NPDisk::TEvHttpInfoResult, Handle); + hFunc(NPDisk::TEvReadLogContinue, Handle); + hFunc(NPDisk::TEvLogSectorRestore, Handle); + hFunc(NPDisk::TEvLogInitResult, InitHandle); + hFunc(TEvents::TEvUndelivered, Handle); + hFunc(NPDisk::TEvPDiskFormattingFinished, InitHandle); + hFunc(TEvReadFormatResult, InitHandle); + hFunc(NPDisk::TEvReadLogResult, InitHandle); + cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); + hFunc(NMon::TEvHttpInfo, InitHandle); + cFunc(TEvents::TSystem::Wakeup, HandleWakeup); + hFunc(NPDisk::TEvDeviceError, Handle); + hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); + ) + + STRICT_STFUNC(StateOnline, + hFunc(NPDisk::TEvYardInit, Handle); + hFunc(NPDisk::TEvCheckSpace, Handle); + hFunc(NPDisk::TEvLog, Handle); + hFunc(NPDisk::TEvMultiLog, Handle); + hFunc(NPDisk::TEvReadLog, Handle); + hFunc(NPDisk::TEvChunkWrite, Handle); + hFunc(NPDisk::TEvChunkRead, Handle); + hFunc(NPDisk::TEvHarakiri, Handle); + hFunc(NPDisk::TEvSlay, Handle); + hFunc(NPDisk::TEvChunkReserve, Handle); + hFunc(NPDisk::TEvChunksLock, Handle); + hFunc(NPDisk::TEvChunksUnlock, Handle); + hFunc(NPDisk::TEvYardControl, Handle); + hFunc(NPDisk::TEvAskForCutLog, Handle); + hFunc(NPDisk::TEvConfigureScheduler, Handle); + hFunc(NPDisk::TEvWhiteboardReportResult, Handle); + hFunc(NPDisk::TEvHttpInfoResult, Handle); + hFunc(NPDisk::TEvReadLogContinue, Handle); + hFunc(NPDisk::TEvLogSectorRestore, Handle); + hFunc(TEvents::TEvUndelivered, Handle); + + cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); + 
hFunc(NMon::TEvHttpInfo, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleWakeup); + hFunc(NPDisk::TEvDeviceError, Handle); + hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); + ) + + STRICT_STFUNC(StateError, + hFunc(NPDisk::TEvYardInit, ErrorHandle); + hFunc(NPDisk::TEvCheckSpace, ErrorHandle); + hFunc(NPDisk::TEvLog, ErrorHandle); + hFunc(NPDisk::TEvMultiLog, ErrorHandle); + hFunc(NPDisk::TEvReadLog, ErrorHandle); + hFunc(NPDisk::TEvChunkWrite, ErrorHandle); + hFunc(NPDisk::TEvChunkRead, ErrorHandle); + hFunc(NPDisk::TEvHarakiri, ErrorHandle); + hFunc(NPDisk::TEvSlay, ErrorHandle); + hFunc(NPDisk::TEvChunkReserve, ErrorHandle); + hFunc(NPDisk::TEvYardControl, ErrorHandle); + hFunc(NPDisk::TEvAskForCutLog, ErrorHandle); + hFunc(NPDisk::TEvWhiteboardReportResult, Handle); + hFunc(NPDisk::TEvHttpInfoResult, Handle); + hFunc(NPDisk::TEvReadLogContinue, Handle); + hFunc(NPDisk::TEvLogSectorRestore, Handle); + hFunc(TEvents::TEvUndelivered, Handle); + + cFunc(NActors::TEvents::TSystem::PoisonPill, HandlePoison); + hFunc(NMon::TEvHttpInfo, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleWakeup); + hFunc(NPDisk::TEvDeviceError, Handle); + hFunc(TEvBlobStorage::TEvRestartPDisk, Handle); + ) }; } // NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actorsystem_creator.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actorsystem_creator.h index cf275d699d..c74201d451 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actorsystem_creator.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actorsystem_creator.h @@ -1,68 +1,68 @@ -#pragma once - +#pragma once + #include <ydb/core/base/appdata.h> #include <ydb/core/base/counters.h> #include <ydb/core/protos/services.pb.h> - + #include <ydb/library/pdisk_io/aio.h> -#include <library/cpp/actors/core/actorsystem.h> -#include <library/cpp/actors/core/executor_pool_io.h> -#include <library/cpp/actors/core/executor_pool_basic.h> -#include <library/cpp/actors/core/mon.h> -#include 
<library/cpp/actors/core/scheduler_basic.h> - -namespace NKikimr { - -class TActorSystemCreator { - std::unique_ptr<TAppData> AppData; - std::shared_ptr<NPDisk::IIoContextFactory> IoContext; - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - - std::unique_ptr<NActors::TActorSystem> ActorSystem; - -public: - TActorSystemCreator() - { - using namespace NActors; - - AppData = std::make_unique<TAppData>(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); - IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); - AppData->IoContextFactory = IoContext.get(); - - auto setup = MakeHolder<TActorSystemSetup>(); - setup->NodeId = 1; - setup->ExecutorsCount = 3; - setup->Executors.Reset(new TAutoPtr<IExecutorPool>[3]); - setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20)); - setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20)); - setup->Executors[2].Reset(new TIOExecutorPool(2, 10)); - setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100))); - - auto logSettings = MakeIntrusive<NActors::NLog::TSettings>(NActors::TActorId(1, "logger"), - NKikimrServices::LOGGER, NActors::NLog::PRI_ERROR, NActors::NLog::PRI_ERROR, ui32{0}); - logSettings->Append( - NActorsServices::EServiceCommon_MIN, - NActorsServices::EServiceCommon_MAX, - NActorsServices::EServiceCommon_Name - ); - logSettings->Append( - NKikimrServices::EServiceKikimr_MIN, - NKikimrServices::EServiceKikimr_MAX, - NKikimrServices::EServiceKikimr_Name - ); - Counters = MakeIntrusive<NMonitoring::TDynamicCounters>(); - NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(logSettings, NActors::CreateNullBackend(), - GetServiceCounters(Counters, "utils")); - NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, 2); - setup->LocalServices.emplace_back(NActors::TActorId(1, "logger"), loggerActorCmd); - - ActorSystem = std::make_unique<TActorSystem>(setup, AppData.get(), logSettings); - ActorSystem->Start(); - } - - 
TActorSystem *GetActorSystem() { - return ActorSystem.get(); - } -}; - -} // NKikimr +#include <library/cpp/actors/core/actorsystem.h> +#include <library/cpp/actors/core/executor_pool_io.h> +#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/mon.h> +#include <library/cpp/actors/core/scheduler_basic.h> + +namespace NKikimr { + +class TActorSystemCreator { + std::unique_ptr<TAppData> AppData; + std::shared_ptr<NPDisk::IIoContextFactory> IoContext; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + + std::unique_ptr<NActors::TActorSystem> ActorSystem; + +public: + TActorSystemCreator() + { + using namespace NActors; + + AppData = std::make_unique<TAppData>(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); + IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); + AppData->IoContextFactory = IoContext.get(); + + auto setup = MakeHolder<TActorSystemSetup>(); + setup->NodeId = 1; + setup->ExecutorsCount = 3; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[3]); + setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20)); + setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20)); + setup->Executors[2].Reset(new TIOExecutorPool(2, 10)); + setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100))); + + auto logSettings = MakeIntrusive<NActors::NLog::TSettings>(NActors::TActorId(1, "logger"), + NKikimrServices::LOGGER, NActors::NLog::PRI_ERROR, NActors::NLog::PRI_ERROR, ui32{0}); + logSettings->Append( + NActorsServices::EServiceCommon_MIN, + NActorsServices::EServiceCommon_MAX, + NActorsServices::EServiceCommon_Name + ); + logSettings->Append( + NKikimrServices::EServiceKikimr_MIN, + NKikimrServices::EServiceKikimr_MAX, + NKikimrServices::EServiceKikimr_Name + ); + Counters = MakeIntrusive<NMonitoring::TDynamicCounters>(); + NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(logSettings, NActors::CreateNullBackend(), + GetServiceCounters(Counters, 
"utils")); + NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, 2); + setup->LocalServices.emplace_back(NActors::TActorId(1, "logger"), loggerActorCmd); + + ActorSystem = std::make_unique<TActorSystem>(setup, AppData.get(), logSettings); + ActorSystem->Start(); + } + + TActorSystem *GetActorSystem() { + return ActorSystem.get(); + } +}; + +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h index a0a7228201..aa28b6297d 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h @@ -1,11 +1,11 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_completion.h" +#include "blobstorage_pdisk_completion.h" #include "blobstorage_pdisk_drivedata.h" #include "blobstorage_pdisk_request_id.h" -#include "blobstorage_pdisk_util_devicemode.h" - +#include "blobstorage_pdisk_util_devicemode.h" + #include <ydb/core/base/blobstorage.h> #include <ydb/core/control/immediate_control_board_wrapper.h> #include <ydb/library/pdisk_io/aio.h> @@ -30,43 +30,43 @@ class IBlockDevice { public: virtual ~IBlockDevice() {}; - // Initialization methods - virtual void Initialize(TActorSystem *actorSystem, const TActorId &pdiskActor) = 0; + // Initialization methods + virtual void Initialize(TActorSystem *actorSystem, const TActorId &pdiskActor) = 0; virtual bool IsGood() = 0; - virtual int GetLastErrno() = 0; - - // Synchronous intefrace - virtual void PwriteSync(const void *data, ui64 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void PreadSync(void *data, ui32 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void TrimSync(ui32 size, ui64 offset) = 0; - - // Asynchronous intefrace - virtual void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, + virtual int GetLastErrno() = 0; + + // 
Synchronous intefrace + virtual void PwriteSync(const void *data, ui64 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) = 0; + virtual void PreadSync(void *data, ui32 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) = 0; + virtual void TrimSync(ui32 size, ui64 offset) = 0; + + // Asynchronous intefrace + virtual void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void PreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, + virtual void PreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void CachedPreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, - TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void ClearCache() = 0; + virtual void CachedPreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, + TReqId reqId, NWilson::TTraceId *traceId) = 0; + virtual void ClearCache() = 0; virtual void FlushAsync(TCompletionAction *completionAction, TReqId reqId) = 0; virtual void NoopAsync(TCompletionAction *completionAction, TReqId reqId) = 0; - virtual void NoopAsyncHackForLogReader(TCompletionAction *completionAction, TReqId reqId) = 0; - virtual void TrimAsync(ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId) = 0; - - // Control methods - virtual bool GetIsTrimEnabled() = 0; - virtual TDriveData GetDriveData() = 0; + virtual void NoopAsyncHackForLogReader(TCompletionAction *completionAction, TReqId reqId) = 0; + virtual void TrimAsync(ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId) = 0; + + // Control methods + virtual bool GetIsTrimEnabled() = 0; + virtual TDriveData GetDriveData() = 0; virtual ui32 GetPDiskId() = 0; virtual void SetWriteCache(bool isEnable) = 0; virtual void Stop() = 0; - virtual TString 
DebugInfo() = 0; + virtual TString DebugInfo() = 0; }; -IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, - ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, +IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, + ui64 reorderingCycles, ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap); -IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, - TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem); +IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, + TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem); } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp index 3230dd736f..34c20999b7 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp @@ -1,12 +1,12 @@ #include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_completion_impl.h" +#include "blobstorage_pdisk_completion_impl.h" #include "blobstorage_pdisk_mon.h" #include "blobstorage_pdisk_util_atomicblockcounter.h" -#include "blobstorage_pdisk_util_countedqueuemanyone.h" +#include "blobstorage_pdisk_util_countedqueuemanyone.h" #include "blobstorage_pdisk_util_countedqueueoneone.h" #include "blobstorage_pdisk_util_flightcontrol.h" -#include "blobstorage_pdisk_util_idlecounter.h" +#include "blobstorage_pdisk_util_idlecounter.h" #include "blobstorage_pdisk_util_wcache.h" #include <ydb/core/base/appdata.h> @@ -20,15 +20,15 @@ #include <library/cpp/actors/core/log.h> #include <library/cpp/actors/util/thread.h> -#include 
<library/cpp/containers/stack_vector/stack_vec.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> -#include <util/generic/deque.h> -#include <util/generic/bitops.h> +#include <util/generic/deque.h> +#include <util/generic/bitops.h> #include <util/system/file.h> -#include <util/system/mutex.h> +#include <util/system/mutex.h> #include <util/system/sanitizers.h> -#include <util/system/spinlock.h> -#include <util/system/thread.h> +#include <util/system/spinlock.h> +#include <util/system/thread.h> namespace NKikimr { namespace NPDisk { @@ -36,265 +36,265 @@ namespace NPDisk { LWTRACE_USING(BLOBSTORAGE_PROVIDER); constexpr ui64 MaxWaitingNoops = 256; - + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TRealBlockDevice +// TRealBlockDevice //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TRealBlockDevice : public IBlockDevice { +class TRealBlockDevice : public IBlockDevice { //////////////////////////////////////////////////////// // TCompletionThread //////////////////////////////////////////////////////// class TCompletionThread : public TThread { - static constexpr ui32 NumOfWriters = 2; + static constexpr ui32 NumOfWriters = 2; public: - TCompletionThread(TRealBlockDevice &device, ui32 maxQueuedActions) + TCompletionThread(TRealBlockDevice &device, ui32 maxQueuedActions) : TThread(&ThreadProc, this) , Device(device) - , QueuedActions(0) - , MaxQueuedActions(maxQueuedActions) + , QueuedActions(0) + , MaxQueuedActions(maxQueuedActions) {} static void* ThreadProc(void* _this) { - SetCurrentThreadName("PdCmpl"); + SetCurrentThreadName("PdCmpl"); static_cast<TCompletionThread*>(_this)->Exec(); return nullptr; } void Exec() { - ui32 exitSignalsReceived = 0; - Device.Mon.L7.Set(false, AtomicGetAndIncrement(SeqnoL7)); - auto prevCycleEnd = HPNow(); - bool isWorking = true; - bool stateError = false; - - 
while(isWorking) { + ui32 exitSignalsReceived = 0; + Device.Mon.L7.Set(false, AtomicGetAndIncrement(SeqnoL7)); + auto prevCycleEnd = HPNow(); + bool isWorking = true; + bool stateError = false; + + while(isWorking) { TAtomicBase actionCount = CompletionActions.GetWaitingSize(); - + if (actionCount > 0) { for (TAtomicBase idx = 0; idx < actionCount; ++idx) { - TCompletionAction *action = CompletionActions.Pop(); - AtomicDecrement(QueuedActions); + TCompletionAction *action = CompletionActions.Pop(); + AtomicDecrement(QueuedActions); if (action == nullptr) { - ++exitSignalsReceived; - if (exitSignalsReceived == NumOfWriters) { - isWorking = false; - } - } else { - if (!stateError && action->CanHandleResult()) { - action->Exec(Device.ActorSystem); - } else { - if (!stateError) { - stateError = true; - Device.BecomeErrorState(TStringBuilder() - << " CompletionAction error, operation info# " << action->ErrorReason); - } - action->Release(Device.ActorSystem); - } + ++exitSignalsReceived; + if (exitSignalsReceived == NumOfWriters) { + isWorking = false; + } + } else { + if (!stateError && action->CanHandleResult()) { + action->Exec(Device.ActorSystem); + } else { + if (!stateError) { + stateError = true; + Device.BecomeErrorState(TStringBuilder() + << " CompletionAction error, operation info# " << action->ErrorReason); + } + action->Release(Device.ActorSystem); + } } } } else { - *Device.Mon.CompletionThreadCPU = ThreadCPUTime(); + *Device.Mon.CompletionThreadCPU = ThreadCPUTime(); CompletionActions.ProducedWaitI(); } - - const auto cycleEnd = HPNow(); - if (actionCount > 0) { - *Device.Mon.DeviceCompletionThreadBusyTimeNs += HPNanoSeconds(cycleEnd - prevCycleEnd); - } - prevCycleEnd = cycleEnd; + + const auto cycleEnd = HPNow(); + if (actionCount > 0) { + *Device.Mon.DeviceCompletionThreadBusyTimeNs += HPNanoSeconds(cycleEnd - prevCycleEnd); + } + prevCycleEnd = cycleEnd; } } // Schedule action execution // pass action = nullptr to quit void Schedule(TCompletionAction 
*action) noexcept { - TAtomicBase queueActions = AtomicIncrement(QueuedActions); - if (queueActions >= MaxQueuedActions) { - Device.Mon.L7.Set(true, AtomicGetAndIncrement(SeqnoL7)); - while (AtomicGet(QueuedActions) >= MaxQueuedActions) { - SpinLockPause(); - } - Device.Mon.L7.Set(false, AtomicGetAndIncrement(SeqnoL7)); - } + TAtomicBase queueActions = AtomicIncrement(QueuedActions); + if (queueActions >= MaxQueuedActions) { + Device.Mon.L7.Set(true, AtomicGetAndIncrement(SeqnoL7)); + while (AtomicGet(QueuedActions) >= MaxQueuedActions) { + SpinLockPause(); + } + Device.Mon.L7.Set(false, AtomicGetAndIncrement(SeqnoL7)); + } + CompletionActions.Push(action); + return; + } + + // Schedule action execution + // pass action = nullptr to quit + void ScheduleHackForLogReader(TCompletionAction *action) noexcept { + AtomicIncrement(QueuedActions); + action->Result = EIoResult::Ok; CompletionActions.Push(action); return; } - // Schedule action execution - // pass action = nullptr to quit - void ScheduleHackForLogReader(TCompletionAction *action) noexcept { - AtomicIncrement(QueuedActions); - action->Result = EIoResult::Ok; - CompletionActions.Push(action); - return; - } - private: - TCountedQueueManyOne<TCompletionAction, 4 << 10> CompletionActions; - TRealBlockDevice &Device; - TAtomic QueuedActions; - const TAtomicBase MaxQueuedActions; - TAtomic SeqnoL7 = 0; + TCountedQueueManyOne<TCompletionAction, 4 << 10> CompletionActions; + TRealBlockDevice &Device; + TAtomic QueuedActions; + const TAtomicBase MaxQueuedActions; + TAtomic SeqnoL7 = 0; + }; + + class TSubmitThreadBase : public TThread { + public: + TSubmitThreadBase(TRealBlockDevice &device, TThread::TThreadProc threadProc, void *_this) + : TThread(threadProc, _this) + , Device(device) + {} + + // Schedule op execution + // pass op = nullptr to quit + void Schedule(IAsyncIoOperation *op) noexcept { + if (!op) { + SubmitQuitCounter.Increment(); + SubmitQuitCounter.BlockA(); + OperationsToBeSubmit.Push(op); + return; + 
} + if (!SubmitQuitCounter.Increment()) { + Device.FreeOperation(op); + return; + } + ui64 size = op->GetSize(); + OperationsToBeSubmit.Push(op); + NHPTimer::STime start; + if (AtomicGetAndAdd(SubmitInFlightBytes, size) > SubmitInFlightBytesMax) { + TGuard<TMutex> guard(SubmitMtx); + start = HPNow(); + while (AtomicGet(SubmitInFlightBytes) > SubmitInFlightBytesMax) { + if (SubmitCondVar.WaitT(SubmitMtx, TDuration::Seconds(1))) { + return; + } else if (Device.ActorSystem) { + TAtomicBase maxInFlight = SubmitInFlightBytesMax; + LOG_WARN_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, + "Exceed 1 second deadline in SubmitThreadQueue: " << + " PDiskId# " << Device.PDiskId << + " Path# \"" << Device.Path << "\"" << + " Total time spent in waiting# " << NHPTimer::GetSeconds(HPNow() - start) << "sec" << + " SubmitInFlightBytes# " << AtomicGet(SubmitInFlightBytes) << + " SubmitInFlightBytesMax# " << maxInFlight); + } + } + } + } + + public: + TAtomic SubmitInFlightBytes = 0; + + protected: + TRealBlockDevice &Device; + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> OperationsToBeSubmit; + static constexpr TAtomicBase SubmitInFlightBytesMax = 1ull << 15; + TMutex SubmitMtx; + TCondVar SubmitCondVar; + TAtomicBlockCounter SubmitQuitCounter; + }; + + //////////////////////////////////////////////////////// + // TSubmitThread + //////////////////////////////////////////////////////// + class TSubmitThread : public TSubmitThreadBase { + public: + TSubmitThread(TRealBlockDevice &device) + : TSubmitThreadBase(device, &ThreadProc, this) + {} + + static void* ThreadProc(void* _this) { + SetCurrentThreadName("PdSbmEv"); + static_cast<TSubmitThread*>(_this)->Exec(); + return nullptr; + } + + void ReleaseOp(IAsyncIoOperation *op) { + Device.DecrementMonInFlight(op->GetType(), op->GetSize()); + Device.FreeOperation(op); + Device.QuitCounter.Decrement(); + Device.IdleCounter.Decrement(); + } + + void Submit(IAsyncIoOperation *op) { + TCompletionAction *action = 
static_cast<TCompletionAction*>(op->GetCookie()); + + if (!Device.QuitCounter.Increment()) { + Device.FreeOperation(op); + TGuard<TMutex> guard(SubmitMtx); + SubmitCondVar.Signal(); + return; + } + Device.IdleCounter.Increment(); + + Device.IncrementMonInFlight(op->GetType(), op->GetSize()); + + action->OperationIdx = Device.FlightControl.Schedule(); + if (action->FlushAction) { + action->FlushAction->OperationIdx = action->OperationIdx; + } + + EIoResult ret = EIoResult::TryAgain; + while (ret == EIoResult::TryAgain) { + action->SubmitTime = HPNow(); + ret = Device.IoContext->Submit(op, Device.SharedCallback.Get()); + if (ret == EIoResult::Ok) { + return; + } + if (Device.QuitCounter.IsBlocked()) { + ReleaseOp(op); + return; + } + } + // IoError happend + ReleaseOp(op); + Device.BecomeErrorState(TStringBuilder() << " Submit error, reason# " << ret); + } + + void Exec() { + auto prevCycleEnd = HPNow(); + while(!SubmitQuitCounter.IsBlocked() || SubmitQuitCounter.Get()) { + TAtomicBase ops = OperationsToBeSubmit.GetWaitingSize(); + if (ops > 0) { + for (TAtomicBase idx = 0; idx < ops; ++idx) { + IAsyncIoOperation *op = OperationsToBeSubmit.Pop(); + SubmitQuitCounter.Decrement(); + if (op) { + ui64 size = op->GetSize(); // op may be deleted after submit + Submit(op); + TGuard<TMutex> guard(SubmitMtx); + if (AtomicSub(SubmitInFlightBytes, size) <= SubmitInFlightBytesMax) { + SubmitCondVar.Signal(); + } + } + } + } else { + *Device.Mon.SubmitThreadCPU = ThreadCPUTime(); + OperationsToBeSubmit.ProducedWaitI(); + } + auto cycleEnd = HPNow(); + // LWPROBE(PDiskDeviceSubmitThreadIdle, Device.GetPDiskId(), ops, + // HPMilliSecondsFloat(cycleEnd - prevCycleEnd)); + if (ops) { + *Device.Mon.DeviceSubmitThreadBusyTimeNs += HPNanoSeconds(cycleEnd - prevCycleEnd); + } + prevCycleEnd = cycleEnd; + } + Y_VERIFY(OperationsToBeSubmit.GetWaitingSize() == 0); + } }; - class TSubmitThreadBase : public TThread { - public: - TSubmitThreadBase(TRealBlockDevice &device, TThread::TThreadProc 
threadProc, void *_this) - : TThread(threadProc, _this) - , Device(device) - {} - - // Schedule op execution - // pass op = nullptr to quit - void Schedule(IAsyncIoOperation *op) noexcept { - if (!op) { - SubmitQuitCounter.Increment(); - SubmitQuitCounter.BlockA(); - OperationsToBeSubmit.Push(op); - return; - } - if (!SubmitQuitCounter.Increment()) { - Device.FreeOperation(op); - return; - } - ui64 size = op->GetSize(); - OperationsToBeSubmit.Push(op); - NHPTimer::STime start; - if (AtomicGetAndAdd(SubmitInFlightBytes, size) > SubmitInFlightBytesMax) { - TGuard<TMutex> guard(SubmitMtx); - start = HPNow(); - while (AtomicGet(SubmitInFlightBytes) > SubmitInFlightBytesMax) { - if (SubmitCondVar.WaitT(SubmitMtx, TDuration::Seconds(1))) { - return; - } else if (Device.ActorSystem) { - TAtomicBase maxInFlight = SubmitInFlightBytesMax; - LOG_WARN_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, - "Exceed 1 second deadline in SubmitThreadQueue: " << - " PDiskId# " << Device.PDiskId << - " Path# \"" << Device.Path << "\"" << - " Total time spent in waiting# " << NHPTimer::GetSeconds(HPNow() - start) << "sec" << - " SubmitInFlightBytes# " << AtomicGet(SubmitInFlightBytes) << - " SubmitInFlightBytesMax# " << maxInFlight); - } - } - } - } - - public: - TAtomic SubmitInFlightBytes = 0; - - protected: - TRealBlockDevice &Device; - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> OperationsToBeSubmit; - static constexpr TAtomicBase SubmitInFlightBytesMax = 1ull << 15; - TMutex SubmitMtx; - TCondVar SubmitCondVar; - TAtomicBlockCounter SubmitQuitCounter; - }; - //////////////////////////////////////////////////////// - // TSubmitThread - //////////////////////////////////////////////////////// - class TSubmitThread : public TSubmitThreadBase { - public: - TSubmitThread(TRealBlockDevice &device) - : TSubmitThreadBase(device, &ThreadProc, this) - {} - - static void* ThreadProc(void* _this) { - SetCurrentThreadName("PdSbmEv"); - static_cast<TSubmitThread*>(_this)->Exec(); - return 
nullptr; - } - - void ReleaseOp(IAsyncIoOperation *op) { - Device.DecrementMonInFlight(op->GetType(), op->GetSize()); - Device.FreeOperation(op); - Device.QuitCounter.Decrement(); - Device.IdleCounter.Decrement(); - } - - void Submit(IAsyncIoOperation *op) { - TCompletionAction *action = static_cast<TCompletionAction*>(op->GetCookie()); - - if (!Device.QuitCounter.Increment()) { - Device.FreeOperation(op); - TGuard<TMutex> guard(SubmitMtx); - SubmitCondVar.Signal(); - return; - } - Device.IdleCounter.Increment(); - - Device.IncrementMonInFlight(op->GetType(), op->GetSize()); - - action->OperationIdx = Device.FlightControl.Schedule(); - if (action->FlushAction) { - action->FlushAction->OperationIdx = action->OperationIdx; - } - - EIoResult ret = EIoResult::TryAgain; - while (ret == EIoResult::TryAgain) { - action->SubmitTime = HPNow(); - ret = Device.IoContext->Submit(op, Device.SharedCallback.Get()); - if (ret == EIoResult::Ok) { - return; - } - if (Device.QuitCounter.IsBlocked()) { - ReleaseOp(op); - return; - } - } - // IoError happend - ReleaseOp(op); - Device.BecomeErrorState(TStringBuilder() << " Submit error, reason# " << ret); - } - - void Exec() { - auto prevCycleEnd = HPNow(); - while(!SubmitQuitCounter.IsBlocked() || SubmitQuitCounter.Get()) { - TAtomicBase ops = OperationsToBeSubmit.GetWaitingSize(); - if (ops > 0) { - for (TAtomicBase idx = 0; idx < ops; ++idx) { - IAsyncIoOperation *op = OperationsToBeSubmit.Pop(); - SubmitQuitCounter.Decrement(); - if (op) { - ui64 size = op->GetSize(); // op may be deleted after submit - Submit(op); - TGuard<TMutex> guard(SubmitMtx); - if (AtomicSub(SubmitInFlightBytes, size) <= SubmitInFlightBytesMax) { - SubmitCondVar.Signal(); - } - } - } - } else { - *Device.Mon.SubmitThreadCPU = ThreadCPUTime(); - OperationsToBeSubmit.ProducedWaitI(); - } - auto cycleEnd = HPNow(); - // LWPROBE(PDiskDeviceSubmitThreadIdle, Device.GetPDiskId(), ops, - // HPMilliSecondsFloat(cycleEnd - prevCycleEnd)); - if (ops) { - 
*Device.Mon.DeviceSubmitThreadBusyTimeNs += HPNanoSeconds(cycleEnd - prevCycleEnd); - } - prevCycleEnd = cycleEnd; - } - Y_VERIFY(OperationsToBeSubmit.GetWaitingSize() == 0); - } - }; - - //////////////////////////////////////////////////////// - // TGetThread + // TGetThread //////////////////////////////////////////////////////// - class TGetThread : public TThread { + class TGetThread : public TThread { public: - TGetThread(TRealBlockDevice &device) + TGetThread(TRealBlockDevice &device) : TThread(&ThreadProc, this) , Device(device) {} static void* ThreadProc(void* _this) { - SetCurrentThreadName("PdGetEv"); - static_cast<TGetThread*>(_this)->Exec(); + SetCurrentThreadName("PdGetEv"); + static_cast<TGetThread*>(_this)->Exec(); return nullptr; } @@ -306,638 +306,638 @@ class TRealBlockDevice : public IBlockDevice { TAsyncIoOperationResult events[MaxEvents]; while(!Device.QuitCounter.IsBlocked() || Device.QuitCounter.Get()) { - i64 ret = Device.IoContext->GetEvents(1, MaxEvents, events, TDuration::MilliSeconds(WaitTimeoutMs)); - if (ret == -static_cast<i64>(EIoResult::InterruptedSystemCall)) { + i64 ret = Device.IoContext->GetEvents(1, MaxEvents, events, TDuration::MilliSeconds(WaitTimeoutMs)); + if (ret == -static_cast<i64>(EIoResult::InterruptedSystemCall)) { Device.Mon.DeviceInterruptedSystemCalls->Inc(); } else if (ret < 0) { - Device.BecomeErrorState(TStringBuilder() << " Get error, reason# " << (EIoResult)-ret); - } - } - } - private: - TRealBlockDevice &Device; - }; - - - class TSharedCallback : public ICallback { - ui64 NextPossibleNoop = 0; - ui64 EndOffset = 0; - ui64 PrevEventGotAtCycle = HPNow(); - ui64 PrevEstimationAtCycle = HPNow(); - ui64 PrevEstimatedCostNs = 0; - ui64 PrevActualCostNs = 0; - - TCompletionAction* WaitingNoops[MaxWaitingNoops] = {nullptr}; - TRealBlockDevice &Device; - - public: - TSharedCallback(TRealBlockDevice &device) - : Device(device) - {} - - void FillCompletionAction(TCompletionAction *action, IAsyncIoOperation *op, 
EIoResult result) { - action->TraceId = std::move(*op->GetTraceIdPtr()); - action->SetResult(result); - if (result != EIoResult::Ok) { - // Previously seen errors: OutOfMemory, IOError; - action->SetErrorReason(TStringBuilder() - << " type# " << op->GetType() - << " offset# " << op->GetOffset() - << " size# " << op->GetSize() - << " Result# " << result); - LOG_ERROR_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, "IAsyncIoOperation error, reason# " - << action->ErrorReason); - ++*Device.Mon.DeviceIoErrors; - } - } - - void Exec(TAsyncIoOperationResult *event) { - IAsyncIoOperation *op = event->Operation; - // Add up the execution time of all the events - ui64 totalExecutionCycles = 0; - ui64 totalCostNs = 0; - ui64 eventGotAtCycle = HPNow(); - AtomicSet(Device.Mon.LastDoneOperationTimestamp, eventGotAtCycle); - - TCompletionAction *completionAction = static_cast<TCompletionAction*>(op->GetCookie()); - FillCompletionAction(completionAction, op, event->Result); - - Device.QuitCounter.Decrement(); - Device.IdleCounter.Decrement(); - Device.FlightControl.MarkComplete(completionAction->OperationIdx); - - ui64 startCycle = Max((ui64)completionAction->SubmitTime, PrevEventGotAtCycle); - ui64 durationCycles = (eventGotAtCycle > startCycle) ? 
eventGotAtCycle - startCycle : 0; - totalExecutionCycles = Max(totalExecutionCycles, durationCycles); - totalCostNs += completionAction->CostNs; - - bool isSeekExpected = - ((ui64)completionAction->SubmitTime + Device.SeekCostNs / 25ull >= PrevEventGotAtCycle); - - const ui64 opSize = op->GetSize(); - Device.DecrementMonInFlight(op->GetType(), opSize); - if (opSize == 0) { - if (op->GetType() == IAsyncIoOperation::EType::PRead) { - Y_VERIFY(WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] == nullptr); - WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] = completionAction; - } else { - Y_VERIFY_DEBUG(false, "Threre must not be writes of size 0 in TRealBlockDevice"); - } - } else { - if ((ui64)op->GetOffset() != EndOffset) { - isSeekExpected = true; - } - EndOffset = op->GetOffset() + opSize; - - ui64 duration = HPNow() - completionAction->SubmitTime; - ui64 durationMs = HPMilliSecondsFloat(duration); - if (op->GetType() == IAsyncIoOperation::EType::PRead) { - NSan::Unpoison(op->GetData(), opSize); - REQUEST_VALGRIND_MAKE_MEM_DEFINED(op->GetData(), opSize); - Device.Mon.DeviceReadDuration.Increment(durationMs); - LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize); - } else { - Device.Mon.DeviceWriteDuration.Increment(durationMs); - LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize); - } - if (completionAction->FlushAction) { - ui64 idx = completionAction->FlushAction->OperationIdx; - Y_VERIFY(WaitingNoops[idx % MaxWaitingNoops] == nullptr); - WaitingNoops[idx % MaxWaitingNoops] = completionAction->FlushAction; - completionAction->FlushAction = nullptr; - } - Device.CompletionThread->Schedule(completionAction); - auto seqnoL6 = AtomicGetAndIncrement(Device.Mon.SeqnoL6); - Device.Mon.L6.Set(duration > Device.Reordering, seqnoL6); - } - - if (isSeekExpected) { - Device.Mon.DeviceExpectedSeeks->Inc(); - totalCostNs += Device.SeekCostNs; - } - - 
Device.IoContext->DestroyAsyncIoOperation(op); - ui64 firstIncompleteIdx = Device.FlightControl.FirstIncompleteIdx(); - while (NextPossibleNoop < firstIncompleteIdx) { - ui64 i = NextPossibleNoop % MaxWaitingNoops; - if (WaitingNoops[i] && WaitingNoops[i]->OperationIdx == NextPossibleNoop) { - Device.CompletionThread->Schedule(WaitingNoops[i]); - WaitingNoops[i] = nullptr; - } - ++NextPossibleNoop; - } - *Device.Mon.DeviceEstimatedCostNs += totalCostNs; - *Device.Mon.DeviceActualCostNs += HPNanoSeconds(totalExecutionCycles); - - if (PrevEstimationAtCycle > eventGotAtCycle) { - PrevEstimationAtCycle = eventGotAtCycle; - } - if (HPMilliSeconds(eventGotAtCycle - PrevEstimationAtCycle) >= 15000) { - ui64 estimated = (*Device.Mon.DeviceEstimatedCostNs - PrevEstimatedCostNs); - ui64 actual = (*Device.Mon.DeviceActualCostNs - PrevActualCostNs + 30000000ull); - if (estimated != 0) { - *Device.Mon.DeviceOverestimationRatio = 1000ull * actual / (estimated + 30000000ull); - if (actual > estimated) { - if (actual - estimated < 15000000000ull) { - *Device.Mon.DeviceNonperformanceMs = (actual - estimated) / 15000000ull; + Device.BecomeErrorState(TStringBuilder() << " Get error, reason# " << (EIoResult)-ret); + } + } + } + private: + TRealBlockDevice &Device; + }; + + + class TSharedCallback : public ICallback { + ui64 NextPossibleNoop = 0; + ui64 EndOffset = 0; + ui64 PrevEventGotAtCycle = HPNow(); + ui64 PrevEstimationAtCycle = HPNow(); + ui64 PrevEstimatedCostNs = 0; + ui64 PrevActualCostNs = 0; + + TCompletionAction* WaitingNoops[MaxWaitingNoops] = {nullptr}; + TRealBlockDevice &Device; + + public: + TSharedCallback(TRealBlockDevice &device) + : Device(device) + {} + + void FillCompletionAction(TCompletionAction *action, IAsyncIoOperation *op, EIoResult result) { + action->TraceId = std::move(*op->GetTraceIdPtr()); + action->SetResult(result); + if (result != EIoResult::Ok) { + // Previously seen errors: OutOfMemory, IOError; + action->SetErrorReason(TStringBuilder() + << " 
type# " << op->GetType() + << " offset# " << op->GetOffset() + << " size# " << op->GetSize() + << " Result# " << result); + LOG_ERROR_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, "IAsyncIoOperation error, reason# " + << action->ErrorReason); + ++*Device.Mon.DeviceIoErrors; + } + } + + void Exec(TAsyncIoOperationResult *event) { + IAsyncIoOperation *op = event->Operation; + // Add up the execution time of all the events + ui64 totalExecutionCycles = 0; + ui64 totalCostNs = 0; + ui64 eventGotAtCycle = HPNow(); + AtomicSet(Device.Mon.LastDoneOperationTimestamp, eventGotAtCycle); + + TCompletionAction *completionAction = static_cast<TCompletionAction*>(op->GetCookie()); + FillCompletionAction(completionAction, op, event->Result); + + Device.QuitCounter.Decrement(); + Device.IdleCounter.Decrement(); + Device.FlightControl.MarkComplete(completionAction->OperationIdx); + + ui64 startCycle = Max((ui64)completionAction->SubmitTime, PrevEventGotAtCycle); + ui64 durationCycles = (eventGotAtCycle > startCycle) ? 
eventGotAtCycle - startCycle : 0; + totalExecutionCycles = Max(totalExecutionCycles, durationCycles); + totalCostNs += completionAction->CostNs; + + bool isSeekExpected = + ((ui64)completionAction->SubmitTime + Device.SeekCostNs / 25ull >= PrevEventGotAtCycle); + + const ui64 opSize = op->GetSize(); + Device.DecrementMonInFlight(op->GetType(), opSize); + if (opSize == 0) { + if (op->GetType() == IAsyncIoOperation::EType::PRead) { + Y_VERIFY(WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] == nullptr); + WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] = completionAction; + } else { + Y_VERIFY_DEBUG(false, "Threre must not be writes of size 0 in TRealBlockDevice"); + } + } else { + if ((ui64)op->GetOffset() != EndOffset) { + isSeekExpected = true; + } + EndOffset = op->GetOffset() + opSize; + + ui64 duration = HPNow() - completionAction->SubmitTime; + ui64 durationMs = HPMilliSecondsFloat(duration); + if (op->GetType() == IAsyncIoOperation::EType::PRead) { + NSan::Unpoison(op->GetData(), opSize); + REQUEST_VALGRIND_MAKE_MEM_DEFINED(op->GetData(), opSize); + Device.Mon.DeviceReadDuration.Increment(durationMs); + LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize); + } else { + Device.Mon.DeviceWriteDuration.Increment(durationMs); + LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize); + } + if (completionAction->FlushAction) { + ui64 idx = completionAction->FlushAction->OperationIdx; + Y_VERIFY(WaitingNoops[idx % MaxWaitingNoops] == nullptr); + WaitingNoops[idx % MaxWaitingNoops] = completionAction->FlushAction; + completionAction->FlushAction = nullptr; + } + Device.CompletionThread->Schedule(completionAction); + auto seqnoL6 = AtomicGetAndIncrement(Device.Mon.SeqnoL6); + Device.Mon.L6.Set(duration > Device.Reordering, seqnoL6); + } + + if (isSeekExpected) { + Device.Mon.DeviceExpectedSeeks->Inc(); + totalCostNs += Device.SeekCostNs; + } + + 
Device.IoContext->DestroyAsyncIoOperation(op); + ui64 firstIncompleteIdx = Device.FlightControl.FirstIncompleteIdx(); + while (NextPossibleNoop < firstIncompleteIdx) { + ui64 i = NextPossibleNoop % MaxWaitingNoops; + if (WaitingNoops[i] && WaitingNoops[i]->OperationIdx == NextPossibleNoop) { + Device.CompletionThread->Schedule(WaitingNoops[i]); + WaitingNoops[i] = nullptr; + } + ++NextPossibleNoop; + } + *Device.Mon.DeviceEstimatedCostNs += totalCostNs; + *Device.Mon.DeviceActualCostNs += HPNanoSeconds(totalExecutionCycles); + + if (PrevEstimationAtCycle > eventGotAtCycle) { + PrevEstimationAtCycle = eventGotAtCycle; + } + if (HPMilliSeconds(eventGotAtCycle - PrevEstimationAtCycle) >= 15000) { + ui64 estimated = (*Device.Mon.DeviceEstimatedCostNs - PrevEstimatedCostNs); + ui64 actual = (*Device.Mon.DeviceActualCostNs - PrevActualCostNs + 30000000ull); + if (estimated != 0) { + *Device.Mon.DeviceOverestimationRatio = 1000ull * actual / (estimated + 30000000ull); + if (actual > estimated) { + if (actual - estimated < 15000000000ull) { + *Device.Mon.DeviceNonperformanceMs = (actual - estimated) / 15000000ull; } else { - *Device.Mon.DeviceNonperformanceMs = 1000; + *Device.Mon.DeviceNonperformanceMs = 1000; } - } else { - *Device.Mon.DeviceNonperformanceMs = 0; + } else { + *Device.Mon.DeviceNonperformanceMs = 0; } - } else { - *Device.Mon.DeviceOverestimationRatio = 1000ull; - *Device.Mon.DeviceNonperformanceMs = 0ull; - } - - PrevEstimatedCostNs = *Device.Mon.DeviceEstimatedCostNs; - PrevActualCostNs = *Device.Mon.DeviceActualCostNs; - PrevEstimationAtCycle = eventGotAtCycle; - *Device.Mon.GetThreadCPU = ThreadCPUTime(); + } else { + *Device.Mon.DeviceOverestimationRatio = 1000ull; + *Device.Mon.DeviceNonperformanceMs = 0ull; + } + + PrevEstimatedCostNs = *Device.Mon.DeviceEstimatedCostNs; + PrevActualCostNs = *Device.Mon.DeviceActualCostNs; + PrevEstimationAtCycle = eventGotAtCycle; + *Device.Mon.GetThreadCPU = ThreadCPUTime(); } - - PrevEventGotAtCycle = 
eventGotAtCycle; - } - - void Destroy() { + + PrevEventGotAtCycle = eventGotAtCycle; + } + + void Destroy() { // There are no Schedule() calls in progress - for (ui64 idx = 0; idx < MaxWaitingNoops; ++idx) { - if (WaitingNoops[idx]) { - WaitingNoops[idx]->Release(Device.ActorSystem); - } + for (ui64 idx = 0; idx < MaxWaitingNoops; ++idx) { + if (WaitingNoops[idx]) { + WaitingNoops[idx]->Release(Device.ActorSystem); + } } // Stop the completion thread Device.CompletionThread->Schedule(nullptr); } }; - //////////////////////////////////////////////////////// - // TSubmitGetThread - //////////////////////////////////////////////////////// - class TSubmitGetThread : public TSubmitThreadBase{ - public: - TSubmitGetThread(TRealBlockDevice &device) - : TSubmitThreadBase(device, &ThreadProc, this) - {} - - static int ThreadProcSpdk(void* _this) { - SetCurrentThreadName("PdSbmGet"); - static_cast<TSubmitGetThread*>(_this)->Exec(); - return 0; - } - - static void* ThreadProc(void* _this) { - ThreadProcSpdk(_this); - return nullptr; - } - - void ReleaseOp(IAsyncIoOperation *op) { - Device.DecrementMonInFlight(op->GetType(), op->GetSize()); - Device.FreeOperation(op); - Device.QuitCounter.Decrement(); - Device.IdleCounter.Decrement(); - } - - bool Submit(IAsyncIoOperation *op, i64 *inFlight) { - TCompletionAction *action = static_cast<TCompletionAction*>(op->GetCookie()); - - action->OperationIdx = Device.FlightControl.TrySchedule(); - if (action->OperationIdx == 0) { - return false; - } - - if (!Device.QuitCounter.Increment()) { - Device.FreeOperation(op); - TGuard<TMutex> guard(SubmitMtx); - SubmitCondVar.Signal(); - (*inFlight)--; - return true; - } - Device.IdleCounter.Increment(); - - if (action->FlushAction) { - action->FlushAction->OperationIdx = action->OperationIdx; - } - - if (op->GetSize() == 0) { - TAsyncIoOperationResult result; - result.Operation = op; - result.Result = EIoResult::Ok; - Device.SharedCallback->Exec(&result); - (*inFlight)--; - return true; - } - - 
Device.IncrementMonInFlight(op->GetType(), op->GetSize()); - - EIoResult ret = EIoResult::TryAgain; - while (ret == EIoResult::TryAgain) { - action->SubmitTime = HPNow(); - ret = Device.IoContext->Submit(op, Device.SharedCallback.Get()); - if (ret == EIoResult::Ok) { - return true; - } - if (Device.QuitCounter.IsBlocked()) { - ReleaseOp(op); - (*inFlight)--; - return true; - } - } - ReleaseOp(op); - (*inFlight)--; - Device.BecomeErrorState(TStringBuilder() << " SpdkSubmit error, reason# " << ret); - return true; - } - - void Exec() { - i64 inFlight = 0; - bool isExiting = false; - - TAsyncIoOperationResult events[MaxEvents]; - - while(!SubmitQuitCounter.IsBlocked() || SubmitQuitCounter.Get()) { - // Submit events - TAtomicBase ops = OperationsToBeSubmit.GetWaitingSize(); - if (inFlight < (i64)Device.DeviceInFlight && ops > 0) { - for (TAtomicBase idx = 0; idx < ops; ++idx) { - IAsyncIoOperation *op = OperationsToBeSubmit.Head(); - if (op) { - const ui64 opSize = op->GetSize(); - if (isExiting) { - OperationsToBeSubmit.Pop(); - SubmitQuitCounter.Decrement(); - Device.FreeOperation(op); - } else if (Submit(op, &inFlight)) { - OperationsToBeSubmit.Pop(); - SubmitQuitCounter.Decrement(); - ++inFlight; - TGuard<TMutex> guard(SubmitMtx); - AtomicSub(SubmitInFlightBytes, opSize); - SubmitCondVar.Signal(); - } else { - break; - } - } else { - OperationsToBeSubmit.Pop(); - SubmitQuitCounter.Decrement(); - isExiting = true; - } - } - } else if (inFlight == 0) { - if (isExiting) { - break; - } else { - OperationsToBeSubmit.ProducedWaitI(); - } - } - - // Get events - do { - i64 ret = Device.IoContext->GetEvents(0, MaxEvents, events, TDuration::MilliSeconds(WaitTimeoutMs)); - // TODO Stop working here in case of error - if (ret < 0) { - Device.BecomeErrorState(TStringBuilder() << " error in IoContext->GetEvents, reason# " - << (EIoResult)-ret); - } - inFlight -= ret; - Y_VERIFY_S(inFlight >= 0, "Error in inFlight# " << inFlight); - } while (inFlight == 
(i64)Device.DeviceInFlight || isExiting && inFlight > 0); - } - - Y_VERIFY(OperationsToBeSubmit.GetWaitingSize() == 0); - } - }; - - //////////////////////////////////////////////////////// - // TTrimThread - //////////////////////////////////////////////////////// - class TTrimThread : public TThread { - public: - TTrimThread(TRealBlockDevice &device) - : TThread(&ThreadProc, this) - , Device(device) - {} - - static void* ThreadProc(void* _this) { - SetCurrentThreadName("PdTrim"); - static_cast<TTrimThread*>(_this)->Exec(); - return nullptr; - } - - void Exec() { - while(true) { - TAtomicBase actionCount = TrimOperations.GetWaitingSize(); - if (actionCount > 0) { - for (TAtomicBase idx = 0; idx < actionCount; ++idx) { - IAsyncIoOperation *op = TrimOperations.Pop(); - if (op == nullptr) { - Device.CompletionThread->Schedule(nullptr); - return; - } - Y_VERIFY(op->GetType() == IAsyncIoOperation::EType::PTrim); - auto *completion = static_cast<TCompletionAction*>(op->GetCookie()); - if (Device.IsTrimEnabled) { - Device.IdleCounter.Increment(); - NHPTimer::STime beginTime = HPNow(); - Device.IsTrimEnabled = Device.IoContext->DoTrim(op); - NHPTimer::STime endTime = HPNow(); - Device.IdleCounter.Decrement(); - const ui64 durationUs = HPMicroSeconds(endTime - beginTime); - Device.Mon.DeviceTrimDuration.Increment(durationUs); - *Device.Mon.DeviceEstimatedCostNs += completion->CostNs; - if (Device.ActorSystem && Device.IsTrimEnabled) { - LOG_DEBUG_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, - "PDiskId# " << Device.GetPDiskId() - << " ReqId# " << op->GetReqId() - << " Trim duration# " << HPMilliSeconds(endTime - beginTime) - << " ms path# \"" << Device.Path - << "\" offset# " << op->GetOffset() - << " size# " << op->GetSize()); - LWPROBE(PDiskDeviceTrimDuration, Device.GetPDiskId(), - HPMilliSecondsFloat(endTime - beginTime), op->GetOffset()); - } - } - completion->SetResult(EIoResult::Ok); - Device.CompletionThread->Schedule(completion); - 
Device.IoContext->DestroyAsyncIoOperation(op); - } - } else { - *Device.Mon.TrimThreadCPU = ThreadCPUTime(); - TrimOperations.ProducedWaitI(); - } - } - } - - // Schedule action execution - // pass action = nullptr to quit - void Schedule(IAsyncIoOperation *op) noexcept { - TrimOperations.Push(op); - } - - private: - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> TrimOperations; - TRealBlockDevice &Device; - }; - - -protected: + //////////////////////////////////////////////////////// + // TSubmitGetThread + //////////////////////////////////////////////////////// + class TSubmitGetThread : public TSubmitThreadBase{ + public: + TSubmitGetThread(TRealBlockDevice &device) + : TSubmitThreadBase(device, &ThreadProc, this) + {} + + static int ThreadProcSpdk(void* _this) { + SetCurrentThreadName("PdSbmGet"); + static_cast<TSubmitGetThread*>(_this)->Exec(); + return 0; + } + + static void* ThreadProc(void* _this) { + ThreadProcSpdk(_this); + return nullptr; + } + + void ReleaseOp(IAsyncIoOperation *op) { + Device.DecrementMonInFlight(op->GetType(), op->GetSize()); + Device.FreeOperation(op); + Device.QuitCounter.Decrement(); + Device.IdleCounter.Decrement(); + } + + bool Submit(IAsyncIoOperation *op, i64 *inFlight) { + TCompletionAction *action = static_cast<TCompletionAction*>(op->GetCookie()); + + action->OperationIdx = Device.FlightControl.TrySchedule(); + if (action->OperationIdx == 0) { + return false; + } + + if (!Device.QuitCounter.Increment()) { + Device.FreeOperation(op); + TGuard<TMutex> guard(SubmitMtx); + SubmitCondVar.Signal(); + (*inFlight)--; + return true; + } + Device.IdleCounter.Increment(); + + if (action->FlushAction) { + action->FlushAction->OperationIdx = action->OperationIdx; + } + + if (op->GetSize() == 0) { + TAsyncIoOperationResult result; + result.Operation = op; + result.Result = EIoResult::Ok; + Device.SharedCallback->Exec(&result); + (*inFlight)--; + return true; + } + + Device.IncrementMonInFlight(op->GetType(), op->GetSize()); + + 
EIoResult ret = EIoResult::TryAgain; + while (ret == EIoResult::TryAgain) { + action->SubmitTime = HPNow(); + ret = Device.IoContext->Submit(op, Device.SharedCallback.Get()); + if (ret == EIoResult::Ok) { + return true; + } + if (Device.QuitCounter.IsBlocked()) { + ReleaseOp(op); + (*inFlight)--; + return true; + } + } + ReleaseOp(op); + (*inFlight)--; + Device.BecomeErrorState(TStringBuilder() << " SpdkSubmit error, reason# " << ret); + return true; + } + + void Exec() { + i64 inFlight = 0; + bool isExiting = false; + + TAsyncIoOperationResult events[MaxEvents]; + + while(!SubmitQuitCounter.IsBlocked() || SubmitQuitCounter.Get()) { + // Submit events + TAtomicBase ops = OperationsToBeSubmit.GetWaitingSize(); + if (inFlight < (i64)Device.DeviceInFlight && ops > 0) { + for (TAtomicBase idx = 0; idx < ops; ++idx) { + IAsyncIoOperation *op = OperationsToBeSubmit.Head(); + if (op) { + const ui64 opSize = op->GetSize(); + if (isExiting) { + OperationsToBeSubmit.Pop(); + SubmitQuitCounter.Decrement(); + Device.FreeOperation(op); + } else if (Submit(op, &inFlight)) { + OperationsToBeSubmit.Pop(); + SubmitQuitCounter.Decrement(); + ++inFlight; + TGuard<TMutex> guard(SubmitMtx); + AtomicSub(SubmitInFlightBytes, opSize); + SubmitCondVar.Signal(); + } else { + break; + } + } else { + OperationsToBeSubmit.Pop(); + SubmitQuitCounter.Decrement(); + isExiting = true; + } + } + } else if (inFlight == 0) { + if (isExiting) { + break; + } else { + OperationsToBeSubmit.ProducedWaitI(); + } + } + + // Get events + do { + i64 ret = Device.IoContext->GetEvents(0, MaxEvents, events, TDuration::MilliSeconds(WaitTimeoutMs)); + // TODO Stop working here in case of error + if (ret < 0) { + Device.BecomeErrorState(TStringBuilder() << " error in IoContext->GetEvents, reason# " + << (EIoResult)-ret); + } + inFlight -= ret; + Y_VERIFY_S(inFlight >= 0, "Error in inFlight# " << inFlight); + } while (inFlight == (i64)Device.DeviceInFlight || isExiting && inFlight > 0); + } + + 
Y_VERIFY(OperationsToBeSubmit.GetWaitingSize() == 0); + } + }; + + //////////////////////////////////////////////////////// + // TTrimThread + //////////////////////////////////////////////////////// + class TTrimThread : public TThread { + public: + TTrimThread(TRealBlockDevice &device) + : TThread(&ThreadProc, this) + , Device(device) + {} + + static void* ThreadProc(void* _this) { + SetCurrentThreadName("PdTrim"); + static_cast<TTrimThread*>(_this)->Exec(); + return nullptr; + } + + void Exec() { + while(true) { + TAtomicBase actionCount = TrimOperations.GetWaitingSize(); + if (actionCount > 0) { + for (TAtomicBase idx = 0; idx < actionCount; ++idx) { + IAsyncIoOperation *op = TrimOperations.Pop(); + if (op == nullptr) { + Device.CompletionThread->Schedule(nullptr); + return; + } + Y_VERIFY(op->GetType() == IAsyncIoOperation::EType::PTrim); + auto *completion = static_cast<TCompletionAction*>(op->GetCookie()); + if (Device.IsTrimEnabled) { + Device.IdleCounter.Increment(); + NHPTimer::STime beginTime = HPNow(); + Device.IsTrimEnabled = Device.IoContext->DoTrim(op); + NHPTimer::STime endTime = HPNow(); + Device.IdleCounter.Decrement(); + const ui64 durationUs = HPMicroSeconds(endTime - beginTime); + Device.Mon.DeviceTrimDuration.Increment(durationUs); + *Device.Mon.DeviceEstimatedCostNs += completion->CostNs; + if (Device.ActorSystem && Device.IsTrimEnabled) { + LOG_DEBUG_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE, + "PDiskId# " << Device.GetPDiskId() + << " ReqId# " << op->GetReqId() + << " Trim duration# " << HPMilliSeconds(endTime - beginTime) + << " ms path# \"" << Device.Path + << "\" offset# " << op->GetOffset() + << " size# " << op->GetSize()); + LWPROBE(PDiskDeviceTrimDuration, Device.GetPDiskId(), + HPMilliSecondsFloat(endTime - beginTime), op->GetOffset()); + } + } + completion->SetResult(EIoResult::Ok); + Device.CompletionThread->Schedule(completion); + Device.IoContext->DestroyAsyncIoOperation(op); + } + } else { + *Device.Mon.TrimThreadCPU = 
ThreadCPUTime(); + TrimOperations.ProducedWaitI(); + } + } + } + + // Schedule action execution + // pass action = nullptr to quit + void Schedule(IAsyncIoOperation *op) noexcept { + TrimOperations.Push(op); + } + + private: + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> TrimOperations; + TRealBlockDevice &Device; + }; + + +protected: TPDiskMon &Mon; TActorSystem *ActorSystem; - TString Path; - ui32 PDiskId; - TActorId PDiskActor; + TString Path; + ui32 PDiskId; + TActorId PDiskActor; -private: +private: THolder<TCompletionThread> CompletionThread; - THolder<TTrimThread> TrimThread; - THolder<TGetThread> GetEventsThread; - THolder<TSubmitGetThread> SpdkSubmitGetThread; - - THolder<TSharedCallback> SharedCallback; - THolder<TSubmitThreadBase> SubmitThread; - - bool IsFileOpened; - bool IsInitialized; + THolder<TTrimThread> TrimThread; + THolder<TGetThread> GetEventsThread; + THolder<TSubmitGetThread> SpdkSubmitGetThread; + + THolder<TSharedCallback> SharedCallback; + THolder<TSubmitThreadBase> SubmitThread; + + bool IsFileOpened; + bool IsInitialized; ui64 Reordering; ui64 SeekCostNs; - bool IsTrimEnabled; - ui32 MaxQueuedCompletionActions; + bool IsTrimEnabled; + ui32 MaxQueuedCompletionActions; + + TIdleCounter IdleCounter; // Includes reads, writes and trims - TIdleCounter IdleCounter; // Includes reads, writes and trims - - TDeviceMode::TFlags Flags; - TIntrusivePtr<TSectorMap> SectorMap; - std::unique_ptr<IAsyncIoContext> IoContext; - ISpdkState *SpdkState = nullptr; + TDeviceMode::TFlags Flags; + TIntrusivePtr<TSectorMap> SectorMap; + std::unique_ptr<IAsyncIoContext> IoContext; + ISpdkState *SpdkState = nullptr; - static constexpr int WaitTimeoutMs = 1; + static constexpr int WaitTimeoutMs = 1; static constexpr int MaxEvents = 32; - ui64 DeviceInFlight; + ui64 DeviceInFlight; TFlightControl FlightControl; TAtomicBlockCounter QuitCounter; - TString LastWarning; - TDeque<IAsyncIoOperation*> Trash; - TMutex TrashMutex; + TString LastWarning; + 
TDeque<IAsyncIoOperation*> Trash; + TMutex TrashMutex; + + std::optional<TDriveData> DriveData; - std::optional<TDriveData> DriveData; - public: - TRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, - ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, - TIntrusivePtr<TSectorMap> sectorMap) - : Mon(mon) + TRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, + ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, + TIntrusivePtr<TSectorMap> sectorMap) + : Mon(mon) , ActorSystem(nullptr) - , Path(path) - , PDiskId(pDiskId) + , Path(path) + , PDiskId(pDiskId) , CompletionThread(nullptr) - , TrimThread(nullptr) + , TrimThread(nullptr) , GetEventsThread(nullptr) - , SharedCallback(nullptr) - , SubmitThread(nullptr) - , IsFileOpened(false) - , IsInitialized(false) + , SharedCallback(nullptr) + , SubmitThread(nullptr) + , IsFileOpened(false) + , IsInitialized(false) , Reordering(reorderingCycles) , SeekCostNs(seekCostNs) - , IsTrimEnabled(true) - , MaxQueuedCompletionActions(maxQueuedCompletionActions) - , IdleCounter(Mon.IdleLight) - , Flags(flags) - , SectorMap(sectorMap) - , DeviceInFlight(FastClp2(deviceInFlight)) - , FlightControl(CountTrailingZeroBits(DeviceInFlight)) - , LastWarning(IsPowerOf2(deviceInFlight) ? "" : "Device inflight must be a power of 2") + , IsTrimEnabled(true) + , MaxQueuedCompletionActions(maxQueuedCompletionActions) + , IdleCounter(Mon.IdleLight) + , Flags(flags) + , SectorMap(sectorMap) + , DeviceInFlight(FastClp2(deviceInFlight)) + , FlightControl(CountTrailingZeroBits(DeviceInFlight)) + , LastWarning(IsPowerOf2(deviceInFlight) ? 
"" : "Device inflight must be a power of 2") { - if (sectorMap) { - DriveData = TDriveData(); - DriveData->Path = path; - DriveData->SerialNumber = sectorMap->Serial; - DriveData->FirmwareRevision = "rev 1.0"; - DriveData->ModelNumber = "SectorMap"; - } - } - -protected: - void Initialize(TActorSystem *actorSystem, const TActorId& pdiskActor) override { - ActorSystem = actorSystem; - PDiskActor = pdiskActor; - Y_VERIFY(ActorSystem); - - TString errStr = TDeviceMode::Validate(Flags); - if (errStr) { - Y_FAIL_S(IoContext->GetPDiskInfo() << " Error in device flags: " << errStr); - } - - Y_VERIFY(ActorSystem->AppData<TAppData>()); - Y_VERIFY(ActorSystem->AppData<TAppData>()->IoContextFactory); - auto *factory = ActorSystem->AppData<TAppData>()->IoContextFactory; - IoContext = factory->CreateAsyncIoContext(Path, PDiskId, Flags, SectorMap); - if (Flags & TDeviceMode::UseSpdk) { - SpdkState = factory->CreateSpdkState(); - } - - while (true) { - EIoResult ret = IoContext->Setup(MaxEvents, Flags & TDeviceMode::LockFile); - if (ret == EIoResult::Ok) { - IsFileOpened = true; - break; - } else if (ret == EIoResult::FileOpenError || ret == EIoResult::FileLockError) { - IsFileOpened = false; - if (ret == EIoResult::FileOpenError) { - LastWarning = "got EIoResult::FileOpenError from IoContext->Setup"; - } else if (ret == EIoResult::FileLockError) { - LastWarning = "got EIoResult::FileLockError from IoContext->Setup"; - } - break; - } else if (ret == EIoResult::TryAgain) { - continue; - } else { - Y_FAIL_S(IoContext->GetPDiskInfo() << " Error initing IoContext: " << ret); + if (sectorMap) { + DriveData = TDriveData(); + DriveData->Path = path; + DriveData->SerialNumber = sectorMap->Serial; + DriveData->FirmwareRevision = "rev 1.0"; + DriveData->ModelNumber = "SectorMap"; + } + } + +protected: + void Initialize(TActorSystem *actorSystem, const TActorId& pdiskActor) override { + ActorSystem = actorSystem; + PDiskActor = pdiskActor; + Y_VERIFY(ActorSystem); + + TString errStr = 
TDeviceMode::Validate(Flags); + if (errStr) { + Y_FAIL_S(IoContext->GetPDiskInfo() << " Error in device flags: " << errStr); + } + + Y_VERIFY(ActorSystem->AppData<TAppData>()); + Y_VERIFY(ActorSystem->AppData<TAppData>()->IoContextFactory); + auto *factory = ActorSystem->AppData<TAppData>()->IoContextFactory; + IoContext = factory->CreateAsyncIoContext(Path, PDiskId, Flags, SectorMap); + if (Flags & TDeviceMode::UseSpdk) { + SpdkState = factory->CreateSpdkState(); + } + + while (true) { + EIoResult ret = IoContext->Setup(MaxEvents, Flags & TDeviceMode::LockFile); + if (ret == EIoResult::Ok) { + IsFileOpened = true; + break; + } else if (ret == EIoResult::FileOpenError || ret == EIoResult::FileLockError) { + IsFileOpened = false; + if (ret == EIoResult::FileOpenError) { + LastWarning = "got EIoResult::FileOpenError from IoContext->Setup"; + } else if (ret == EIoResult::FileLockError) { + LastWarning = "got EIoResult::FileLockError from IoContext->Setup"; + } + break; + } else if (ret == EIoResult::TryAgain) { + continue; + } else { + Y_FAIL_S(IoContext->GetPDiskInfo() << " Error initing IoContext: " << ret); } } - - IoContext->InitializeMonitoring(Mon); - //IoContext->InitializeMonitoring(Mon.DeviceOperationPoolTotalAllocations, Mon.DeviceOperationPoolFreeObjectsMin); + + IoContext->InitializeMonitoring(Mon); + //IoContext->InitializeMonitoring(Mon.DeviceOperationPoolTotalAllocations, Mon.DeviceOperationPoolFreeObjectsMin); if (!LastWarning.empty() && ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_DEVICE, "PDiskId# " << PDiskId - << " Warning# " << LastWarning); - } - if (IsFileOpened) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_DEVICE, "PDiskId# " << PDiskId + << " Warning# " << LastWarning); + } + if (IsFileOpened) { IoContext->SetActorSystem(ActorSystem); CompletionThread = MakeHolder<TCompletionThread>(*this, MaxQueuedCompletionActions); TrimThread = MakeHolder<TTrimThread>(*this); SharedCallback = MakeHolder<TSharedCallback>(*this); - if 
(Flags & TDeviceMode::UseSpdk) { + if (Flags & TDeviceMode::UseSpdk) { SpdkSubmitGetThread = MakeHolder<TSubmitGetThread>(*this); - SpdkState->LaunchThread(TSubmitGetThread::ThreadProcSpdk, SpdkSubmitGetThread.Get()); - } else { - if (Flags & TDeviceMode::UseSubmitGetThread) { + SpdkState->LaunchThread(TSubmitGetThread::ThreadProcSpdk, SpdkSubmitGetThread.Get()); + } else { + if (Flags & TDeviceMode::UseSubmitGetThread) { SubmitThread = MakeHolder<TSubmitGetThread>(*this); - SubmitThread->Start(); - } else { + SubmitThread->Start(); + } else { SubmitThread = MakeHolder<TSubmitThread>(*this); - SubmitThread->Start(); + SubmitThread->Start(); GetEventsThread = MakeHolder<TGetThread>(*this); - GetEventsThread->Start(); - } - } + GetEventsThread->Start(); + } + } CompletionThread->Start(); - TrimThread->Start(); - IsInitialized = true; + TrimThread->Start(); + IsInitialized = true; } } bool IsGood() override { - return IsFileOpened && IsInitialized; + return IsFileOpened && IsInitialized; + } + + int GetLastErrno() override { + return IoContext->GetLastErrno(); + } + + TString DebugInfo() override { + TStringStream str; + str << " Path# " << Path.Quote(); + str << " IsFileOpened# " << IsFileOpened; + str << " IsInitialized# " << IsInitialized; + str << " LastWarning# " << LastWarning.Quote(); + str << " LastErrno# " << IoContext->GetLastErrno(); + return str.Str(); } - int GetLastErrno() override { - return IoContext->GetLastErrno(); - } - - TString DebugInfo() override { - TStringStream str; - str << " Path# " << Path.Quote(); - str << " IsFileOpened# " << IsFileOpened; - str << " IsInitialized# " << IsInitialized; - str << " LastWarning# " << LastWarning.Quote(); - str << " LastErrno# " << IoContext->GetLastErrno(); - return str.Str(); - } - - void IncrementMonInFlight(IAsyncIoOperation::EType type, ui32 size) { - switch (type) { - case IAsyncIoOperation::EType::PWrite: - (*Mon.DeviceInFlightBytesWrite) += size; - Mon.DeviceInFlightWrites->Inc(); - break; - case 
IAsyncIoOperation::EType::PRead: - (*Mon.DeviceInFlightBytesRead) += size; - Mon.DeviceInFlightReads->Inc(); - break; - default: - break; - } + void IncrementMonInFlight(IAsyncIoOperation::EType type, ui32 size) { + switch (type) { + case IAsyncIoOperation::EType::PWrite: + (*Mon.DeviceInFlightBytesWrite) += size; + Mon.DeviceInFlightWrites->Inc(); + break; + case IAsyncIoOperation::EType::PRead: + (*Mon.DeviceInFlightBytesRead) += size; + Mon.DeviceInFlightReads->Inc(); + break; + default: + break; + } Mon.DeviceTakeoffs->Inc(); - } - - void DecrementMonInFlight(IAsyncIoOperation::EType type, ui32 size) { - switch (type) { - case IAsyncIoOperation::EType::PWrite: - (*Mon.DeviceInFlightBytesWrite) -= size; - Mon.DeviceInFlightWrites->Dec(); - (*Mon.DeviceBytesWritten) += size; - Mon.DeviceWrites->Inc(); - break; - case IAsyncIoOperation::EType::PRead: - (*Mon.DeviceInFlightBytesRead) -= size; - Mon.DeviceInFlightReads->Dec(); - (*Mon.DeviceBytesRead) += size; - Mon.DeviceReads->Inc(); - break; - default: - break; - } - Mon.DeviceLandings->Inc(); - } - - void FreeOperation(IAsyncIoOperation *op) { - TCompletionAction *action = static_cast<TCompletionAction*>(op->GetCookie()); - - if (action->FlushAction) { - action->FlushAction->Release(ActorSystem); - } - action->Release(ActorSystem); - { - TGuard<TMutex> guard(TrashMutex); - Trash.push_back(op); - } - } - + } + + void DecrementMonInFlight(IAsyncIoOperation::EType type, ui32 size) { + switch (type) { + case IAsyncIoOperation::EType::PWrite: + (*Mon.DeviceInFlightBytesWrite) -= size; + Mon.DeviceInFlightWrites->Dec(); + (*Mon.DeviceBytesWritten) += size; + Mon.DeviceWrites->Inc(); + break; + case IAsyncIoOperation::EType::PRead: + (*Mon.DeviceInFlightBytesRead) -= size; + Mon.DeviceInFlightReads->Dec(); + (*Mon.DeviceBytesRead) += size; + Mon.DeviceReads->Inc(); + break; + default: + break; + } + Mon.DeviceLandings->Inc(); + } + + void FreeOperation(IAsyncIoOperation *op) { + TCompletionAction *action = 
static_cast<TCompletionAction*>(op->GetCookie()); + + if (action->FlushAction) { + action->FlushAction->Release(ActorSystem); + } + action->Release(ActorSystem); + { + TGuard<TMutex> guard(TrashMutex); + Trash.push_back(op); + } + } + void Submit(IAsyncIoOperation *op) { - if (QuitCounter.IsBlocked()) { - FreeOperation(op); - return; - } - - const ui64 size = op->GetSize(); - const ui64 type = static_cast<ui64>(op->GetType()); - LWPROBE(PDiskDeviceOperationSizeAndType, GetPDiskId(), size, type); - - if (Flags & TDeviceMode::UseSpdk) { - SpdkSubmitGetThread->Schedule(op); - } else { - SubmitThread->Schedule(op); - } + if (QuitCounter.IsBlocked()) { + FreeOperation(op); + return; + } + + const ui64 size = op->GetSize(); + const ui64 type = static_cast<ui64>(op->GetType()); + LWPROBE(PDiskDeviceOperationSizeAndType, GetPDiskId(), size, type); + + if (Flags & TDeviceMode::UseSpdk) { + SpdkSubmitGetThread->Schedule(op); + } else { + SubmitThread->Schedule(op); + } + } + + void PreadSync(void *data, ui32 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) override { + TSignalEvent doneEvent; + PreadAsync(data, size, offset, new TCompletionSignal(&doneEvent), reqId, traceId); + doneEvent.WaitI(); + } + + void PwriteSync(const void *data, ui64 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) override { + TSignalEvent doneEvent; + PwriteAsync(data, size, offset, new TCompletionSignal(&doneEvent), reqId, traceId); + doneEvent.WaitI(); + } + + void TrimSync(ui32 size, ui64 offset) override { + IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(nullptr, {}, nullptr); + IoContext->PreparePTrim(op, size, offset); + IsTrimEnabled = IoContext->DoTrim(op); + IoContext->DestroyAsyncIoOperation(op); } - void PreadSync(void *data, ui32 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) override { - TSignalEvent doneEvent; - PreadAsync(data, size, offset, new TCompletionSignal(&doneEvent), reqId, traceId); - doneEvent.WaitI(); - } - - void 
PwriteSync(const void *data, ui64 size, ui64 offset, TReqId reqId, NWilson::TTraceId *traceId) override { - TSignalEvent doneEvent; - PwriteAsync(data, size, offset, new TCompletionSignal(&doneEvent), reqId, traceId); - doneEvent.WaitI(); - } - - void TrimSync(ui32 size, ui64 offset) override { - IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(nullptr, {}, nullptr); - IoContext->PreparePTrim(op, size, offset); - IsTrimEnabled = IoContext->DoTrim(op); - IoContext->DestroyAsyncIoOperation(op); - } - void PreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, NWilson::TTraceId *traceId) override { Y_VERIFY(completionAction); - if (!IsInitialized) { - completionAction->Release(ActorSystem); - return; - } + if (!IsInitialized) { + completionAction->Release(ActorSystem); + return; + } if (data && size) { Y_VERIFY(intptr_t(data) % 512 == 0); REQUEST_VALGRIND_CHECK_MEM_IS_ADDRESSABLE(data, size); @@ -947,17 +947,17 @@ protected: WILSON_TRACE(*ActorSystem, traceId, BlockPread, DiskOffset = offset, Size = size); } IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(completionAction, reqId, traceId); - IoContext->PreparePRead(op, data, size, offset); + IoContext->PreparePRead(op, data, size, offset); Submit(op); } void PwriteAsync(const void *data, ui64 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, NWilson::TTraceId *traceId) override { Y_VERIFY(completionAction); - if (!IsInitialized) { - completionAction->Release(ActorSystem); - return; - } + if (!IsInitialized) { + completionAction->Release(ActorSystem); + return; + } if (data && size) { Y_VERIFY(intptr_t(data) % 512 == 0); REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size); @@ -967,93 +967,93 @@ protected: WILSON_TRACE(*ActorSystem, traceId, BlockPwrite, DiskOffset = offset, Size = size); } IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(completionAction, reqId, traceId); - IoContext->PreparePWrite(op, const_cast<void*>(data), 
size, offset); + IoContext->PreparePWrite(op, const_cast<void*>(data), size, offset); Submit(op); } void FlushAsync(TCompletionAction *completionAction, TReqId reqId) override { Y_VERIFY(completionAction); - if (!IsInitialized) { - completionAction->Release(ActorSystem); - return; - } + if (!IsInitialized) { + completionAction->Release(ActorSystem); + return; + } IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(completionAction, reqId, nullptr); - IoContext->PreparePRead(op, nullptr, 0, 0); + IoContext->PreparePRead(op, nullptr, 0, 0); Submit(op); } void NoopAsync(TCompletionAction *completionAction, TReqId /*reqId*/) override { - Y_VERIFY(completionAction); - if (!IsInitialized) { - completionAction->Release(ActorSystem); - return; - } + Y_VERIFY(completionAction); + if (!IsInitialized) { + completionAction->Release(ActorSystem); + return; + } if (QuitCounter.IsBlocked()) { completionAction->Release(ActorSystem); return; } - - completionAction->SetResult(EIoResult::Ok); - CompletionThread->Schedule(completionAction); - } - - void NoopAsyncHackForLogReader(TCompletionAction *completionAction, TReqId /*reqId*/) override { - Y_VERIFY(completionAction); - if (!IsInitialized) { - completionAction->Release(ActorSystem); - return; - } + + completionAction->SetResult(EIoResult::Ok); + CompletionThread->Schedule(completionAction); + } + + void NoopAsyncHackForLogReader(TCompletionAction *completionAction, TReqId /*reqId*/) override { + Y_VERIFY(completionAction); + if (!IsInitialized) { + completionAction->Release(ActorSystem); + return; + } if (QuitCounter.IsBlocked()) { completionAction->Release(ActorSystem); return; } - - completionAction->SetResult(EIoResult::Ok); - CompletionThread->ScheduleHackForLogReader(completionAction); - } - - void TrimAsync(ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId) override { + + completionAction->SetResult(EIoResult::Ok); + CompletionThread->ScheduleHackForLogReader(completionAction); + } + + void 
TrimAsync(ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId) override { Y_VERIFY(completionAction); - if (!IsInitialized || QuitCounter.IsBlocked()) { - return; - } + if (!IsInitialized || QuitCounter.IsBlocked()) { + return; + } - IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(completionAction, reqId, nullptr); - IoContext->PreparePTrim(op, size, offset); - TrimThread->Schedule(op); + IAsyncIoOperation* op = IoContext->CreateAsyncIoOperation(completionAction, reqId, nullptr); + IoContext->PreparePTrim(op, size, offset); + TrimThread->Schedule(op); } bool GetIsTrimEnabled() override { - return IsTrimEnabled; - } - - TDriveData GetDriveData() override { - if (!DriveData) { - TStringStream details; - if (DriveData = ::NKikimr::NPDisk::GetDriveData(Path, &details)) { - if (ActorSystem) { - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Gathered DriveData, data# " << DriveData->ToString(false)); - } - } else { - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " error on GetDriveData, detail# " << details.Str()); - } + return IsTrimEnabled; + } + + TDriveData GetDriveData() override { + if (!DriveData) { + TStringStream details; + if (DriveData = ::NKikimr::NPDisk::GetDriveData(Path, &details)) { + if (ActorSystem) { + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Gathered DriveData, data# " << DriveData->ToString(false)); + } + } else { + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " error on GetDriveData, detail# " << details.Str()); + } } } - return DriveData.value_or(TDriveData()); + return DriveData.value_or(TDriveData()); } void SetWriteCache(bool isEnable) override { - if (TFileHandle *handle = IoContext->GetFileHandle()) { - TStringStream details; - EWriteCacheResult res = NKikimr::NPDisk::SetWriteCache(*handle, Path, isEnable, &details); - if 
(res != WriteCacheResultOk) { - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_DEVICE, details.Str()); - } + if (TFileHandle *handle = IoContext->GetFileHandle()) { + TStringStream details; + EWriteCacheResult res = NKikimr::NPDisk::SetWriteCache(*handle, Path, isEnable, &details); + if (res != WriteCacheResultOk) { + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_DEVICE, details.Str()); + } } } } @@ -1062,334 +1062,334 @@ protected: return PDiskId; } - virtual ~TRealBlockDevice() { + virtual ~TRealBlockDevice() { Stop(); - while (Trash.size() > 0) { - IAsyncIoOperation *op = Trash.front(); - Trash.pop_front(); - IoContext->DestroyAsyncIoOperation(op); - } + while (Trash.size() > 0) { + IAsyncIoOperation *op = Trash.front(); + Trash.pop_front(); + IoContext->DestroyAsyncIoOperation(op); + } + } + + void BecomeErrorState(const TString& info) { + // Block only B flag so device will not working but when Stop() will be called AFlag will be toggled + QuitCounter.BlockB(); + TString fullInfo = TStringBuilder() << IoContext->GetPDiskInfo() << info; + if (ActorSystem) { + ActorSystem->Send(PDiskActor, new TEvDeviceError(fullInfo)); + } else { + Y_FAIL_S(fullInfo); + } } - void BecomeErrorState(const TString& info) { - // Block only B flag so device will not working but when Stop() will be called AFlag will be toggled - QuitCounter.BlockB(); - TString fullInfo = TStringBuilder() << IoContext->GetPDiskInfo() << info; - if (ActorSystem) { - ActorSystem->Send(PDiskActor, new TEvDeviceError(fullInfo)); - } else { - Y_FAIL_S(fullInfo); - } - } - void Stop() override { TAtomicBlockCounter::TResult res; - QuitCounter.BlockA(res); - if (res.PrevA ^ res.A) { // res.ToggledA() - if (IsInitialized) { - Y_VERIFY(TrimThread); - Y_VERIFY(CompletionThread); - TrimThread->Schedule(nullptr); // Stop the Trim thread - if (Flags & TDeviceMode::UseSpdk) { - Y_VERIFY(SpdkSubmitGetThread); - SpdkSubmitGetThread->Schedule(nullptr); // Stop the 
SpdkSubmitGetEvents thread - SpdkState->WaitAllThreads(); - } else { - Y_VERIFY(SubmitThread); - SubmitThread->Schedule(nullptr); // Stop the SubminEvents thread - SubmitThread->Join(); - - if (!(Flags & TDeviceMode::UseSubmitGetThread)) { - Y_VERIFY(GetEventsThread); - GetEventsThread->Join(); - } - } - SharedCallback->Destroy(); - TrimThread->Join(); + QuitCounter.BlockA(res); + if (res.PrevA ^ res.A) { // res.ToggledA() + if (IsInitialized) { + Y_VERIFY(TrimThread); + Y_VERIFY(CompletionThread); + TrimThread->Schedule(nullptr); // Stop the Trim thread + if (Flags & TDeviceMode::UseSpdk) { + Y_VERIFY(SpdkSubmitGetThread); + SpdkSubmitGetThread->Schedule(nullptr); // Stop the SpdkSubmitGetEvents thread + SpdkState->WaitAllThreads(); + } else { + Y_VERIFY(SubmitThread); + SubmitThread->Schedule(nullptr); // Stop the SubminEvents thread + SubmitThread->Join(); + + if (!(Flags & TDeviceMode::UseSubmitGetThread)) { + Y_VERIFY(GetEventsThread); + GetEventsThread->Join(); + } + } + SharedCallback->Destroy(); + TrimThread->Join(); CompletionThread->Join(); - IsInitialized = false; - } else { - Y_VERIFY(SubmitThread.Get() == nullptr); - Y_VERIFY(GetEventsThread.Get() == nullptr); - Y_VERIFY(TrimThread.Get() == nullptr); - Y_VERIFY(CompletionThread.Get() == nullptr); - } - if (IsFileOpened) { - EIoResult ret = IoContext->Destroy(); - if (ret != EIoResult::Ok) { - BecomeErrorState(TStringBuilder() << " Error in IoContext desctruction, reason# " << ret); + IsInitialized = false; + } else { + Y_VERIFY(SubmitThread.Get() == nullptr); + Y_VERIFY(GetEventsThread.Get() == nullptr); + Y_VERIFY(TrimThread.Get() == nullptr); + Y_VERIFY(CompletionThread.Get() == nullptr); + } + if (IsFileOpened) { + EIoResult ret = IoContext->Destroy(); + if (ret != EIoResult::Ok) { + BecomeErrorState(TStringBuilder() << " Error in IoContext desctruction, reason# " << ret); } - IsFileOpened = false; + IsFileOpened = false; } } } }; 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TCachedBlockDevice -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TCachedBlockDevice : public TRealBlockDevice { - - class TCachedReadCompletion : public TCompletionAction { - TCachedBlockDevice &CachedBlockDevice; - void *Data; - ui32 Size; - ui64 Offset; - TReqId ReqId; - public: - TCachedReadCompletion(TCachedBlockDevice &cachedBlockDevice, void *data, ui32 size, ui64 offset, TReqId reqId) - : CachedBlockDevice(cachedBlockDevice) - , Data(data) - , Size(size) - , Offset(offset) - , ReqId(reqId) - {} - - ui64 GetOffset() { - return Offset; - } - - ui32 GetSize() { - return Size; - } - - void* GetData() { - return Data; - } - - TVector<ui64>& GetBadOffsets() { - return BadOffsets; - } - - void Exec(TActorSystem *actorSystem) override { - if (actorSystem) { - LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, - "PDisk# " << CachedBlockDevice.GetPDiskId() << " ReqId# " << ReqId - << "Exec TCachedReadCompletion Offset# " << Offset); - } - CachedBlockDevice.ExecRead(this, actorSystem); - } - - void Release(TActorSystem *actorSystem) override { - if (actorSystem) { - LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, - "PDisk# " << CachedBlockDevice.GetPDiskId() << " ReqId# " << ReqId - << "Release TCachedReadCompletion Offset# " << Offset); - } - CachedBlockDevice.ReleaseRead(this, actorSystem); - } - }; - - struct TRead { - void *Data; - ui32 Size; - ui64 Offset; - TCompletionAction *CompletionAction; - TReqId ReqId; - NWilson::TTraceId *TraceId; - - TRead(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, - NWilson::TTraceId *traceId) - : Data(data) - , Size(size) - , Offset(offset) - , CompletionAction(completionAction) - , ReqId(reqId) - , TraceId(traceId) - { - } - }; - - struct TCacheRecord : public TThrRefBase { - 
char *Data; - ui64 Size; - TVector<ui64> BadOffsets; - - TCacheRecord(TCachedReadCompletion *source) - : Data(static_cast<char*>(malloc(source->GetSize()))) - , Size(source->GetSize()) - , BadOffsets(std::move(source->GetBadOffsets())) - { - memcpy(Data, source->GetData(), source->GetSize()); - } - - ~TCacheRecord() { - free(Data); - } - }; - - static constexpr ui64 MaxCount = 500ull; - static constexpr ui64 MaxReadsInFly = 2; - - TMutex CacheMutex; - NCache::TLruCache<ui64, TIntrusivePtr<TCacheRecord>> Cache; - TMultiMap<ui64, TRead> ReadsForOffset; - TMap<ui64, TCachedReadCompletion*> CurrentReads; - ui64 ReadsInFly; - - void UpdateReads() { - auto nextIt = ReadsForOffset.begin(); - for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); it = nextIt) { - nextIt++; - TRead &read = it->second; - TIntrusivePtr<TCacheRecord> *found; - bool isFound = Cache.Find(read.Offset, found); - if (isFound) { - TCacheRecord &cached = *found->Get(); - if (read.Size <= cached.Size) { - memcpy(read.Data, cached.Data, read.Size); - Mon.DeviceReadCacheHits->Inc(); - Y_VERIFY(read.CompletionAction); - for (size_t i = 0; i < cached.BadOffsets.size(); ++i) { - read.CompletionAction->RegisterBadOffset(cached.BadOffsets[i]); - } - NoopAsyncHackForLogReader(read.CompletionAction, read.ReqId); - ReadsForOffset.erase(it); - } - } - } - if (ReadsInFly >= MaxReadsInFly) { - return; - } - - for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); it++) { - TRead &read = it->second; - auto currentIt = CurrentReads.find(read.Offset); - if (currentIt == CurrentReads.end()) { - TCachedReadCompletion *ptr = new TCachedReadCompletion(*this, read.Data, read.Size, read.Offset, - read.ReqId); - CurrentReads[read.Offset] = ptr; - ActorSystem->Send(PDiskActor, new TEvReadLogContinue(read.Data, read.Size, read.Offset, - ptr, read.ReqId)); - ReadsInFly++; - if (ReadsInFly >= MaxReadsInFly) { - return; - } - } - } - } - -public: - TCachedBlockDevice(const TString &path, ui32 pDiskId, 
TPDiskMon &mon, ui64 reorderingCycles, - ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, - TIntrusivePtr<TSectorMap> sectorMap) - : TRealBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, - maxQueuedCompletionActions, sectorMap) - , ReadsInFly(0) - {} - - void ExecRead(TCachedReadCompletion *completion, TActorSystem *actorSystem) { - TStackVec<TCompletionAction*, 32> pendingActions; - { - TGuard<TMutex> guard(CacheMutex); - ui64 offset = completion->GetOffset(); - auto currentReadIt = CurrentReads.find(offset); - Y_VERIFY(currentReadIt != CurrentReads.end()); - auto range = ReadsForOffset.equal_range(offset); - if (Cache.GetCount() >= MaxCount) { - Cache.Pop(); - } - TIntrusivePtr<TCacheRecord> cacheRecord(new TCacheRecord(completion)); - TIntrusivePtr<TCacheRecord> *junk; - Cache.Erase(offset); - bool isOk = Cache.Insert(offset, cacheRecord, junk); - Y_VERIFY(isOk); - auto nextIt = range.first; - for (auto it = range.first; it != range.second; it = nextIt) { - nextIt++; - TRead &read = it->second; - if (read.Size <= completion->GetSize()) { - if (read.Data != completion->GetData()) { - memcpy(read.Data, cacheRecord->Data, read.Size); - Mon.DeviceReadCacheHits->Inc(); - } else { - Mon.DeviceReadCacheMisses->Inc(); - } - Y_VERIFY(read.CompletionAction); - for (size_t i = 0; i < cacheRecord->BadOffsets.size(); ++i) { - read.CompletionAction->RegisterBadOffset(cacheRecord->BadOffsets[i]); - } - pendingActions.push_back(read.CompletionAction); - ReadsForOffset.erase(it); - } - } - delete currentReadIt->second; - CurrentReads.erase(currentReadIt); - ReadsInFly--; - UpdateReads(); - } - - for (size_t i = 0; i < pendingActions.size(); ++i) { - pendingActions[i]->Exec(actorSystem); - } - - { - TGuard<TMutex> guard(CacheMutex); - if (ReadsInFly == 0) { - ClearCache(); - } - } - } - - void ReleaseRead(TCachedReadCompletion *completion, TActorSystem *actorSystem) { - TGuard<TMutex> 
guard(CacheMutex); - Y_UNUSED(actorSystem); - auto it = CurrentReads.find(completion->GetOffset()); - Y_VERIFY(it != CurrentReads.end()); - delete it->second; - CurrentReads.erase(it); - ReadsInFly--; - } - - virtual ~TCachedBlockDevice() { - Stop(); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // cache related methods implementation - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - // Can be called from completion Exec - virtual void CachedPreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, - TReqId reqId, NWilson::TTraceId *traceId) override { - TGuard<TMutex> guard(CacheMutex); - ReadsForOffset.emplace(offset, TRead(data, size, offset, completionAction, reqId, traceId)); - UpdateReads(); - } - - virtual void ClearCache() override { - TGuard<TMutex> guard(CacheMutex); - Cache.Clear(); - } - - void Stop() override { - TRealBlockDevice::Stop(); - for (auto it = CurrentReads.begin(); it != CurrentReads.end(); ++it) { - delete it->second; - } - CurrentReads.clear(); - for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); ++it) { - if (it->second.CompletionAction) { - it->second.CompletionAction->Release(ActorSystem); - } - } - ReadsForOffset.clear(); - } -}; - -IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, - ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TCachedBlockDevice +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class TCachedBlockDevice : public TRealBlockDevice { + + class TCachedReadCompletion : public TCompletionAction { + TCachedBlockDevice 
&CachedBlockDevice; + void *Data; + ui32 Size; + ui64 Offset; + TReqId ReqId; + public: + TCachedReadCompletion(TCachedBlockDevice &cachedBlockDevice, void *data, ui32 size, ui64 offset, TReqId reqId) + : CachedBlockDevice(cachedBlockDevice) + , Data(data) + , Size(size) + , Offset(offset) + , ReqId(reqId) + {} + + ui64 GetOffset() { + return Offset; + } + + ui32 GetSize() { + return Size; + } + + void* GetData() { + return Data; + } + + TVector<ui64>& GetBadOffsets() { + return BadOffsets; + } + + void Exec(TActorSystem *actorSystem) override { + if (actorSystem) { + LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, + "PDisk# " << CachedBlockDevice.GetPDiskId() << " ReqId# " << ReqId + << "Exec TCachedReadCompletion Offset# " << Offset); + } + CachedBlockDevice.ExecRead(this, actorSystem); + } + + void Release(TActorSystem *actorSystem) override { + if (actorSystem) { + LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, + "PDisk# " << CachedBlockDevice.GetPDiskId() << " ReqId# " << ReqId + << "Release TCachedReadCompletion Offset# " << Offset); + } + CachedBlockDevice.ReleaseRead(this, actorSystem); + } + }; + + struct TRead { + void *Data; + ui32 Size; + ui64 Offset; + TCompletionAction *CompletionAction; + TReqId ReqId; + NWilson::TTraceId *TraceId; + + TRead(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId, + NWilson::TTraceId *traceId) + : Data(data) + , Size(size) + , Offset(offset) + , CompletionAction(completionAction) + , ReqId(reqId) + , TraceId(traceId) + { + } + }; + + struct TCacheRecord : public TThrRefBase { + char *Data; + ui64 Size; + TVector<ui64> BadOffsets; + + TCacheRecord(TCachedReadCompletion *source) + : Data(static_cast<char*>(malloc(source->GetSize()))) + , Size(source->GetSize()) + , BadOffsets(std::move(source->GetBadOffsets())) + { + memcpy(Data, source->GetData(), source->GetSize()); + } + + ~TCacheRecord() { + free(Data); + } + }; + + static constexpr ui64 MaxCount = 500ull; + static constexpr 
ui64 MaxReadsInFly = 2; + + TMutex CacheMutex; + NCache::TLruCache<ui64, TIntrusivePtr<TCacheRecord>> Cache; + TMultiMap<ui64, TRead> ReadsForOffset; + TMap<ui64, TCachedReadCompletion*> CurrentReads; + ui64 ReadsInFly; + + void UpdateReads() { + auto nextIt = ReadsForOffset.begin(); + for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); it = nextIt) { + nextIt++; + TRead &read = it->second; + TIntrusivePtr<TCacheRecord> *found; + bool isFound = Cache.Find(read.Offset, found); + if (isFound) { + TCacheRecord &cached = *found->Get(); + if (read.Size <= cached.Size) { + memcpy(read.Data, cached.Data, read.Size); + Mon.DeviceReadCacheHits->Inc(); + Y_VERIFY(read.CompletionAction); + for (size_t i = 0; i < cached.BadOffsets.size(); ++i) { + read.CompletionAction->RegisterBadOffset(cached.BadOffsets[i]); + } + NoopAsyncHackForLogReader(read.CompletionAction, read.ReqId); + ReadsForOffset.erase(it); + } + } + } + if (ReadsInFly >= MaxReadsInFly) { + return; + } + + for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); it++) { + TRead &read = it->second; + auto currentIt = CurrentReads.find(read.Offset); + if (currentIt == CurrentReads.end()) { + TCachedReadCompletion *ptr = new TCachedReadCompletion(*this, read.Data, read.Size, read.Offset, + read.ReqId); + CurrentReads[read.Offset] = ptr; + ActorSystem->Send(PDiskActor, new TEvReadLogContinue(read.Data, read.Size, read.Offset, + ptr, read.ReqId)); + ReadsInFly++; + if (ReadsInFly >= MaxReadsInFly) { + return; + } + } + } + } + +public: + TCachedBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, + ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, + TIntrusivePtr<TSectorMap> sectorMap) + : TRealBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, + maxQueuedCompletionActions, sectorMap) + , ReadsInFly(0) + {} + + void ExecRead(TCachedReadCompletion *completion, TActorSystem 
*actorSystem) { + TStackVec<TCompletionAction*, 32> pendingActions; + { + TGuard<TMutex> guard(CacheMutex); + ui64 offset = completion->GetOffset(); + auto currentReadIt = CurrentReads.find(offset); + Y_VERIFY(currentReadIt != CurrentReads.end()); + auto range = ReadsForOffset.equal_range(offset); + if (Cache.GetCount() >= MaxCount) { + Cache.Pop(); + } + TIntrusivePtr<TCacheRecord> cacheRecord(new TCacheRecord(completion)); + TIntrusivePtr<TCacheRecord> *junk; + Cache.Erase(offset); + bool isOk = Cache.Insert(offset, cacheRecord, junk); + Y_VERIFY(isOk); + auto nextIt = range.first; + for (auto it = range.first; it != range.second; it = nextIt) { + nextIt++; + TRead &read = it->second; + if (read.Size <= completion->GetSize()) { + if (read.Data != completion->GetData()) { + memcpy(read.Data, cacheRecord->Data, read.Size); + Mon.DeviceReadCacheHits->Inc(); + } else { + Mon.DeviceReadCacheMisses->Inc(); + } + Y_VERIFY(read.CompletionAction); + for (size_t i = 0; i < cacheRecord->BadOffsets.size(); ++i) { + read.CompletionAction->RegisterBadOffset(cacheRecord->BadOffsets[i]); + } + pendingActions.push_back(read.CompletionAction); + ReadsForOffset.erase(it); + } + } + delete currentReadIt->second; + CurrentReads.erase(currentReadIt); + ReadsInFly--; + UpdateReads(); + } + + for (size_t i = 0; i < pendingActions.size(); ++i) { + pendingActions[i]->Exec(actorSystem); + } + + { + TGuard<TMutex> guard(CacheMutex); + if (ReadsInFly == 0) { + ClearCache(); + } + } + } + + void ReleaseRead(TCachedReadCompletion *completion, TActorSystem *actorSystem) { + TGuard<TMutex> guard(CacheMutex); + Y_UNUSED(actorSystem); + auto it = CurrentReads.find(completion->GetOffset()); + Y_VERIFY(it != CurrentReads.end()); + delete it->second; + CurrentReads.erase(it); + ReadsInFly--; + } + + virtual ~TCachedBlockDevice() { + Stop(); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // cache related methods 
implementation + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + // Can be called from completion Exec + virtual void CachedPreadAsync(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, + TReqId reqId, NWilson::TTraceId *traceId) override { + TGuard<TMutex> guard(CacheMutex); + ReadsForOffset.emplace(offset, TRead(data, size, offset, completionAction, reqId, traceId)); + UpdateReads(); + } + + virtual void ClearCache() override { + TGuard<TMutex> guard(CacheMutex); + Cache.Clear(); + } + + void Stop() override { + TRealBlockDevice::Stop(); + for (auto it = CurrentReads.begin(); it != CurrentReads.end(); ++it) { + delete it->second; + } + CurrentReads.clear(); + for (auto it = ReadsForOffset.begin(); it != ReadsForOffset.end(); ++it) { + if (it->second.CompletionAction) { + it->second.CompletionAction->Release(ActorSystem); + } + } + ReadsForOffset.clear(); + } +}; + +IBlockDevice* CreateRealBlockDevice(const TString &path, ui32 pDiskId, TPDiskMon &mon, ui64 reorderingCycles, + ui64 seekCostNs, ui64 deviceInFlight, TDeviceMode::TFlags flags, ui32 maxQueuedCompletionActions, TIntrusivePtr<TSectorMap> sectorMap) { - return new TCachedBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, + return new TCachedBlockDevice(path, pDiskId, mon, reorderingCycles, seekCostNs, deviceInFlight, flags, maxQueuedCompletionActions, sectorMap); } -IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, - TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem) { +IBlockDevice* CreateRealBlockDeviceWithDefaults(const TString &path, TPDiskMon &mon, TDeviceMode::TFlags flags, + TIntrusivePtr<TSectorMap> sectorMap, TActorSystem *actorSystem) { IBlockDevice *device = CreateRealBlockDevice(path, 0, mon, 0, 0, 4, flags, 8, sectorMap); - device->Initialize(actorSystem, {}); - return device; -} - + 
device->Initialize(actorSystem, {}); + return device; +} + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp index c0bcbb3eb8..90816f5a22 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_ut.cpp @@ -1,13 +1,13 @@ #include "defs.h" -#include "blobstorage_pdisk_blockdevice.h" +#include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_actorsystem_creator.h" -#include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_ut_defs.h" +#include "blobstorage_pdisk_actorsystem_creator.h" +#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_ut_defs.h" #include <ydb/core/control/immediate_control_board_wrapper.h> - + #include <library/cpp/testing/unittest/registar.h> #include <util/folder/dirut.h> #include <util/folder/tempdir.h> @@ -19,15 +19,15 @@ namespace NKikimr { -class TWriter : public NPDisk::TCompletionAction { +class TWriter : public NPDisk::TCompletionAction { NPDisk::IBlockDevice &Device; - NPDisk::TBuffer *Buffer; + NPDisk::TBuffer *Buffer; const i32 GenerationsToSpawn; TAtomic *Counter; public: - TWriter(NPDisk::IBlockDevice &device, NPDisk::TBuffer *data, const i32 generationsToSpawn, TAtomic *counter) + TWriter(NPDisk::IBlockDevice &device, NPDisk::TBuffer *data, const i32 generationsToSpawn, TAtomic *counter) : Device(device) - , Buffer(data) + , Buffer(data) , GenerationsToSpawn(generationsToSpawn) , Counter(counter) {} @@ -36,10 +36,10 @@ public: Y_UNUSED(actorSystem); AtomicIncrement(*Counter); if (GenerationsToSpawn > 0) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(Buffer->Data(), Buffer->Size()); - Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TWriter(Device, Buffer, GenerationsToSpawn - 1, Counter), + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(Buffer->Data(), Buffer->Size()); + 
Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TWriter(Device, Buffer, GenerationsToSpawn - 1, Counter), NPDisk::TReqId(NPDisk::TReqId::Test1, 0), {}); - Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TWriter(Device, Buffer, GenerationsToSpawn - 1, Counter), + Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TWriter(Device, Buffer, GenerationsToSpawn - 1, Counter), NPDisk::TReqId(NPDisk::TReqId::Test2, 0), {}); } delete this; @@ -51,7 +51,7 @@ public: } }; -class TFlusher : public NPDisk::TCompletionAction { +class TFlusher : public NPDisk::TCompletionAction { NPDisk::IBlockDevice &Device; const i32 GenerationsToSpawn; TAtomic *Counter; @@ -78,15 +78,15 @@ public: } }; -class TRabbit : public NPDisk::TCompletionAction { +class TRabbit : public NPDisk::TCompletionAction { NPDisk::IBlockDevice &Device; - NPDisk::TBuffer *Buffer; + NPDisk::TBuffer *Buffer; const i32 GenerationsToSpawn; TAtomic *Counter; public: - TRabbit(NPDisk::IBlockDevice &device, NPDisk::TBuffer *data, const i32 generationsToSpawn, TAtomic *counter) + TRabbit(NPDisk::IBlockDevice &device, NPDisk::TBuffer *data, const i32 generationsToSpawn, TAtomic *counter) : Device(device) - , Buffer(data) + , Buffer(data) , GenerationsToSpawn(generationsToSpawn) , Counter(counter) {} @@ -95,11 +95,11 @@ public: Y_UNUSED(actorSystem); AtomicIncrement(*Counter); if (GenerationsToSpawn > 0) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(Buffer->Data(), Buffer->Size()); - Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TRabbit(Device, Buffer, GenerationsToSpawn - 1, Counter), + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(Buffer->Data(), Buffer->Size()); + Device.PwriteAsync(Buffer->Data(), Buffer->Size(), 0, new TRabbit(Device, Buffer, GenerationsToSpawn - 1, Counter), NPDisk::TReqId(NPDisk::TReqId::Test0, 0), {}); Device.FlushAsync(new TRabbit(Device, Buffer, GenerationsToSpawn - 1, Counter), NPDisk::TReqId(NPDisk::TReqId::Test1, 0)); - Device.PreadAsync(Buffer->Data(), 
Buffer->Size(), 0, new TRabbit(Device, Buffer, GenerationsToSpawn - 1, Counter), + Device.PreadAsync(Buffer->Data(), Buffer->Size(), 0, new TRabbit(Device, Buffer, GenerationsToSpawn - 1, Counter), NPDisk::TReqId(NPDisk::TReqId::Test3, 0), {}); } delete this; @@ -149,100 +149,100 @@ void WaitForValue(TAtomic *counter, TDuration maxDuration, TAtomicBase expectedV } } -void RunTestMultipleRequestsFromCompletionAction() { - const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; +void RunTestMultipleRequestsFromCompletionAction() { + const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - const ui32 dataSize = 4 << 10; - const ui64 generations = 8; - TAtomic counter = 0; - - - TTempDir tempDir; - TString path = CreateFile(tempDir().c_str(), dataSize); - - { - TActorSystemCreator creator; - THolder<NPDisk::TBufferPool> bufferPool(NPDisk::CreateBufferPool(dataSize, 1, false, {})); - NPDisk::TBuffer::TPtr alignedBuffer(bufferPool->Pop()); - memset(alignedBuffer->Data(), 0, dataSize); - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon, 0, 0, 4, - NPDisk::TDeviceMode::LockFile, 2 << generations, nullptr)); - device->Initialize(creator.GetActorSystem(), {}); - - (new TWriter(*device, alignedBuffer.Get(), (i32)generations, &counter))->Exec(nullptr); - - TAtomicBase expectedCounter = 0; - for (ui64 i = 0; i <= generations; ++i) { - expectedCounter += 1ull << i; - } - WaitForValue(&counter, TIMEOUT, expectedCounter); - - TAtomicBase resultingCounter = AtomicGet(counter); - - UNIT_ASSERT_VALUES_EQUAL( - resultingCounter, - expectedCounter - ); - } - Ctest << "Done" << Endl; -} - -void RunTestDestructionWithMultipleFlushesFromCompletionAction() { - const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; + const ui32 dataSize = 4 << 10; + const ui64 generations = 8; 
+ TAtomic counter = 0; + + + TTempDir tempDir; + TString path = CreateFile(tempDir().c_str(), dataSize); + + { + TActorSystemCreator creator; + THolder<NPDisk::TBufferPool> bufferPool(NPDisk::CreateBufferPool(dataSize, 1, false, {})); + NPDisk::TBuffer::TPtr alignedBuffer(bufferPool->Pop()); + memset(alignedBuffer->Data(), 0, dataSize); + THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon, 0, 0, 4, + NPDisk::TDeviceMode::LockFile, 2 << generations, nullptr)); + device->Initialize(creator.GetActorSystem(), {}); + + (new TWriter(*device, alignedBuffer.Get(), (i32)generations, &counter))->Exec(nullptr); + + TAtomicBase expectedCounter = 0; + for (ui64 i = 0; i <= generations; ++i) { + expectedCounter += 1ull << i; + } + WaitForValue(&counter, TIMEOUT, expectedCounter); + + TAtomicBase resultingCounter = AtomicGet(counter); + + UNIT_ASSERT_VALUES_EQUAL( + resultingCounter, + expectedCounter + ); + } + Ctest << "Done" << Endl; +} + +void RunTestDestructionWithMultipleFlushesFromCompletionAction() { + const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - const ui32 dataSize = 4 << 10; - const i32 generations = 8; - TAtomic counter = 0; - - TTempDir tempDir; - TString path = CreateFile(tempDir().c_str(), dataSize); - - TActorSystemCreator creator; - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon, 0, 0, 4, - NPDisk::TDeviceMode::LockFile, 2 << generations, nullptr)); - device->Initialize(creator.GetActorSystem(), {}); - - (new TFlusher(*device, generations, &counter))->Exec(nullptr); - device->Stop(); - for (int i = 0; i < 10000; ++i) { - (new TFlusher(*device, generations, &counter))->Exec(nullptr); + const ui32 dataSize = 4 << 10; + const i32 generations = 8; + TAtomic counter = 0; + + TTempDir tempDir; + TString path = CreateFile(tempDir().c_str(), dataSize); + + TActorSystemCreator creator; + 
THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon, 0, 0, 4, + NPDisk::TDeviceMode::LockFile, 2 << generations, nullptr)); + device->Initialize(creator.GetActorSystem(), {}); + + (new TFlusher(*device, generations, &counter))->Exec(nullptr); + device->Stop(); + for (int i = 0; i < 10000; ++i) { + (new TFlusher(*device, generations, &counter))->Exec(nullptr); } - device.Destroy(); + device.Destroy(); - Ctest << "Done" << Endl; -} + Ctest << "Done" << Endl; +} Y_UNIT_TEST_SUITE(TBlockDeviceTest) { - Y_UNIT_TEST(TestMultipleRequestsFromCompletionAction) { - RunTestMultipleRequestsFromCompletionAction(); - } - - Y_UNIT_TEST(TestDestructionWithMultipleFlushesFromCompletionAction) { - RunTestDestructionWithMultipleFlushesFromCompletionAction(); - } - - Y_UNIT_TEST(TestDeviceWithSubmitGetThread) { - const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; - THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - const ui32 fileSize = 4 << 20; - const ui32 dataSize = 4 << 10; - NPDisk::TAlignedData data(dataSize); - - TTempDir tempDir; - TString path = CreateFile(tempDir().c_str(), fileSize); - - TActorSystemCreator creator; - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile | NPDisk::TDeviceMode::UseSubmitGetThread, nullptr, creator.GetActorSystem())); - - device->PreadSync(data.Get(), data.Size(), 0, {}, nullptr); - device->PwriteSync(data.Get(), data.Size(), 0, {}, nullptr); - - device.Destroy(); - Ctest << "Done" << Endl; - } - + Y_UNIT_TEST(TestMultipleRequestsFromCompletionAction) { + RunTestMultipleRequestsFromCompletionAction(); + } + + Y_UNIT_TEST(TestDestructionWithMultipleFlushesFromCompletionAction) { + RunTestDestructionWithMultipleFlushesFromCompletionAction(); + } + + Y_UNIT_TEST(TestDeviceWithSubmitGetThread) { + const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; 
+ THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); + const ui32 fileSize = 4 << 20; + const ui32 dataSize = 4 << 10; + NPDisk::TAlignedData data(dataSize); + + TTempDir tempDir; + TString path = CreateFile(tempDir().c_str(), fileSize); + + TActorSystemCreator creator; + THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, + NPDisk::TDeviceMode::LockFile | NPDisk::TDeviceMode::UseSubmitGetThread, nullptr, creator.GetActorSystem())); + + device->PreadSync(data.Get(), data.Size(), 0, {}, nullptr); + device->PwriteSync(data.Get(), data.Size(), 0, {}, nullptr); + + device.Destroy(); + Ctest << "Done" << Endl; + } + /* Y_UNIT_TEST(TestRabbitCompletionAction) { const TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; @@ -257,7 +257,7 @@ Y_UNIT_TEST_SUITE(TBlockDeviceTest) { NPDisk::TAlignedData alignedBuffer; alignedBuffer.Resize(dataSize); memset(alignedBuffer.Get(), 0, dataSize); - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon)); + THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDevice(path, 0, *mon)); device->Initialize(nullptr); (new TRabbit(*device, alignedBuffer, generations, &counter))->Exec(nullptr); @@ -278,7 +278,7 @@ Y_UNIT_TEST_SUITE(TBlockDeviceTest) { device.Destroy(); } - Ctest << "Done" << Endl; + Ctest << "Done" << Endl; } */ } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_id_formatter.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_id_formatter.h index 3596212304..61cbf60b3d 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_id_formatter.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_id_formatter.h @@ -7,24 +7,24 @@ namespace NKikimr { namespace NPDisk { struct TChunkIdFormatter { - TChunkIdFormatter(IOutputStream& str) + TChunkIdFormatter(IOutputStream& str) : Str(str) , FirstOutput(true) , FirstChunkId(-1) , RunLen(0) {} - template <class T> - void 
PrintBracedChunksList(const T& chunkIds) { - Str << "{"; - for (ui32 idx : chunkIds) { - PrintChunk(idx); - } - Finish(); - Str << "}"; - } - - void PrintChunk(ui32 chunkId) { + template <class T> + void PrintBracedChunksList(const T& chunkIds) { + Str << "{"; + for (ui32 idx : chunkIds) { + PrintChunk(idx); + } + Finish(); + Str << "}"; + } + + void PrintChunk(ui32 chunkId) { if (chunkId == FirstChunkId + RunLen) { ++RunLen; } else { @@ -50,12 +50,12 @@ struct TChunkIdFormatter { Str << FirstChunkId; if (RunLen > 2) { Str << ".." << FirstChunkId + RunLen - 1; - } else if (RunLen == 2) { + } else if (RunLen == 2) { Str << ", " << FirstChunkId + 1; } } - IOutputStream& Str; + IOutputStream& Str; bool FirstOutput; ui32 FirstChunkId; ui32 RunLen; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h index 9eebd6909a..444830953c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h @@ -1,219 +1,219 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_color_limits.h" +#include "blobstorage_pdisk_color_limits.h" #include "blobstorage_pdisk_data.h" -#include "blobstorage_pdisk_defs.h" +#include "blobstorage_pdisk_defs.h" #include "blobstorage_pdisk_keeper_params.h" -#include "blobstorage_pdisk_quota_record.h" -#include "blobstorage_pdisk_util_space_color.h" +#include "blobstorage_pdisk_quota_record.h" +#include "blobstorage_pdisk_util_space_color.h" -#include <util/generic/algorithm.h> -#include <util/generic/queue.h> +#include <util/generic/algorithm.h> +#include <util/generic/queue.h> namespace NKikimr { namespace NPDisk { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Chunk quota tracker. -// Part of the in-memory state. 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -class TPerOwnerQuotaTracker { - - TColorLimits ColorLimits; - i64 Total; - size_t ExpectedOwnerCount; // 0 means 'add and remove owners as you go' - - TStackVec<TOwner, 256> ActiveOwnerIds; // Can be accessed only from the main thread (changes only when owner is - // added or removed). - std::array<TQuotaRecord, 256> QuotaForOwner; // Always allocated, can be read from anywhere - static_assert(sizeof(TOwner) == 1, "Make sure to use large enough QuotaForOwner buffer"); - -public: - TPerOwnerQuotaTracker() { - TColorLimits limits; - Reset(0, limits); - } - - void Reset(i64 total, const TColorLimits &limits) { - ColorLimits = limits; - Total = total; - ExpectedOwnerCount = 0; - ActiveOwnerIds.clear(); - QuotaForOwner.fill(TQuotaRecord{}); - } - - // The following code is expected to behave OK only when you reduce expected owner count. - // Increasing expected owner count is fundamentally unfair and may cause instant jumps right into 0 free, - // overusers will keep their unfair share as a result. - void SetExpectedOwnerCount(size_t newOwnerCount) { - if (newOwnerCount != ExpectedOwnerCount) { - ExpectedOwnerCount = newOwnerCount; - RedistributeQuotas(); - } - } - - i64 ForceHardLimit(TOwner ownerId, i64 limit) { - Y_VERIFY(limit >= 0); - return QuotaForOwner[ownerId].ForceHardLimit(limit, ColorLimits); - } - - void RedistributeQuotas() { - size_t parts = Max(ExpectedOwnerCount, ActiveOwnerIds.size()); - if (parts) { - i64 limit = Total / parts; - - // Divide into equal parts and that's it. 
- for (TOwner id : ActiveOwnerIds) { - ForceHardLimit(id, limit); - } - } - } - - void AddOwner(TOwner id, TVDiskID vdiskId) { - TQuotaRecord &record = QuotaForOwner[id]; - Y_VERIFY(record.GetHardLimit() == 0); - Y_VERIFY(record.GetFree() == 0); - record.SetName(TStringBuilder() << "Owner# " << id); - record.SetVDiskId(vdiskId); - - ActiveOwnerIds.push_back(id); - if (ActiveOwnerIds.size() <= ExpectedOwnerCount || ExpectedOwnerCount == 0) { - RedistributeQuotas(); - } - } - - void RemoveOwner(TOwner id) { - bool isFound = false; - for (ui64 idx = 0; idx < ActiveOwnerIds.size(); ++idx) { - if (ActiveOwnerIds[idx] == id) { - ActiveOwnerIds[idx] = ActiveOwnerIds.back(); - ActiveOwnerIds.pop_back(); - isFound = true; - break; - } - } - Y_VERIFY(isFound); - ForceHardLimit(id, 0); - } - - i64 AddSystemOwner(TOwner id, i64 quota, TString name) { - TQuotaRecord &record = QuotaForOwner[id]; - Y_VERIFY(record.GetHardLimit() == 0); - Y_VERIFY(record.GetFree() == 0); - record.SetName(name); - i64 inc = ForceHardLimit(id, quota); - ActiveOwnerIds.push_back(id); - return inc; - } - - i64 GetHardLimit(TOwner id) const { - return QuotaForOwner[id].GetHardLimit(); - } - - i64 GetFree(TOwner id) const { - return QuotaForOwner[id].GetFree(); - } - - i64 GetUsed(TOwner id) const { - return QuotaForOwner[id].GetUsed(); - } - - // Tread-safe status flag getter - NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(TOwner id, i64 allocationSize) const { - return QuotaForOwner[id].EstimateSpaceColor(allocationSize); - } - - bool TryAllocate(TOwner id, i64 count, TString &outErrorReason) { - return QuotaForOwner[id].TryAllocate(count, outErrorReason); - } - - bool ForceAllocate(TOwner id, i64 count) { - return QuotaForOwner[id].ForceAllocate(count); - } - - bool InitialAllocate(TOwner id, i64 count) { - Y_VERIFY(count >= 0); - return QuotaForOwner[id].ForceAllocate(count); - } - - void Release(TOwner id, i64 count) { - QuotaForOwner[id].Release(count); - } - - void 
PrintQuotaRow(IOutputStream &str, const TQuotaRecord& q) { - str << "<tr>"; - str << "<td>" << q.Name << "</td>"; - str << "<td>" << (q.VDiskId ? q.VDiskId->ToStringWOGeneration() : "") << "</td>"; - str << "<td>" << q.GetHardLimit() << "</td>"; - str << "<td>" << q.GetFree() << "</td>"; - str << "<td>" << q.GetUsed() << "</td>"; - str << "<td>" << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(q.EstimateSpaceColor(0)) << "</td>"; - - str << "<td>" << q.Cyan << "</td>"; - str << "<td>" << q.Yellow << "</td>"; - str << "<td>" << q.LightOrange << "</td>"; - str << "<td>" << q.Orange << "</td>"; - str << "<td>" << q.Red << "</td>"; - str << "<td>" << q.Black << "</td>"; - str << "</tr>"; - } - - void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder) { - str << "<pre>"; - str << "ColorLimits#\n"; - ColorLimits.Print(str); - str << "\nTotal# " << Total; - str << "\nExpectedOwnerCount# " << ExpectedOwnerCount; - str << "\nActiveOwners# " << ActiveOwnerIds.size(); - if (colorBorder) { - str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder) << "\n"; - } - str << "</pre>"; - str << "<table class='table table-sortable tablesorter tablesorter-bootstrap table-bordered'>"; - str << R"_(<tr> - <th>Name</th> - <th>VDiskId</th> - <th>HardLimit</th> - <th>Free</th> - <th>Used</th> - <th>Color</th> - - <th>Cyan</th> - <th>Yellow</th> - <th>LightOrange</th> - <th>Orange</th> - <th>Red</th> - <th>Black</th> - </tr> - )_"; - if (sharedQuota) { - PrintQuotaRow(str, *sharedQuota); - } - for (TOwner id : ActiveOwnerIds) { - PrintQuotaRow(str, QuotaForOwner[id]); - } - str << "</table>"; - } -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Chunk quota tracker. +// Part of the in-memory state. 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +class TPerOwnerQuotaTracker { + + TColorLimits ColorLimits; + i64 Total; + size_t ExpectedOwnerCount; // 0 means 'add and remove owners as you go' + + TStackVec<TOwner, 256> ActiveOwnerIds; // Can be accessed only from the main thread (changes only when owner is + // added or removed). + std::array<TQuotaRecord, 256> QuotaForOwner; // Always allocated, can be read from anywhere + static_assert(sizeof(TOwner) == 1, "Make sure to use large enough QuotaForOwner buffer"); + +public: + TPerOwnerQuotaTracker() { + TColorLimits limits; + Reset(0, limits); + } + + void Reset(i64 total, const TColorLimits &limits) { + ColorLimits = limits; + Total = total; + ExpectedOwnerCount = 0; + ActiveOwnerIds.clear(); + QuotaForOwner.fill(TQuotaRecord{}); + } + + // The following code is expected to behave OK only when you reduce expected owner count. + // Increasing expected owner count is fundamentally unfair and may cause instant jumps right into 0 free, + // overusers will keep their unfair share as a result. + void SetExpectedOwnerCount(size_t newOwnerCount) { + if (newOwnerCount != ExpectedOwnerCount) { + ExpectedOwnerCount = newOwnerCount; + RedistributeQuotas(); + } + } + + i64 ForceHardLimit(TOwner ownerId, i64 limit) { + Y_VERIFY(limit >= 0); + return QuotaForOwner[ownerId].ForceHardLimit(limit, ColorLimits); + } + + void RedistributeQuotas() { + size_t parts = Max(ExpectedOwnerCount, ActiveOwnerIds.size()); + if (parts) { + i64 limit = Total / parts; + + // Divide into equal parts and that's it. 
+ for (TOwner id : ActiveOwnerIds) { + ForceHardLimit(id, limit); + } + } + } + + void AddOwner(TOwner id, TVDiskID vdiskId) { + TQuotaRecord &record = QuotaForOwner[id]; + Y_VERIFY(record.GetHardLimit() == 0); + Y_VERIFY(record.GetFree() == 0); + record.SetName(TStringBuilder() << "Owner# " << id); + record.SetVDiskId(vdiskId); + + ActiveOwnerIds.push_back(id); + if (ActiveOwnerIds.size() <= ExpectedOwnerCount || ExpectedOwnerCount == 0) { + RedistributeQuotas(); + } + } + + void RemoveOwner(TOwner id) { + bool isFound = false; + for (ui64 idx = 0; idx < ActiveOwnerIds.size(); ++idx) { + if (ActiveOwnerIds[idx] == id) { + ActiveOwnerIds[idx] = ActiveOwnerIds.back(); + ActiveOwnerIds.pop_back(); + isFound = true; + break; + } + } + Y_VERIFY(isFound); + ForceHardLimit(id, 0); + } + + i64 AddSystemOwner(TOwner id, i64 quota, TString name) { + TQuotaRecord &record = QuotaForOwner[id]; + Y_VERIFY(record.GetHardLimit() == 0); + Y_VERIFY(record.GetFree() == 0); + record.SetName(name); + i64 inc = ForceHardLimit(id, quota); + ActiveOwnerIds.push_back(id); + return inc; + } + + i64 GetHardLimit(TOwner id) const { + return QuotaForOwner[id].GetHardLimit(); + } + + i64 GetFree(TOwner id) const { + return QuotaForOwner[id].GetFree(); + } + + i64 GetUsed(TOwner id) const { + return QuotaForOwner[id].GetUsed(); + } + + // Tread-safe status flag getter + NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(TOwner id, i64 allocationSize) const { + return QuotaForOwner[id].EstimateSpaceColor(allocationSize); + } + + bool TryAllocate(TOwner id, i64 count, TString &outErrorReason) { + return QuotaForOwner[id].TryAllocate(count, outErrorReason); + } + + bool ForceAllocate(TOwner id, i64 count) { + return QuotaForOwner[id].ForceAllocate(count); + } + + bool InitialAllocate(TOwner id, i64 count) { + Y_VERIFY(count >= 0); + return QuotaForOwner[id].ForceAllocate(count); + } + + void Release(TOwner id, i64 count) { + QuotaForOwner[id].Release(count); + } + + void 
PrintQuotaRow(IOutputStream &str, const TQuotaRecord& q) { + str << "<tr>"; + str << "<td>" << q.Name << "</td>"; + str << "<td>" << (q.VDiskId ? q.VDiskId->ToStringWOGeneration() : "") << "</td>"; + str << "<td>" << q.GetHardLimit() << "</td>"; + str << "<td>" << q.GetFree() << "</td>"; + str << "<td>" << q.GetUsed() << "</td>"; + str << "<td>" << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(q.EstimateSpaceColor(0)) << "</td>"; + + str << "<td>" << q.Cyan << "</td>"; + str << "<td>" << q.Yellow << "</td>"; + str << "<td>" << q.LightOrange << "</td>"; + str << "<td>" << q.Orange << "</td>"; + str << "<td>" << q.Red << "</td>"; + str << "<td>" << q.Black << "</td>"; + str << "</tr>"; + } + + void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder) { + str << "<pre>"; + str << "ColorLimits#\n"; + ColorLimits.Print(str); + str << "\nTotal# " << Total; + str << "\nExpectedOwnerCount# " << ExpectedOwnerCount; + str << "\nActiveOwners# " << ActiveOwnerIds.size(); + if (colorBorder) { + str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder) << "\n"; + } + str << "</pre>"; + str << "<table class='table table-sortable tablesorter tablesorter-bootstrap table-bordered'>"; + str << R"_(<tr> + <th>Name</th> + <th>VDiskId</th> + <th>HardLimit</th> + <th>Free</th> + <th>Used</th> + <th>Color</th> + + <th>Cyan</th> + <th>Yellow</th> + <th>LightOrange</th> + <th>Orange</th> + <th>Red</th> + <th>Black</th> + </tr> + )_"; + if (sharedQuota) { + PrintQuotaRow(str, *sharedQuota); + } + for (TOwner id : ActiveOwnerIds) { + PrintQuotaRow(str, QuotaForOwner[id]); + } + str << "</table>"; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk tracker. // Part of the in-memory state. 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class TChunkTracker { - -using TColor = NKikimrBlobStorage::TPDiskSpaceColor; - - THolder<TPerOwnerQuotaTracker> GlobalQuota; - THolder<TQuotaRecord> SharedQuota; - THolder<TPerOwnerQuotaTracker> OwnerQuota; - TKeeperParams Params; - - TColor::E ColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; - + +using TColor = NKikimrBlobStorage::TPDiskSpaceColor; + + THolder<TPerOwnerQuotaTracker> GlobalQuota; + THolder<TQuotaRecord> SharedQuota; + THolder<TPerOwnerQuotaTracker> OwnerQuota; + TKeeperParams Params; + + TColor::E ColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; + public: // OwnerSystem - common log quota @@ -227,79 +227,79 @@ public: const i64 CommonStaticLogSize = 70; const i64 MinCommonLogSize = 200; - TChunkTracker() - : GlobalQuota(new TPerOwnerQuotaTracker()) - , SharedQuota(new TQuotaRecord()) - , OwnerQuota(new TPerOwnerQuotaTracker()) - {} + TChunkTracker() + : GlobalQuota(new TPerOwnerQuotaTracker()) + , SharedQuota(new TQuotaRecord()) + , OwnerQuota(new TPerOwnerQuotaTracker()) + {} bool Reset(const TKeeperParams ¶ms, TString &outErrorReason) { - Params = params; - + Params = params; + GlobalQuota->Reset(params.TotalChunks, TColorLimits::MakeLogLimits()); - i64 unappropriated = params.TotalChunks; - - unappropriated += GlobalQuota->AddSystemOwner(OwnerSystemLog, params.SysLogSize, "SysLog"); - if (unappropriated < 0) { + i64 unappropriated = params.TotalChunks; + + unappropriated += GlobalQuota->AddSystemOwner(OwnerSystemLog, params.SysLogSize, "SysLog"); + if (unappropriated < 0) { outErrorReason = (TStringBuilder() << "Error adding OwnerSystemLog quota, size# " << params.SysLogSize << " TotalChunks# " << params.TotalChunks); return false; } - - unappropriated += GlobalQuota->AddSystemOwner(OwnerSystemReserve, SysReserveSize, "System Reserve"); - if (unappropriated < 0) { + + unappropriated += 
GlobalQuota->AddSystemOwner(OwnerSystemReserve, SysReserveSize, "System Reserve"); + if (unappropriated < 0) { outErrorReason = (TStringBuilder() << "Error adding OwnerSystemReserve quota, size# " << SysReserveSize << " TotalChunks# " << params.TotalChunks); return false; } i64 staticLog = params.HasStaticGroups ? CommonStaticLogSize : 0; - unappropriated += GlobalQuota->AddSystemOwner(OwnerCommonStaticLog, staticLog, "Common Log Static Group Bonus"); - if (unappropriated < 0) { + unappropriated += GlobalQuota->AddSystemOwner(OwnerCommonStaticLog, staticLog, "Common Log Static Group Bonus"); + if (unappropriated < 0) { outErrorReason = (TStringBuilder() << "Error adding OwnerCommonStaticLog quota, size# " << staticLog << " TotalChunks# " << params.TotalChunks); return false; } - i64 commonLog = MinCommonLogSize; + i64 commonLog = MinCommonLogSize; if (commonLog + staticLog < params.CommonLogSize) { commonLog = params.CommonLogSize - staticLog; } - unappropriated += GlobalQuota->AddSystemOwner(OwnerSystem, commonLog, "Common Log"); - if (unappropriated < 0) { + unappropriated += GlobalQuota->AddSystemOwner(OwnerSystem, commonLog, "Common Log"); + if (unappropriated < 0) { outErrorReason = (TStringBuilder() << "Error adding OwnerSystem (common log) quota, size# " << commonLog << " TotalChunks# " << params.TotalChunks); return false; } i64 chunksOwned = 0; - for (auto& [ownerId, ownerInfo] : params.OwnersInfo) { - chunksOwned += ownerInfo.ChunksOwned; + for (auto& [ownerId, ownerInfo] : params.OwnersInfo) { + chunksOwned += ownerInfo.ChunksOwned; } - if (chunksOwned > unappropriated) { + if (chunksOwned > unappropriated) { outErrorReason = (TStringBuilder() << "Error adding OwnerBeginUser quota, chunksOwned#" << chunksOwned - << " unappropriated# " << unappropriated << " TotalChunks# " << params.TotalChunks); + << " unappropriated# " << unappropriated << " TotalChunks# " << params.TotalChunks); return false; } - unappropriated += 
GlobalQuota->AddSystemOwner(OwnerBeginUser, unappropriated, "Per Owner Chunk Pool"); - if (unappropriated < 0) { - outErrorReason = (TStringBuilder() << "Error adding OwnerBeginUser quota, size# " << unappropriated + unappropriated += GlobalQuota->AddSystemOwner(OwnerBeginUser, unappropriated, "Per Owner Chunk Pool"); + if (unappropriated < 0) { + outErrorReason = (TStringBuilder() << "Error adding OwnerBeginUser quota, size# " << unappropriated << " TotalChunks# " << params.TotalChunks); return false; } - SharedQuota->SetName("SharedQuota"); + SharedQuota->SetName("SharedQuota"); TColorLimits chunkLimits = TColorLimits::MakeChunkLimits(); - SharedQuota->ForceHardLimit(GlobalQuota->GetHardLimit(OwnerBeginUser), chunkLimits); + SharedQuota->ForceHardLimit(GlobalQuota->GetHardLimit(OwnerBeginUser), chunkLimits); OwnerQuota->Reset(GlobalQuota->GetHardLimit(OwnerBeginUser), chunkLimits); OwnerQuota->SetExpectedOwnerCount(params.ExpectedOwnerCount); - for (auto& [ownerId, ownerInfo] : params.OwnersInfo) { - i64 chunks = ownerInfo.ChunksOwned; - AddOwner(ownerId, ownerInfo.VDiskId); - if (chunks) { - OwnerQuota->InitialAllocate(ownerId, chunks); - bool isOk = SharedQuota->InitialAllocate(chunks); + for (auto& [ownerId, ownerInfo] : params.OwnersInfo) { + i64 chunks = ownerInfo.ChunksOwned; + AddOwner(ownerId, ownerInfo.VDiskId); + if (chunks) { + OwnerQuota->InitialAllocate(ownerId, chunks); + bool isOk = SharedQuota->InitialAllocate(chunks); if (!isOk) { return false; } @@ -307,19 +307,19 @@ public: } if (params.CommonLogSize) { - bool isOk = GlobalQuota->InitialAllocate(OwnerSystem, params.CommonLogSize); + bool isOk = GlobalQuota->InitialAllocate(OwnerSystem, params.CommonLogSize); if (!isOk) { return false; } } - ColorBorder = params.SpaceColorBorder; - return true; + ColorBorder = params.SpaceColorBorder; + return true; } - void AddOwner(TOwner owner, TVDiskID vdiskId) { + void AddOwner(TOwner owner, TVDiskID vdiskId) { Y_VERIFY(IsOwnerUser(owner)); - 
OwnerQuota->AddOwner(owner, vdiskId); + OwnerQuota->AddOwner(owner, vdiskId); } void RemoveOwner(TOwner owner) { @@ -327,7 +327,7 @@ public: OwnerQuota->RemoveOwner(owner); } - i64 GetOwnerHardLimit(TOwner owner) const { + i64 GetOwnerHardLimit(TOwner owner) const { if (IsOwnerUser(owner)) { return OwnerQuota->GetHardLimit(owner); } else { @@ -340,25 +340,25 @@ public: } } - i64 GetOwnerUsed(TOwner owner) const { - return OwnerQuota->GetUsed(owner); - } - - ///////////////////////////////////////////////////// - // for used space monitoring - i64 GetTotalUsed() const { - return SharedQuota->GetUsed(); - } - - i64 GetTotalHardLimit() const { - return SharedQuota->GetHardLimit(); - } - ///////////////////////////////////////////////////// - - i64 GetOwnerFree(TOwner owner) const { + i64 GetOwnerUsed(TOwner owner) const { + return OwnerQuota->GetUsed(owner); + } + + ///////////////////////////////////////////////////// + // for used space monitoring + i64 GetTotalUsed() const { + return SharedQuota->GetUsed(); + } + + i64 GetTotalHardLimit() const { + return SharedQuota->GetHardLimit(); + } + ///////////////////////////////////////////////////// + + i64 GetOwnerFree(TOwner owner) const { if (IsOwnerUser(owner)) { - // fix for CLOUDINC-1822: remove OwnerQuota->GetFree(owner) since it broke group balancing in Hive - return SharedQuota->GetFree(); + // fix for CLOUDINC-1822: remove OwnerQuota->GetFree(owner) since it broke group balancing in Hive + return SharedQuota->GetFree(); } else { if (owner == OwnerCommonStaticLog) { // Static groups use both common and bonus pools @@ -369,20 +369,20 @@ public: } } - TStatusFlags GetSpaceStatusFlags(TOwner owner) const { - return SpaceColorToStatusFlag(GetSpaceColor(owner)); + TStatusFlags GetSpaceStatusFlags(TOwner owner) const { + return SpaceColorToStatusFlag(GetSpaceColor(owner)); + } + + TColor::E GetSpaceColor(TOwner owner) const { + return EstimateSpaceColor(owner, 0); } - TColor::E GetSpaceColor(TOwner owner) const { - 
return EstimateSpaceColor(owner, 0); - } - // Estimate status flags after allocation of allocatinoSize - TColor::E EstimateSpaceColor(TOwner owner, i64 allocationSize) const { + TColor::E EstimateSpaceColor(TOwner owner, i64 allocationSize) const { if (IsOwnerUser(owner)) { - TColor::E ret = Min(ColorBorder, OwnerQuota->EstimateSpaceColor(owner, allocationSize)); - ret = Max(ret, SharedQuota->EstimateSpaceColor(allocationSize)); - return ret; + TColor::E ret = Min(ColorBorder, OwnerQuota->EstimateSpaceColor(owner, allocationSize)); + ret = Max(ret, SharedQuota->EstimateSpaceColor(allocationSize)); + return ret; } else { if (owner == OwnerCommonStaticLog) { if (GlobalQuota->GetHardLimit(OwnerCommonStaticLog) == 0) { @@ -390,14 +390,14 @@ public: owner = OwnerSystem; } } - return GlobalQuota->EstimateSpaceColor(owner, allocationSize); + return GlobalQuota->EstimateSpaceColor(owner, allocationSize); } } bool TryAllocate(TOwner owner, i64 count, TString &outErrorReason) { if (IsOwnerUser(owner)) { - OwnerQuota->ForceAllocate(owner, count); - return SharedQuota->TryAllocate(count, outErrorReason); + OwnerQuota->ForceAllocate(owner, count); + return SharedQuota->TryAllocate(count, outErrorReason); } else { if (owner == OwnerCommonStaticLog) { // Chunk allocation for static log (can use both common and bonus pools) @@ -418,7 +418,7 @@ public: void Release(TOwner owner, i64 count) { if (IsOwnerUser(owner)) { OwnerQuota->Release(owner, count); - SharedQuota->Release(count); + SharedQuota->Release(count); } else { if (owner == OwnerCommonStaticLog || owner == OwnerSystem) { // Chunk release for common log (fill bonus pool first, then fill the common pool) @@ -438,11 +438,11 @@ public: } } - void PrintHTML(IOutputStream &str) { - str << "<h4>GlobalQuota</h4>"; - GlobalQuota->PrintHTML(str, nullptr, nullptr); - str << "<h4>OwnerQuota</h4>"; - OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder); + void PrintHTML(IOutputStream &str) { + str << "<h4>GlobalQuota</h4>"; + 
GlobalQuota->PrintHTML(str, nullptr, nullptr); + str << "<h4>OwnerQuota</h4>"; + OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder); } }; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h index a16d8dd5ea..217818f439 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h @@ -40,13 +40,13 @@ struct TColorLimits { i64 CyanAddend = 0; void Print(IOutputStream &str) { - str << " Black = Total * " << BlackMultiplier << " / " << BlackDivisor << " + " << BlackAddend << "\n"; - str << " Red = Total * " << RedMultiplier << " / " << RedDivisor << " + " << RedAddend << "\n"; - str << " Orange = Total * " << OrangeMultiplier << " / " << OrangeDivisor << " + " << OrangeAddend << "\n"; - str << " LightOrange = Total * " << LightOrangeMultiplier << " / " << LightOrangeDivisor << " + " << LightOrangeAddend << "\n"; - str << " Yellow = Total * " << YellowMultiplier << " / " << YellowDivisor << " + " << YellowAddend << "\n"; + str << " Black = Total * " << BlackMultiplier << " / " << BlackDivisor << " + " << BlackAddend << "\n"; + str << " Red = Total * " << RedMultiplier << " / " << RedDivisor << " + " << RedAddend << "\n"; + str << " Orange = Total * " << OrangeMultiplier << " / " << OrangeDivisor << " + " << OrangeAddend << "\n"; + str << " LightOrange = Total * " << LightOrangeMultiplier << " / " << LightOrangeDivisor << " + " << LightOrangeAddend << "\n"; + str << " Yellow = Total * " << YellowMultiplier << " / " << YellowDivisor << " + " << YellowAddend << "\n"; str << " LightYellow = Total * " << LightYellowMultiplier << " / " << LightYellowDivisor << " + " << LightYellowAddend << "\n"; - str << " Cyan = Total * " << CyanMultiplier << " / " << CyanDivisor << " + " << CyanAddend << "\n"; + str << " Cyan = Total * " << CyanMultiplier << " / " << CyanDivisor << " + " << CyanAddend << "\n"; } static 
TColorLimits MakeChunkLimits() { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h index 65839490fc..148856c3d5 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h @@ -1,52 +1,52 @@ -#pragma once - +#pragma once + #include <ydb/library/pdisk_io/aio.h> #include <ydb/library/wilson/wilson_event.h> - -#include <util/system/hp_timer.h> -#include <util/generic/string.h> -#include <library/cpp/lwtrace/shuttle.h> - -namespace NKikimr::NPDisk { - -struct TCompletionAction { - ui64 OperationIdx; - NHPTimer::STime SubmitTime; - TCompletionAction *FlushAction = nullptr; - ui64 CostNs = 0; - NWilson::TTraceId TraceId; - EIoResult Result = EIoResult::Unknown; - TString ErrorReason; - - mutable NLWTrace::TOrbit Orbit; -protected: - TVector<ui64> BadOffsets; - -public: - void SetResult(const EIoResult result) { - Result = result; - if (FlushAction) { - FlushAction->SetResult(result); - } - } - - void SetErrorReason(const TString& errorReason) { - ErrorReason = errorReason; - if (FlushAction) { - FlushAction->SetErrorReason(errorReason); - } - } - - void RegisterBadOffset(ui64 offset) { - BadOffsets.push_back(offset); - } - - virtual bool CanHandleResult() const { - return Result == EIoResult::Ok; - } - virtual void Exec(TActorSystem *actorSystem) = 0; - virtual void Release(TActorSystem *) = 0; - virtual ~TCompletionAction() {} -}; - -} + +#include <util/system/hp_timer.h> +#include <util/generic/string.h> +#include <library/cpp/lwtrace/shuttle.h> + +namespace NKikimr::NPDisk { + +struct TCompletionAction { + ui64 OperationIdx; + NHPTimer::STime SubmitTime; + TCompletionAction *FlushAction = nullptr; + ui64 CostNs = 0; + NWilson::TTraceId TraceId; + EIoResult Result = EIoResult::Unknown; + TString ErrorReason; + + mutable NLWTrace::TOrbit Orbit; +protected: + TVector<ui64> BadOffsets; + +public: + void SetResult(const 
EIoResult result) { + Result = result; + if (FlushAction) { + FlushAction->SetResult(result); + } + } + + void SetErrorReason(const TString& errorReason) { + ErrorReason = errorReason; + if (FlushAction) { + FlushAction->SetErrorReason(errorReason); + } + } + + void RegisterBadOffset(ui64 offset) { + BadOffsets.push_back(offset); + } + + virtual bool CanHandleResult() const { + return Result == EIoResult::Ok; + } + virtual void Exec(TActorSystem *actorSystem) = 0; + virtual void Release(TActorSystem *) = 0; + virtual ~TCompletionAction() {} +}; + +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp index 4862711b89..3843ca650c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp @@ -1,6 +1,6 @@ #include "blobstorage_pdisk_completion_impl.h" -#include "blobstorage_pdisk_impl.h" -#include "blobstorage_pdisk_sectorrestorator.h" +#include "blobstorage_pdisk_impl.h" +#include "blobstorage_pdisk_sectorrestorator.h" namespace NKikimr { namespace NPDisk { @@ -13,23 +13,23 @@ namespace NPDisk { // Log write completion action //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void TCompletionLogWrite::Exec(TActorSystem *actorSystem) { - bool isNewChunksCommited = false; - if (CommitedLogChunks) { - auto* req = PDisk->ReqCreator.CreateFromArgs<TCommitLogChunks>(std::move(CommitedLogChunks)); - PDisk->InputRequest(req); + bool isNewChunksCommited = false; + if (CommitedLogChunks) { + auto* req = PDisk->ReqCreator.CreateFromArgs<TCommitLogChunks>(std::move(CommitedLogChunks)); + PDisk->InputRequest(req); isNewChunksCommited = true; } for (auto it = Commits.begin(); it != Commits.end(); ++it) { TLogWrite *evLog = *it; Y_VERIFY(evLog); if (evLog->Result->Status == NKikimrProto::OK) { - TRequestBase *req = 
PDisk->ReqCreator.CreateFromArgs<TLogCommitDone>(*evLog); - PDisk->InputRequest(req); + TRequestBase *req = PDisk->ReqCreator.CreateFromArgs<TLogCommitDone>(*evLog); + PDisk->InputRequest(req); } } auto sendResponse = [&] (TLogWrite *evLog) { - Y_VERIFY_DEBUG(evLog->Result); + Y_VERIFY_DEBUG(evLog->Result); actorSystem->Send(evLog->Sender, evLog->Result.Release()); PDisk->Mon.WriteLog.CountResponse(); }; @@ -39,22 +39,22 @@ void TCompletionLogWrite::Exec(TActorSystem *actorSystem) { for (auto it = LogWriteQueue.begin(); it != LogWriteQueue.end(); ++it) { TLogWrite &evLog = *(*it); TLogWrite *&batch = batchMap[evLog.Owner]; - LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId - << " ReqId# " << evLog.ReqId.Id << " TEvLogResult Sender# " << evLog.Sender.LocalId() - << " Lsn# " << evLog.Lsn << " Latency# " << evLog.LifeDurationMs(now) - << " InputTime# " << HPMilliSeconds(evLog.InputTime - evLog.CreationTime) - << " ScheduleTime# " << HPMilliSeconds(evLog.ScheduleTime - evLog.InputTime) - << " DeviceTime# " << HPMilliSeconds(now - evLog.ScheduleTime) - << " Size# " << evLog.Data.size()); + LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId + << " ReqId# " << evLog.ReqId.Id << " TEvLogResult Sender# " << evLog.Sender.LocalId() + << " Lsn# " << evLog.Lsn << " Latency# " << evLog.LifeDurationMs(now) + << " InputTime# " << HPMilliSeconds(evLog.InputTime - evLog.CreationTime) + << " ScheduleTime# " << HPMilliSeconds(evLog.ScheduleTime - evLog.InputTime) + << " DeviceTime# " << HPMilliSeconds(now - evLog.ScheduleTime) + << " Size# " << evLog.Data.size()); LWTRACK(PDiskLogWriteComplete, evLog.Orbit, PDisk->PDiskId, evLog.ReqId.Id, HPSecondsFloat(evLog.CreationTime), double(evLog.Cost) / 1000000.0, HPMilliSecondsFloat(now - evLog.CreationTime), HPMilliSecondsFloat(evLog.InputTime - evLog.CreationTime), HPMilliSecondsFloat(evLog.ScheduleTime - evLog.InputTime), HPMilliSecondsFloat(now - evLog.ScheduleTime)); - if 
(evLog.Result->Results) { - evLog.Result->Results.front().Orbit = std::move(evLog.Orbit); - } + if (evLog.Result->Results) { + evLog.Result->Results.front().Orbit = std::move(evLog.Orbit); + } PDisk->Mon.LogResponseTime.Increment(evLog.LifeDurationMs(now)); @@ -92,19 +92,19 @@ void TCompletionLogWrite::Exec(TActorSystem *actorSystem) { } void TCompletionLogWrite::Release(TActorSystem *actorSystem) { - switch (Result) { - case EIoResult::Ok: - case EIoResult::Unknown: - break; - default: - for (TLogWrite *logWrite : LogWriteQueue) { - auto res = MakeHolder<TEvLogResult>(NKikimrProto::CORRUPTED, NKikimrBlobStorage::StatusIsValid, - ErrorReason); - actorSystem->Send(logWrite->Sender, res.Release()); - PDisk->Mon.WriteLog.CountResponse(); - } - } - + switch (Result) { + case EIoResult::Ok: + case EIoResult::Unknown: + break; + default: + for (TLogWrite *logWrite : LogWriteQueue) { + auto res = MakeHolder<TEvLogResult>(NKikimrProto::CORRUPTED, NKikimrBlobStorage::StatusIsValid, + ErrorReason); + actorSystem->Send(logWrite->Sender, res.Release()); + PDisk->Mon.WriteLog.CountResponse(); + } + } + delete this; } @@ -112,202 +112,202 @@ void TCompletionLogWrite::Release(TActorSystem *actorSystem) { // Chunk read completion actions //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -TCompletionChunkReadPart::TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, ui64 rawReadSize, - ui64 payloadReadSize, ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart, - const TControlWrapper& useT1ha0Hasher) - : TCompletionAction() - , PDisk(pDisk) - , Read(read) - , RawReadSize(rawReadSize) - , PayloadReadSize(payloadReadSize) - , CommonBufferOffset(commonBufferOffset) - , CumulativeCompletion(cumulativeCompletion) - , Buffer(PDisk->BufferPool->Pop()) - , IsTheLastPart(isTheLastPart) - , UseT1ha0Hasher(useT1ha0Hasher) -{ - if (!IsTheLastPart) { - 
CumulativeCompletion->AddPart(); - } -} - +TCompletionChunkReadPart::TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, ui64 rawReadSize, + ui64 payloadReadSize, ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart, + const TControlWrapper& useT1ha0Hasher) + : TCompletionAction() + , PDisk(pDisk) + , Read(read) + , RawReadSize(rawReadSize) + , PayloadReadSize(payloadReadSize) + , CommonBufferOffset(commonBufferOffset) + , CumulativeCompletion(cumulativeCompletion) + , Buffer(PDisk->BufferPool->Pop()) + , IsTheLastPart(isTheLastPart) + , UseT1ha0Hasher(useT1ha0Hasher) +{ + if (!IsTheLastPart) { + CumulativeCompletion->AddPart(); + } +} + TCompletionChunkReadPart::~TCompletionChunkReadPart() { if (CumulativeCompletion) { CumulativeCompletion->PartDeleted(PDisk->ActorSystem); } - AtomicSub(PDisk->InFlightChunkRead, RawReadSize); + AtomicSub(PDisk->InFlightChunkRead, RawReadSize); } -TBuffer *TCompletionChunkReadPart::GetBuffer() { - return Buffer.Get(); +TBuffer *TCompletionChunkReadPart::GetBuffer() { + return Buffer.Get(); } -void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) { - Y_VERIFY(actorSystem); - Y_VERIFY(CumulativeCompletion); - if (TCompletionAction::Result != EIoResult::Ok) { - Release(actorSystem); - return; - } - +void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) { + Y_VERIFY(actorSystem); + Y_VERIFY(CumulativeCompletion); + if (TCompletionAction::Result != EIoResult::Ok) { + Release(actorSystem); + return; + } + const TDiskFormat &format = PDisk->Format; ui64 firstSector; ui64 lastSector; ui64 sectorOffset; - bool isOk = ParseSectorOffset(PDisk->Format, actorSystem, PDisk->PDiskId, - Read->Offset + CommonBufferOffset, PayloadReadSize, firstSector, lastSector, sectorOffset); - Y_VERIFY(isOk); - - TBufferWithGaps *commonBuffer = CumulativeCompletion->GetCommonBuffer(); - ui8 *destination = commonBuffer->RawDataPtr(CommonBufferOffset, PayloadReadSize); - - ui8* source = 
Buffer->Data(); - - TPDiskStreamCypher cypher(PDisk->Cfg->EnableSectorEncryption); + bool isOk = ParseSectorOffset(PDisk->Format, actorSystem, PDisk->PDiskId, + Read->Offset + CommonBufferOffset, PayloadReadSize, firstSector, lastSector, sectorOffset); + Y_VERIFY(isOk); + + TBufferWithGaps *commonBuffer = CumulativeCompletion->GetCommonBuffer(); + ui8 *destination = commonBuffer->RawDataPtr(CommonBufferOffset, PayloadReadSize); + + ui8* source = Buffer->Data(); + + TPDiskStreamCypher cypher(PDisk->Cfg->EnableSectorEncryption); cypher.SetKey(format.ChunkKey); - ui64 sectorIdx = firstSector; + ui64 sectorIdx = firstSector; - ui32 sectorPayloadSize; - if (CommonBufferOffset == 0) { // First part - sectorPayloadSize = Min(format.SectorPayloadSize() - sectorOffset, PayloadReadSize); - } else { // Middle and last parts - sectorPayloadSize = Min(format.SectorPayloadSize(), PayloadReadSize); - sectorOffset = 0; - } + ui32 sectorPayloadSize; + if (CommonBufferOffset == 0) { // First part + sectorPayloadSize = Min(format.SectorPayloadSize() - sectorOffset, PayloadReadSize); + } else { // Middle and last parts + sectorPayloadSize = Min(format.SectorPayloadSize(), PayloadReadSize); + sectorOffset = 0; + } ui64 chunkNonce = CumulativeCompletion->GetChunkNonce(); ui32 beginBadUserOffset = 0xffffffff; ui32 endBadUserOffset = 0xffffffff; ui32 userSectorSize = format.SectorPayloadSize(); - while (PayloadReadSize > 0) { - ui32 beginUserOffset = sectorIdx * userSectorSize; + while (PayloadReadSize > 0) { + ui32 beginUserOffset = sectorIdx * userSectorSize; - TSectorRestorator restorator(false, 1, false, - format, actorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); + TSectorRestorator restorator(false, 1, false, + format, actorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); ui64 lastNonce = Min((ui64)0, chunkNonce - 1); - restorator.Restore(source, format.Offset(Read->ChunkIdx, sectorIdx), format.MagicDataChunk, 
lastNonce, - UseT1ha0Hasher); + restorator.Restore(source, format.Offset(Read->ChunkIdx, sectorIdx), format.MagicDataChunk, lastNonce, + UseT1ha0Hasher); - const ui32 sectorCount = 1; + const ui32 sectorCount = 1; if (restorator.GoodSectorCount != sectorCount) { if (beginBadUserOffset == 0xffffffff) { beginBadUserOffset = beginUserOffset; } - endBadUserOffset = beginUserOffset + userSectorSize; + endBadUserOffset = beginUserOffset + userSectorSize; } else { if (beginBadUserOffset != 0xffffffff) { LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId - << " ReqId# " << Read->ReqId - << " Can't read chunk chunkIdx# " << Read->ChunkIdx - << " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset - << " due to multiple sectors with incorrect hashes."); - commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset); + << " ReqId# " << Read->ReqId + << " Can't read chunk chunkIdx# " << Read->ChunkIdx + << " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset + << " due to multiple sectors with incorrect hashes."); + commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset); beginBadUserOffset = 0xffffffff; endBadUserOffset = 0xffffffff; } } - Y_VERIFY(sectorIdx >= firstSector); - - // Decrypt data - if (beginBadUserOffset != 0xffffffff) { - memset(destination, 0, sectorPayloadSize); - } else { - TDataSectorFooter *footer = (TDataSectorFooter*) (source + format.SectorSize - sizeof(TDataSectorFooter)); - if (footer->Nonce != chunkNonce + sectorIdx) { - ui32 userOffset = sectorIdx * userSectorSize; - LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId - << " ReqId# " << Read->ReqId - << " Can't read chunk chunkIdx# " << Read->ChunkIdx - << " nonce mismatch: expected# " << (ui64)(chunkNonce + sectorIdx) - << ", on-disk# " << (ui64)footer->Nonce << " for userOffset# " << userOffset << " !"); - if (beginBadUserOffset == 0xffffffff) { - 
beginBadUserOffset = userOffset; - } - endBadUserOffset = beginUserOffset + userSectorSize; - memset(destination, 0, sectorPayloadSize); - } else { - cypher.StartMessage(footer->Nonce); - if (sectorOffset > 0 || intptr_t(destination) % 32) { - cypher.InplaceEncrypt(source, sectorOffset + sectorPayloadSize); - if (CommonBufferOffset == 0 || !IsTheLastPart) { - memcpy(destination, source + sectorOffset, sectorPayloadSize); - } else { - memcpy(destination, source, sectorPayloadSize); + Y_VERIFY(sectorIdx >= firstSector); + + // Decrypt data + if (beginBadUserOffset != 0xffffffff) { + memset(destination, 0, sectorPayloadSize); + } else { + TDataSectorFooter *footer = (TDataSectorFooter*) (source + format.SectorSize - sizeof(TDataSectorFooter)); + if (footer->Nonce != chunkNonce + sectorIdx) { + ui32 userOffset = sectorIdx * userSectorSize; + LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId + << " ReqId# " << Read->ReqId + << " Can't read chunk chunkIdx# " << Read->ChunkIdx + << " nonce mismatch: expected# " << (ui64)(chunkNonce + sectorIdx) + << ", on-disk# " << (ui64)footer->Nonce << " for userOffset# " << userOffset << " !"); + if (beginBadUserOffset == 0xffffffff) { + beginBadUserOffset = userOffset; + } + endBadUserOffset = beginUserOffset + userSectorSize; + memset(destination, 0, sectorPayloadSize); + } else { + cypher.StartMessage(footer->Nonce); + if (sectorOffset > 0 || intptr_t(destination) % 32) { + cypher.InplaceEncrypt(source, sectorOffset + sectorPayloadSize); + if (CommonBufferOffset == 0 || !IsTheLastPart) { + memcpy(destination, source + sectorOffset, sectorPayloadSize); + } else { + memcpy(destination, source, sectorPayloadSize); } } else { - cypher.Encrypt(destination, source, sectorPayloadSize); + cypher.Encrypt(destination, source, sectorPayloadSize); + } + if (CanarySize > 0) { + ui32 canaryPosition = sectorOffset + sectorPayloadSize; + ui32 sizeToEncrypt = format.SectorSize - canaryPosition - 
ui32(sizeof(TDataSectorFooter)); + cypher.InplaceEncrypt(source + canaryPosition, sizeToEncrypt); + PDisk->CheckLogCanary(source, Read->ChunkIdx, sectorIdx); } - if (CanarySize > 0) { - ui32 canaryPosition = sectorOffset + sectorPayloadSize; - ui32 sizeToEncrypt = format.SectorSize - canaryPosition - ui32(sizeof(TDataSectorFooter)); - cypher.InplaceEncrypt(source + canaryPosition, sizeToEncrypt); - PDisk->CheckLogCanary(source, Read->ChunkIdx, sectorIdx); - } } } - destination += sectorPayloadSize; - source += format.SectorSize; - PayloadReadSize -= sectorPayloadSize; - sectorPayloadSize = Min(format.SectorPayloadSize(), PayloadReadSize); - sectorOffset = 0; - ++sectorIdx; + destination += sectorPayloadSize; + source += format.SectorSize; + PayloadReadSize -= sectorPayloadSize; + sectorPayloadSize = Min(format.SectorPayloadSize(), PayloadReadSize); + sectorOffset = 0; + ++sectorIdx; } if (beginBadUserOffset != 0xffffffff) { - LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId - << " ReqId# " << Read->ReqId - << " Can't read chunk chunkIdx# " << Read->ChunkIdx - << " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset - << " due to multiple sectors with incorrect hashes/nonces."); - commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset); + LOG_INFO_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDisk->PDiskId + << " ReqId# " << Read->ReqId + << " Can't read chunk chunkIdx# " << Read->ChunkIdx + << " beginBadUserOffet# " << beginBadUserOffset << " endBadUserOffset# " << endBadUserOffset + << " due to multiple sectors with incorrect hashes/nonces."); + commonBuffer->AddGap(beginBadUserOffset, endBadUserOffset); beginBadUserOffset = 0xffffffff; endBadUserOffset = 0xffffffff; } - - LWTRACK(PDiskChunkReadPieceComplete, Read->Orbit, PDisk->PDiskId, RawReadSize, CommonBufferOffset); - CumulativeCompletion->PartReadComplete(actorSystem); - CumulativeCompletion = nullptr; - - 
AtomicSub(PDisk->InFlightChunkRead, RawReadSize); - RawReadSize = 0; - delete this; -} - -void TCompletionChunkReadPart::Release(TActorSystem *actorSystem) { - if (CumulativeCompletion) { - CumulativeCompletion->PartDeleted(actorSystem); - CumulativeCompletion = nullptr; - } - AtomicSub(PDisk->InFlightChunkRead, RawReadSize); - RawReadSize = 0; - delete this; -} - -TCompletionChunkRead::~TCompletionChunkRead() { - OnDestroy(); - Y_VERIFY(CommonBuffer.Empty()); - Y_VERIFY(DoubleFreeCanary == ReferenceCanary, "DoubleFreeCanary in TCompletionChunkRead is dead!"); - // Set DoubleFreeCanary to 0 and make sure compiler will not eliminate that action - SecureWipeBuffer((ui8*)&DoubleFreeCanary, sizeof(DoubleFreeCanary)); -} - -void TCompletionChunkRead::Exec(TActorSystem *actorSystem) { - THolder<TEvChunkReadResult> result = MakeHolder<TEvChunkReadResult>(NKikimrProto::OK, + + LWTRACK(PDiskChunkReadPieceComplete, Read->Orbit, PDisk->PDiskId, RawReadSize, CommonBufferOffset); + CumulativeCompletion->PartReadComplete(actorSystem); + CumulativeCompletion = nullptr; + + AtomicSub(PDisk->InFlightChunkRead, RawReadSize); + RawReadSize = 0; + delete this; +} + +void TCompletionChunkReadPart::Release(TActorSystem *actorSystem) { + if (CumulativeCompletion) { + CumulativeCompletion->PartDeleted(actorSystem); + CumulativeCompletion = nullptr; + } + AtomicSub(PDisk->InFlightChunkRead, RawReadSize); + RawReadSize = 0; + delete this; +} + +TCompletionChunkRead::~TCompletionChunkRead() { + OnDestroy(); + Y_VERIFY(CommonBuffer.Empty()); + Y_VERIFY(DoubleFreeCanary == ReferenceCanary, "DoubleFreeCanary in TCompletionChunkRead is dead!"); + // Set DoubleFreeCanary to 0 and make sure compiler will not eliminate that action + SecureWipeBuffer((ui8*)&DoubleFreeCanary, sizeof(DoubleFreeCanary)); +} + +void TCompletionChunkRead::Exec(TActorSystem *actorSystem) { + THolder<TEvChunkReadResult> result = MakeHolder<TEvChunkReadResult>(NKikimrProto::OK, Read->ChunkIdx, Read->Offset, Read->Cookie, 
PDisk->GetStatusFlags(Read->Owner, Read->OwnerGroupType), ""); - result->Data = std::move(CommonBuffer); - CommonBuffer.Clear(); - Y_VERIFY(result->Data.IsDetached()); - - result->Data.Commit(); - + result->Data = std::move(CommonBuffer); + CommonBuffer.Clear(); + Y_VERIFY(result->Data.IsDetached()); + + result->Data.Commit(); + Y_VERIFY(Read); - LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId << " ReqId# " << Read->ReqId.Id - << " " << result->ToString() << " To# " << Read->Sender.LocalId()); + LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDisk->PDiskId << " ReqId# " << Read->ReqId.Id + << " " << result->ToString() << " To# " << Read->Sender.LocalId()); - double responseTimeMs = HPMilliSecondsFloat(HPNow() - Read->CreationTime); - PDisk->Mon.IncrementResponseTime(Read->PriorityClass, responseTimeMs, Read->Size); + double responseTimeMs = HPMilliSecondsFloat(HPNow() - Read->CreationTime); + PDisk->Mon.IncrementResponseTime(Read->PriorityClass, responseTimeMs, Read->Size); LWTRACK(PDiskChunkResponseTime, Read->Orbit, PDisk->PDiskId, Read->ReqId.Id, Read->PriorityClass, responseTimeMs, Read->Size); @@ -317,36 +317,36 @@ void TCompletionChunkRead::Exec(TActorSystem *actorSystem) { delete this; } -void TCompletionChunkRead::ReplyError(TActorSystem *actorSystem, TString reason) { - Y_VERIFY(!Read->IsReplied); - CommonBuffer.Clear(); - - TStringStream error; - error << "PDiskId# " << PDisk->PDiskId << " ReqId# " << Read->ReqId << " reason# " << reason; - auto result = MakeHolder<TEvChunkReadResult>(NKikimrProto::CORRUPTED, - Read->ChunkIdx, Read->Offset, Read->Cookie, - PDisk->GetStatusFlags(Read->Owner, Read->OwnerGroupType), error.Str()); - LOG_WARN_S(*actorSystem, NKikimrServices::BS_PDISK, error.Str()); - actorSystem->Send(Read->Sender, result.Release()); - Read->IsReplied = true; -} - -// Returns true if there is some pending requests to wait -bool TCompletionChunkRead::PartReadComplete(TActorSystem 
*actorSystem) { - TAtomicBase partsPending = AtomicDecrement(PartsPending); - if (partsPending == 0) { - if (AtomicGet(Deletes) == 0) { - Exec(actorSystem); - } else { - ReplyError(actorSystem, "One of ChunkReadPart failed due to unknown reason"); - delete this; - } - return true; - } else { - return false; - } -} - +void TCompletionChunkRead::ReplyError(TActorSystem *actorSystem, TString reason) { + Y_VERIFY(!Read->IsReplied); + CommonBuffer.Clear(); + + TStringStream error; + error << "PDiskId# " << PDisk->PDiskId << " ReqId# " << Read->ReqId << " reason# " << reason; + auto result = MakeHolder<TEvChunkReadResult>(NKikimrProto::CORRUPTED, + Read->ChunkIdx, Read->Offset, Read->Cookie, + PDisk->GetStatusFlags(Read->Owner, Read->OwnerGroupType), error.Str()); + LOG_WARN_S(*actorSystem, NKikimrServices::BS_PDISK, error.Str()); + actorSystem->Send(Read->Sender, result.Release()); + Read->IsReplied = true; +} + +// Returns true if there is some pending requests to wait +bool TCompletionChunkRead::PartReadComplete(TActorSystem *actorSystem) { + TAtomicBase partsPending = AtomicDecrement(PartsPending); + if (partsPending == 0) { + if (AtomicGet(Deletes) == 0) { + Exec(actorSystem); + } else { + ReplyError(actorSystem, "One of ChunkReadPart failed due to unknown reason"); + delete this; + } + return true; + } else { + return false; + } +} + void TCompletionEventSender::Exec(TActorSystem *actorSystem) { if (actorSystem) { if (Event) { @@ -358,27 +358,27 @@ void TCompletionEventSender::Exec(TActorSystem *actorSystem) { if (Event) { actorSystem->Send(Recipient, Event.Release()); } - if (Req) { - PDisk->InputRequest(Req.Release()); - } + if (Req) { + PDisk->InputRequest(Req.Release()); + } if (Counter) { Counter->Inc(); } delete this; } -void TChunkTrimCompletion::Exec(TActorSystem *actorSystem) { - double responseTimeMs = HPMilliSecondsFloat(HPNow() - StartTime); - LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, - "PDiskId# " << PDisk->PDiskId << " ReqId# " << ReqId - 
<< " TChunkTrimCompletion timeMs# " - << ui64(responseTimeMs) << " sizeBytes# " << SizeBytes); - LWPROBE(PDiskTrimResponseTime, PDisk->PDiskId, ReqId.Id, responseTimeMs, SizeBytes); - PDisk->Mon.Trim.CountResponse(); - TTryTrimChunk *tryTrim = PDisk->ReqCreator.CreateFromArgs<TTryTrimChunk>(SizeBytes); - PDisk->InputRequest(tryTrim); - delete this; -} +void TChunkTrimCompletion::Exec(TActorSystem *actorSystem) { + double responseTimeMs = HPMilliSecondsFloat(HPNow() - StartTime); + LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, + "PDiskId# " << PDisk->PDiskId << " ReqId# " << ReqId + << " TChunkTrimCompletion timeMs# " + << ui64(responseTimeMs) << " sizeBytes# " << SizeBytes); + LWPROBE(PDiskTrimResponseTime, PDisk->PDiskId, ReqId.Id, responseTimeMs, SizeBytes); + PDisk->Mon.Trim.CountResponse(); + TTryTrimChunk *tryTrim = PDisk->ReqCreator.CreateFromArgs<TTryTrimChunk>(SizeBytes); + PDisk->InputRequest(tryTrim); + delete this; +} } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.h index ca069592fd..2baba160c5 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.h @@ -3,11 +3,11 @@ #include "blobstorage_pdisk.h" #include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_requestimpl.h" -#include "blobstorage_pdisk_util_signal_event.h" - +#include "blobstorage_pdisk_requestimpl.h" +#include "blobstorage_pdisk_util_signal_event.h" + #include <ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h> - + #include <library/cpp/containers/stack_vector/stack_vec.h> namespace NKikimr { @@ -18,19 +18,19 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Completion actions 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TRequestBase; +class TRequestBase; + +class TPDisk; -class TPDisk; - class TCompletionEventSender : public TCompletionAction { TPDisk *PDisk; const TActorId Recipient; THolder<IEventBase> Event; NMonitoring::TDynamicCounters::TCounterPtr Counter; - + public: - THolder<TRequestBase> Req; - + THolder<TRequestBase> Req; + TCompletionEventSender(TPDisk *pDisk, const TActorId &recipient, IEventBase *event, NMonitoring::TDynamicCounters::TCounterPtr &counter) : PDisk(pDisk) @@ -46,12 +46,12 @@ public: , Counter(nullptr) {} - TCompletionEventSender(TPDisk *pDisk, THolder<TRequestBase> req) - : PDisk(pDisk) - , Counter(nullptr) - , Req(std::move(req)) - {} - + TCompletionEventSender(TPDisk *pDisk, THolder<TRequestBase> req) + : PDisk(pDisk) + , Counter(nullptr) + , Req(std::move(req)) + {} + TCompletionEventSender(TPDisk *pDisk) : PDisk(pDisk) , Counter(nullptr) @@ -65,21 +65,21 @@ public: } }; -class TCompletionChunkWrite : public TCompletionAction { +class TCompletionChunkWrite : public TCompletionAction { const TActorId Recipient; - THolder<TEvChunkWriteResult> Event; + THolder<TEvChunkWriteResult> Event; TPDiskMon *Mon; ui32 PDiskId; NHPTimer::STime StartTime; size_t SizeBytes; ui8 PriorityClass; - std::function<void()> OnDestroy; + std::function<void()> OnDestroy; TReqId ReqId; - + public: - TCompletionChunkWrite(const TActorId &recipient, TEvChunkWriteResult *event, - TPDiskMon *mon, ui32 pdiskId, NHPTimer::STime startTime, size_t sizeBytes, - ui8 priorityClass, std::function<void()> onDestroy, TReqId reqId) + TCompletionChunkWrite(const TActorId &recipient, TEvChunkWriteResult *event, + TPDiskMon *mon, ui32 pdiskId, NHPTimer::STime startTime, size_t sizeBytes, + ui8 priorityClass, std::function<void()> onDestroy, TReqId reqId) : Recipient(recipient) , Event(event) , Mon(mon) @@ -87,26 +87,26 @@ public: , StartTime(startTime) , 
SizeBytes(sizeBytes) , PriorityClass(priorityClass) - , OnDestroy(std::move(onDestroy)) + , OnDestroy(std::move(onDestroy)) , ReqId(reqId) - { - } + { + } - ~TCompletionChunkWrite() { - OnDestroy(); - } + ~TCompletionChunkWrite() { + OnDestroy(); + } void Exec(TActorSystem *actorSystem) override { double responseTimeMs = HPMilliSecondsFloat(HPNow() - StartTime); LOG_DEBUG_S(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " ReqId# " << ReqId - << "TCompletionChunkWrite " << Event->ToString().data() + << "TCompletionChunkWrite " << Event->ToString().data() << " PriorityClass# " << (ui32)PriorityClass << " timeMs# " << ui64(responseTimeMs) << " sizeBytes# " << SizeBytes); if (Mon) { - Mon->IncrementResponseTime(PriorityClass, responseTimeMs, SizeBytes); + Mon->IncrementResponseTime(PriorityClass, responseTimeMs, SizeBytes); } - LWTRACK(PDiskChunkResponseTime, Orbit, PDiskId, ReqId.Id, PriorityClass, responseTimeMs, SizeBytes); + LWTRACK(PDiskChunkResponseTime, Orbit, PDiskId, ReqId.Id, PriorityClass, responseTimeMs, SizeBytes); actorSystem->Send(Recipient, Event.Release()); if (Mon) { Mon->GetWriteCounter(PriorityClass)->CountResponse(); @@ -115,9 +115,9 @@ public: } void Release(TActorSystem *actorSystem) override { - Event->Status = NKikimrProto::CORRUPTED; - Event->ErrorReason = ErrorReason; - actorSystem->Send(Recipient, Event.Release()); + Event->Status = NKikimrProto::CORRUPTED; + Event->ErrorReason = ErrorReason; + actorSystem->Send(Recipient, Event.Release()); delete this; } }; @@ -151,81 +151,81 @@ public: }; class TCompletionChunkRead : public TCompletionAction { - static constexpr ui64 ReferenceCanary = 6422729157296672589ull; - + static constexpr ui64 ReferenceCanary = 6422729157296672589ull; + TPDisk *PDisk; TIntrusivePtr<TChunkRead> Read; - TBufferWithGaps CommonBuffer; + TBufferWithGaps CommonBuffer; TAtomic PartsPending; TAtomic Deletes; - std::function<void()> OnDestroy; + std::function<void()> OnDestroy; ui64 ChunkNonce; - - const 
ui64 DoubleFreeCanary; + + const ui64 DoubleFreeCanary; public: - TCompletionChunkRead(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, std::function<void()> onDestroy, + TCompletionChunkRead(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, std::function<void()> onDestroy, ui64 chunkNonce) : TCompletionAction() , PDisk(pDisk) , Read(read) - , CommonBuffer(read->Offset, read->Size) - // 1 in PartsPending stands for the last part, so if any non-last part completes it will not lead to call of Exec() - , PartsPending(1) + , CommonBuffer(read->Offset, read->Size) + // 1 in PartsPending stands for the last part, so if any non-last part completes it will not lead to call of Exec() + , PartsPending(1) , Deletes(0) - , OnDestroy(std::move(onDestroy)) + , OnDestroy(std::move(onDestroy)) , ChunkNonce(chunkNonce) - , DoubleFreeCanary(ReferenceCanary) - {} + , DoubleFreeCanary(ReferenceCanary) + {} void Exec(TActorSystem *actorSystem) override; ~TCompletionChunkRead(); - void ReplyError(TActorSystem *actorSystem, TString reason); - // Returns true if there is some pending requests to wait - bool PartReadComplete(TActorSystem *actorSystem); + void ReplyError(TActorSystem *actorSystem, TString reason); + // Returns true if there is some pending requests to wait + bool PartReadComplete(TActorSystem *actorSystem); void AddPart() { AtomicIncrement(PartsPending); } - TBufferWithGaps *GetCommonBuffer() { - return &CommonBuffer; - } - + TBufferWithGaps *GetCommonBuffer() { + return &CommonBuffer; + } + ui64 GetChunkNonce() { return ChunkNonce; } - bool PartDeleted(TActorSystem *actorSystem) { + bool PartDeleted(TActorSystem *actorSystem) { AtomicIncrement(Deletes); - return PartReadComplete(actorSystem); + return PartReadComplete(actorSystem); } void Release(TActorSystem *actorSystem) override { - ReplyError(actorSystem, "TCompletionChunkRead is released"); + ReplyError(actorSystem, "TCompletionChunkRead is released"); } }; class TCompletionChunkReadPart : public TCompletionAction { 
TPDisk *PDisk; - TIntrusivePtr<TChunkRead> Read; - ui32 RawReadSize; + TIntrusivePtr<TChunkRead> Read; + ui32 RawReadSize; ui64 PayloadReadSize; - ui64 CommonBufferOffset; + ui64 CommonBufferOffset; TCompletionChunkRead *CumulativeCompletion; - TBuffer::TPtr Buffer; - bool IsTheLastPart; - TControlWrapper UseT1ha0Hasher; + TBuffer::TPtr Buffer; + bool IsTheLastPart; + TControlWrapper UseT1ha0Hasher; public: - TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, ui64 rawReadSize, ui64 payloadReadSize, - ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart, - const TControlWrapper& useT1ha0Hasher); - - - bool CanHandleResult() const override { - return true; - } - - TBuffer *GetBuffer(); + TCompletionChunkReadPart(TPDisk *pDisk, TIntrusivePtr<TChunkRead> &read, ui64 rawReadSize, ui64 payloadReadSize, + ui64 commonBufferOffset, TCompletionChunkRead *cumulativeCompletion, bool isTheLastPart, + const TControlWrapper& useT1ha0Hasher); + + + bool CanHandleResult() const override { + return true; + } + + TBuffer *GetBuffer(); void Exec(TActorSystem *actorSystem) override; void Release(TActorSystem *actorSystem) override; virtual ~TCompletionChunkReadPart(); @@ -244,7 +244,7 @@ public: , CompletionActionPtr((TAtomicBase)nullptr) {} - void SetCompletionAction(TCompletionAction *completionAction) { + void SetCompletionAction(TCompletionAction *completionAction) { AtomicSet(CompletionActionPtr, (TAtomicBase)completionAction); Y_VERIFY(AtomicGet(PartsPending) > 0); } @@ -269,7 +269,7 @@ public: completionAction->Exec(actorSystem); } } - delete this; + delete this; } } }; @@ -294,46 +294,46 @@ public: } }; -class TCompletionSignal : public TCompletionAction { - TSignalEvent *Event; - -public: - TCompletionSignal(TSignalEvent *event) - : Event(event) - {} - - void Exec(TActorSystem *actorSystem) override { - Y_UNUSED(actorSystem); - Event->Signal(); - delete this; - } - - void Release(TActorSystem *actorSystem) override { - 
Exec(actorSystem); - } -}; - -class TChunkTrimCompletion : public TCompletionAction { - TPDisk *PDisk; - NHPTimer::STime StartTime; - size_t SizeBytes; - TReqId ReqId; - -public: - TChunkTrimCompletion(TPDisk *pdisk, NHPTimer::STime startTime, size_t sizeBytes, TReqId reqId) - : PDisk(pdisk) - , StartTime(startTime) - , SizeBytes(sizeBytes) - , ReqId(reqId) - {} - - void Exec(TActorSystem *actorSystem) override; - - void Release(TActorSystem *actorSystem) override { - Y_UNUSED(actorSystem); - delete this; - } -}; - +class TCompletionSignal : public TCompletionAction { + TSignalEvent *Event; + +public: + TCompletionSignal(TSignalEvent *event) + : Event(event) + {} + + void Exec(TActorSystem *actorSystem) override { + Y_UNUSED(actorSystem); + Event->Signal(); + delete this; + } + + void Release(TActorSystem *actorSystem) override { + Exec(actorSystem); + } +}; + +class TChunkTrimCompletion : public TCompletionAction { + TPDisk *PDisk; + NHPTimer::STime StartTime; + size_t SizeBytes; + TReqId ReqId; + +public: + TChunkTrimCompletion(TPDisk *pdisk, NHPTimer::STime startTime, size_t sizeBytes, TReqId reqId) + : PDisk(pdisk) + , StartTime(startTime) + , SizeBytes(sizeBytes) + , ReqId(reqId) + {} + + void Exec(TActorSystem *actorSystem) override; + + void Release(TActorSystem *actorSystem) override { + Y_UNUSED(actorSystem); + delete this; + } +}; + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h index 3ce0d1826c..a3d764aa7e 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h @@ -1,8 +1,8 @@ #pragma once #include "defs.h" - -#include "blobstorage_pdisk_util_wcache.h" - + +#include "blobstorage_pdisk_util_wcache.h" + #include <ydb/core/base/blobstorage.h> #include <ydb/core/base/compile_time_flags.h> #include <ydb/core/blobstorage/base/vdisk_priorities.h> @@ -12,95 +12,95 @@ #include 
<ydb/library/pdisk_io/file_params.h> #include <ydb/library/pdisk_io/sector_map.h> - + namespace NKikimr { -struct TPDiskSchedulerConfig { - ui64 BytesSchedulerWeight = BytesSchedulerWeightDefault; - ui64 LogWeight = LogWeightDefault; - ui64 FreshWeight = FreshWeightDefault; - ui64 CompWeight = CompWeightDefault; - ui64 SyncLogWeight = SyncLogWeightDefault; - ui64 HugeWeight = HugeWeightDefault; - ui64 FastReadWeight = FastReadWeightDefault; - ui64 OtherReadWeight = OtherReadWeightDefault; - ui64 LoadWeight = LoadWeightDefault; - ui64 LowReadWeight = LowWeightDefault; - - TString ToString(bool isMultiline) const { - const char *x = isMultiline ? "\n" : ""; - TStringStream str; - str << "{TPDiskSchedulerConfig" << x; - str << " BytesSchedulerWeight# " << BytesSchedulerWeight << x; - str << " LogWeight# " << LogWeight << x; - str << " FreshWeight# " << FreshWeight << x; - str << " CompWeight# " << CompWeight << x; - str << " SyncLogWeight# " << SyncLogWeight << x; - str << " HugeWeight# " << HugeWeight << x; - str << " FastReadWeight# " << FastReadWeight << x; - str << " OtherReadWeight# " << OtherReadWeight << x; - str << " LoadWeight# " << LoadWeight << x; - str << " LowReadWeight# " << LowReadWeight << x; - str << "}" << x; - return str.Str(); - } - - void Apply(const NKikimrBlobStorage::TPDiskConfig *cfg) { - if (cfg->HasBytesSchedulerWeight()) { - BytesSchedulerWeight = cfg->GetBytesSchedulerWeight(); - } - if (cfg->HasLogWeight()) { - LogWeight = cfg->GetLogWeight(); - } - if (cfg->HasFreshWeight()) { - FreshWeight = cfg->GetFreshWeight(); - } - if (cfg->HasCompWeight()) { - CompWeight = cfg->GetCompWeight(); - } - if (cfg->HasSyncLogWeight()) { - SyncLogWeight = cfg->GetSyncLogWeight(); - } - if (cfg->HasHugeWeight()) { - HugeWeight = cfg->GetHugeWeight(); - } - if (cfg->HasFastReadWeight()) { - FastReadWeight = cfg->GetFastReadWeight(); - } - if (cfg->HasOtherReadWeight()) { - OtherReadWeight = cfg->GetOtherReadWeight(); - } - if (cfg->HasLoadWeight()) { - 
LoadWeight = cfg->GetLoadWeight(); - } - if (cfg->HasLowReadWeight()) { - LowReadWeight = cfg->GetLowReadWeight(); - } - } -}; - +struct TPDiskSchedulerConfig { + ui64 BytesSchedulerWeight = BytesSchedulerWeightDefault; + ui64 LogWeight = LogWeightDefault; + ui64 FreshWeight = FreshWeightDefault; + ui64 CompWeight = CompWeightDefault; + ui64 SyncLogWeight = SyncLogWeightDefault; + ui64 HugeWeight = HugeWeightDefault; + ui64 FastReadWeight = FastReadWeightDefault; + ui64 OtherReadWeight = OtherReadWeightDefault; + ui64 LoadWeight = LoadWeightDefault; + ui64 LowReadWeight = LowWeightDefault; + + TString ToString(bool isMultiline) const { + const char *x = isMultiline ? "\n" : ""; + TStringStream str; + str << "{TPDiskSchedulerConfig" << x; + str << " BytesSchedulerWeight# " << BytesSchedulerWeight << x; + str << " LogWeight# " << LogWeight << x; + str << " FreshWeight# " << FreshWeight << x; + str << " CompWeight# " << CompWeight << x; + str << " SyncLogWeight# " << SyncLogWeight << x; + str << " HugeWeight# " << HugeWeight << x; + str << " FastReadWeight# " << FastReadWeight << x; + str << " OtherReadWeight# " << OtherReadWeight << x; + str << " LoadWeight# " << LoadWeight << x; + str << " LowReadWeight# " << LowReadWeight << x; + str << "}" << x; + return str.Str(); + } + + void Apply(const NKikimrBlobStorage::TPDiskConfig *cfg) { + if (cfg->HasBytesSchedulerWeight()) { + BytesSchedulerWeight = cfg->GetBytesSchedulerWeight(); + } + if (cfg->HasLogWeight()) { + LogWeight = cfg->GetLogWeight(); + } + if (cfg->HasFreshWeight()) { + FreshWeight = cfg->GetFreshWeight(); + } + if (cfg->HasCompWeight()) { + CompWeight = cfg->GetCompWeight(); + } + if (cfg->HasSyncLogWeight()) { + SyncLogWeight = cfg->GetSyncLogWeight(); + } + if (cfg->HasHugeWeight()) { + HugeWeight = cfg->GetHugeWeight(); + } + if (cfg->HasFastReadWeight()) { + FastReadWeight = cfg->GetFastReadWeight(); + } + if (cfg->HasOtherReadWeight()) { + OtherReadWeight = cfg->GetOtherReadWeight(); + } + if 
(cfg->HasLoadWeight()) { + LoadWeight = cfg->GetLoadWeight(); + } + if (cfg->HasLowReadWeight()) { + LowReadWeight = cfg->GetLowReadWeight(); + } + } +}; + struct TPDiskConfig : public TThrRefBase { TString Path; // set only by constructor - TString ExpectedPath; - TString ExpectedSerial; - NKikimrBlobStorage::TSerialManagementStage::E SerialManagementStage - = NKikimrBlobStorage::TSerialManagementStage::DISCOVER_SERIAL; - + TString ExpectedPath; + TString ExpectedSerial; + NKikimrBlobStorage::TSerialManagementStage::E SerialManagementStage + = NKikimrBlobStorage::TSerialManagementStage::DISCOVER_SERIAL; + ui64 PDiskGuid; // set only by constructor ui32 PDiskId; // set only by constructor TPDiskCategory PDiskCategory; // set only by constructor - TString HashedMainKey; + TString HashedMainKey; ui64 StartOwnerRound = 1ull; // set only by warden - TIntrusivePtr<NPDisk::TSectorMap> SectorMap; // set only by warden - bool EnableSectorEncryption = true; + TIntrusivePtr<NPDisk::TSectorMap> SectorMap; // set only by warden + bool EnableSectorEncryption = true; + + ui32 ChunkSize = 128 << 20; + ui32 SectorSize = 4 << 10; - ui32 ChunkSize = 128 << 20; - ui32 SectorSize = 4 << 10; - ui64 StatisticsUpdateIntervalMs = 1000; - TPDiskSchedulerConfig SchedulerCfg; + TPDiskSchedulerConfig SchedulerCfg; ui64 SortFreeChunksPerItems = 100; @@ -109,22 +109,22 @@ struct TPDiskConfig : public TThrRefBase { ui64 DriveModelSeekTimeNs; ui64 DriveModelSpeedBps; - ui64 DriveModelSpeedBpsMin; - ui64 DriveModelSpeedBpsMax; + ui64 DriveModelSpeedBpsMin; + ui64 DriveModelSpeedBpsMax; ui64 DriveModelBulkWrieBlockSize; ui64 DriveModelTrimSpeedBps; - + ui64 ReorderingMs; - ui64 DeviceInFlight; + ui64 DeviceInFlight; ui64 CostLimitNs; - // AsyncBlockDevice settings + // AsyncBlockDevice settings ui32 BufferPoolBufferSizeBytes = 512 << 10; ui32 BufferPoolBufferCount = 256; ui32 MaxQueuedCompletionActions = 128; // BufferPoolBufferCount / 2; - bool UseSpdkNvmeDriver; - TControlWrapper 
UseT1ha0HashInFooter; - + bool UseSpdkNvmeDriver; + TControlWrapper UseT1ha0HashInFooter; + ui64 ExpectedSlotCount = 0; NKikimrConfig::TFeatureFlags FeatureFlags; @@ -145,117 +145,117 @@ struct TPDiskConfig : public TThrRefBase { ui64 WarningLogChunksMultiplier = 4; ui64 YellowLogChunksMultiplier = 4; - NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; - - TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory) - : TPDiskConfig({}, pDiskGuid, pdiskId, pDiskCategory) - {} - + NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; + + TPDiskConfig(ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory) + : TPDiskConfig({}, pDiskGuid, pdiskId, pDiskCategory) + {} + TPDiskConfig(TString path, ui64 pDiskGuid, ui32 pdiskId, ui64 pDiskCategory) : Path(path) , PDiskGuid(pDiskGuid) , PDiskId(pdiskId) , PDiskCategory(pDiskCategory) - , UseT1ha0HashInFooter(KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING, 0, 1) - { - Initialize(); - } - - TPDiskCategory::EDeviceType RetrieveDeviceType() { - TStringStream outDetails; - - if (std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(Path, &outDetails)) { - return data->DeviceType; - } else if (Path.Contains("nvme") || Path.Contains("NVME")) { - return TPDiskCategory::DEVICE_TYPE_NVME; - } else if (Path.Contains("ssd") || Path.Contains("SSD")) { - return TPDiskCategory::DEVICE_TYPE_SSD; - } else { - return PDiskCategory.Type(); - } - } - - void Initialize() { - TPDiskCategory::EDeviceType deviceType = RetrieveDeviceType(); - - auto choose = [&] (ui64 nvme, ui64 ssd, ui64 hdd) { - if (deviceType == TPDiskCategory::DEVICE_TYPE_ROT) { - return hdd; - } else if (deviceType == TPDiskCategory::DEVICE_TYPE_SSD) { - return ssd; - } else if (deviceType == TPDiskCategory::DEVICE_TYPE_NVME) { - return nvme; - } else { - return hdd; - } - }; - - DriveModelSeekTimeNs = choose(40'000ull, 40'000ull, 8'000'000ull); - DriveModelSpeedBps = 
choose(900'000'000ull, 375'000'000ull, 127'000'000ull); - DriveModelSpeedBpsMin = choose(900'000'000ull, 375'000'000ull, 135'000'000ull); - DriveModelSpeedBpsMax = choose(900'000'000ull, 375'000'000ull, 200'000'000ull); - DriveModelBulkWrieBlockSize = choose(64'000, 1 << 20, 2 << 20); - DriveModelTrimSpeedBps = choose(6ull << 30, 6ull << 30, 0); - ReorderingMs = choose(1, 7, 50); - DeviceInFlight = choose(128, 4, 4); - CostLimitNs = choose(500'000ull, 20'000'000ull, 50'000'000ull); - + , UseT1ha0HashInFooter(KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING, 0, 1) + { + Initialize(); + } + + TPDiskCategory::EDeviceType RetrieveDeviceType() { + TStringStream outDetails; + + if (std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(Path, &outDetails)) { + return data->DeviceType; + } else if (Path.Contains("nvme") || Path.Contains("NVME")) { + return TPDiskCategory::DEVICE_TYPE_NVME; + } else if (Path.Contains("ssd") || Path.Contains("SSD")) { + return TPDiskCategory::DEVICE_TYPE_SSD; + } else { + return PDiskCategory.Type(); + } + } + + void Initialize() { + TPDiskCategory::EDeviceType deviceType = RetrieveDeviceType(); + + auto choose = [&] (ui64 nvme, ui64 ssd, ui64 hdd) { + if (deviceType == TPDiskCategory::DEVICE_TYPE_ROT) { + return hdd; + } else if (deviceType == TPDiskCategory::DEVICE_TYPE_SSD) { + return ssd; + } else if (deviceType == TPDiskCategory::DEVICE_TYPE_NVME) { + return nvme; + } else { + return hdd; + } + }; + + DriveModelSeekTimeNs = choose(40'000ull, 40'000ull, 8'000'000ull); + DriveModelSpeedBps = choose(900'000'000ull, 375'000'000ull, 127'000'000ull); + DriveModelSpeedBpsMin = choose(900'000'000ull, 375'000'000ull, 135'000'000ull); + DriveModelSpeedBpsMax = choose(900'000'000ull, 375'000'000ull, 200'000'000ull); + DriveModelBulkWrieBlockSize = choose(64'000, 1 << 20, 2 << 20); + DriveModelTrimSpeedBps = choose(6ull << 30, 6ull << 30, 0); + ReorderingMs = choose(1, 7, 50); + DeviceInFlight = choose(128, 4, 4); + CostLimitNs = choose(500'000ull, 
20'000'000ull, 50'000'000ull); + UseSpdkNvmeDriver = Path.StartsWith("PCIe:"); - Y_VERIFY(!UseSpdkNvmeDriver || deviceType == TPDiskCategory::DEVICE_TYPE_NVME, - "SPDK NVMe driver can be used only with NVMe devices!"); - } - - TString GetDevicePath() { - if (ExpectedSerial && !Path && !ExpectedPath) { - if (std::optional<NPDisk::TDriveData> dev = FindDeviceBySerialNumber(ExpectedSerial, true)) { - ExpectedPath = dev->Path; - } - } - - if (ExpectedPath) { - return ExpectedPath; - } else { - return Path; - } - } - - bool CheckSerial(const TString& deviceSerial) const { - if (ExpectedSerial || - SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL || - SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::ONLY_SERIAL) { - if (ExpectedSerial != deviceSerial) { - return false; - } - } - return true; - } - - TString ToString() const { + Y_VERIFY(!UseSpdkNvmeDriver || deviceType == TPDiskCategory::DEVICE_TYPE_NVME, + "SPDK NVMe driver can be used only with NVMe devices!"); + } + + TString GetDevicePath() { + if (ExpectedSerial && !Path && !ExpectedPath) { + if (std::optional<NPDisk::TDriveData> dev = FindDeviceBySerialNumber(ExpectedSerial, true)) { + ExpectedPath = dev->Path; + } + } + + if (ExpectedPath) { + return ExpectedPath; + } else { + return Path; + } + } + + bool CheckSerial(const TString& deviceSerial) const { + if (ExpectedSerial || + SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL || + SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::ONLY_SERIAL) { + if (ExpectedSerial != deviceSerial) { + return false; + } + } + return true; + } + + TString ToString() const { return ToString(false); } - TString ToString(bool isMultiline) const { + TString ToString(bool isMultiline) const { TStringStream str; const char *x = isMultiline ? 
"\n" : ""; str << "{TPDiskConfg" << x; str << " Path# \"" << Path << "\"" << x; - str << " ExpectedPath# \"" << ExpectedPath << "\"" << x; - str << " ExpectedSerial# \"" << ExpectedSerial << "\"" << x; + str << " ExpectedPath# \"" << ExpectedPath << "\"" << x; + str << " ExpectedSerial# \"" << ExpectedSerial << "\"" << x; str << " PDiskGuid# " << PDiskGuid << x; str << " PDiskId# " << PDiskId << x; str << " PDiskCategory# " << PDiskCategory.ToString() << x; - str << " HashedMainKey# " << HashedMainKey << x; + str << " HashedMainKey# " << HashedMainKey << x; str << " StartOwnerRound# " << StartOwnerRound << x; - str << " SectorMap# " << (SectorMap ? "true" : "false") << x; - str << " EnableSectorEncryption # " << EnableSectorEncryption << x; + str << " SectorMap# " << (SectorMap ? "true" : "false") << x; + str << " EnableSectorEncryption # " << EnableSectorEncryption << x; + + str << " ChunkSize# " << ChunkSize << x; + str << " SectorSize# " << SectorSize << x; - str << " ChunkSize# " << ChunkSize << x; - str << " SectorSize# " << SectorSize << x; - str << " StatisticsUpdateIntervalMs# " << StatisticsUpdateIntervalMs << x; - str << " SchedulerCfg# " << SchedulerCfg.ToString(isMultiline) << x; - + str << " SchedulerCfg# " << SchedulerCfg.ToString(isMultiline) << x; + str << " MinLogChunksTotal# " << MinLogChunksTotal << x; str << " MaxLogChunksPerOwnerMultiplier# " << MaxLogChunksPerOwnerMultiplier << x; str << " MaxLogChunksPerOwnerDivisor# " << MaxLogChunksPerOwnerDivisor << x; @@ -265,24 +265,24 @@ struct TPDiskConfig : public TThrRefBase { str << " DriveModelSeekTimeNs# " << DriveModelSeekTimeNs << x; str << " DriveModelSpeedBps# " << DriveModelSpeedBps << x; - str << " DriveModelSpeedBpsMin# " << DriveModelSpeedBpsMin << x; - str << " DriveModelSpeedBpsMax# " << DriveModelSpeedBpsMax << x; + str << " DriveModelSpeedBpsMin# " << DriveModelSpeedBpsMin << x; + str << " DriveModelSpeedBpsMax# " << DriveModelSpeedBpsMax << x; str << " DriveModelBulkWrieBlockSize# " 
<< DriveModelBulkWrieBlockSize << x; str << " DriveModelTrimSpeedBps# " << DriveModelTrimSpeedBps << x; str << " ReorderingMs# " << ReorderingMs << x; - str << " DeviceInFlight# " << DeviceInFlight << x; + str << " DeviceInFlight# " << DeviceInFlight << x; str << " CostLimitNs# " << CostLimitNs << x; - str << " BufferPoolBufferSizeBytes# " << BufferPoolBufferSizeBytes << x; - str << " BufferPoolBufferCount# " << BufferPoolBufferCount << x; - str << " MaxQueuedCompletionActions# " << MaxQueuedCompletionActions << x; + str << " BufferPoolBufferSizeBytes# " << BufferPoolBufferSizeBytes << x; + str << " BufferPoolBufferCount# " << BufferPoolBufferCount << x; + str << " MaxQueuedCompletionActions# " << MaxQueuedCompletionActions << x; str << " ExpectedSlotCount# " << ExpectedSlotCount << x; - str << " ReserveLogChunksMultiplier# " << ReserveLogChunksMultiplier << x; + str << " ReserveLogChunksMultiplier# " << ReserveLogChunksMultiplier << x; str << " InsaneLogChunksMultiplier# " << InsaneLogChunksMultiplier << x; - str << " RedLogChunksMultiplier# " << RedLogChunksMultiplier << x; - str << " OrangeLogChunksMultiplier# " << OrangeLogChunksMultiplier << x; - str << " WarningLogChunksMultiplier# " << WarningLogChunksMultiplier << x; - str << " YellowLogChunksMultiplier# " << YellowLogChunksMultiplier << x; + str << " RedLogChunksMultiplier# " << RedLogChunksMultiplier << x; + str << " OrangeLogChunksMultiplier# " << OrangeLogChunksMultiplier << x; + str << " WarningLogChunksMultiplier# " << WarningLogChunksMultiplier << x; + str << " YellowLogChunksMultiplier# " << YellowLogChunksMultiplier << x; str << "}"; return str.Str(); } @@ -292,17 +292,17 @@ struct TPDiskConfig : public TThrRefBase { return; } - if (cfg->HasChunkSize()) { - ChunkSize = cfg->GetChunkSize(); - } - if (cfg->HasSectorSize()) { - SectorSize = cfg->GetSectorSize(); - } + if (cfg->HasChunkSize()) { + ChunkSize = cfg->GetChunkSize(); + } + if (cfg->HasSectorSize()) { + SectorSize = cfg->GetSectorSize(); + 
} if (cfg->HasStatisticsUpdateIntervalMs()) { StatisticsUpdateIntervalMs = cfg->GetStatisticsUpdateIntervalMs(); } - SchedulerCfg.Apply(cfg); + SchedulerCfg.Apply(cfg); if (cfg->HasMinLogChunksTotal()) { MinLogChunksTotal = cfg->GetMinLogChunksTotal(); @@ -338,21 +338,21 @@ struct TPDiskConfig : public TThrRefBase { if (cfg->HasReorderingMs()) { ReorderingMs = cfg->GetReorderingMs(); } - if (cfg->HasDeviceInFlight()) { - DeviceInFlight = cfg->GetDeviceInFlight(); - } + if (cfg->HasDeviceInFlight()) { + DeviceInFlight = cfg->GetDeviceInFlight(); + } if (cfg->HasCostLimitNs()) { CostLimitNs = cfg->GetCostLimitNs(); } - if (cfg->HasBufferPoolBufferSizeBytes()) { - BufferPoolBufferSizeBytes = cfg->GetBufferPoolBufferSizeBytes(); - } - if (cfg->HasBufferPoolBufferCount()) { - BufferPoolBufferCount = cfg->GetBufferPoolBufferCount(); - } - if (cfg->HasMaxQueuedCompletionActions()) { - MaxQueuedCompletionActions = cfg->GetMaxQueuedCompletionActions(); - } + if (cfg->HasBufferPoolBufferSizeBytes()) { + BufferPoolBufferSizeBytes = cfg->GetBufferPoolBufferSizeBytes(); + } + if (cfg->HasBufferPoolBufferCount()) { + BufferPoolBufferCount = cfg->GetBufferPoolBufferCount(); + } + if (cfg->HasMaxQueuedCompletionActions()) { + MaxQueuedCompletionActions = cfg->GetMaxQueuedCompletionActions(); + } if (cfg->HasInsaneLogChunksMultiplier()) { InsaneLogChunksMultiplier = cfg->GetInsaneLogChunksMultiplier(); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto.h index b8f80d1143..1cb1b90749 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto.h @@ -1,6 +1,6 @@ #pragma once #include "defs.h" - + #include <ydb/core/base/compile_time_flags.h> #include <ydb/core/blobstorage/crypto/crypto.h> @@ -11,65 +11,65 @@ namespace NPDisk { // PDiskHashCalculator //////////////////////////////////////////////////////////////////////////// -class 
TPDiskHashCalculator : public THashCalculator { - bool UseT1ha0Hasher; - +class TPDiskHashCalculator : public THashCalculator { + bool UseT1ha0Hasher; + public: - TPDiskHashCalculator(bool useT1ha0Hasher) - : UseT1ha0Hasher(useT1ha0Hasher) - {} - - void SetUseT1ha0Hasher(bool x) { - UseT1ha0Hasher = x; - }; - - ui64 OldHashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, - const ui32 sectorSize) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(§orOffset, sizeof sectorOffset); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&magic, sizeof magic); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(sector, sectorSize - sizeof(ui64)); - - THashCalculator::Clear(); - THashCalculator::Hash(§orOffset, sizeof sectorOffset); - THashCalculator::Hash(&magic, sizeof magic); - THashCalculator::Hash(sector, sectorSize - sizeof(ui64)); - return THashCalculator::GetHashResult(); - } - - template<class THasher> - ui64 T1ha0HashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, - const ui32 sectorSize) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(§orOffset, sizeof sectorOffset); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&magic, sizeof magic); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(sector, sectorSize - sizeof(ui64)); - - THasher hasher; - hasher.SetKey(sectorOffset ^ magic); - return hasher.Hash(sector, sectorSize - sizeof(ui64)); - } - - ui64 HashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, - const ui32 sectorSize) { - if (UseT1ha0Hasher) { - return T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize); - } else { - return OldHashSector(sectorOffset, magic, sector, sectorSize); - } - } - - bool CheckSectorHash(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, - const ui32 sectorSize, const ui64 sectorHash) { - // On production servers may be to versions. 
- // If by default used OldHash version, then use it first - // If by default used T1ha0NoAvx version, then use it - if (UseT1ha0Hasher) { - return sectorHash == T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize) - || sectorHash == OldHashSector(sectorOffset, magic, sector, sectorSize); - } else { - return sectorHash == OldHashSector(sectorOffset, magic, sector, sectorSize) - || sectorHash == T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize); - } - } + TPDiskHashCalculator(bool useT1ha0Hasher) + : UseT1ha0Hasher(useT1ha0Hasher) + {} + + void SetUseT1ha0Hasher(bool x) { + UseT1ha0Hasher = x; + }; + + ui64 OldHashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, + const ui32 sectorSize) { + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(§orOffset, sizeof sectorOffset); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&magic, sizeof magic); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(sector, sectorSize - sizeof(ui64)); + + THashCalculator::Clear(); + THashCalculator::Hash(§orOffset, sizeof sectorOffset); + THashCalculator::Hash(&magic, sizeof magic); + THashCalculator::Hash(sector, sectorSize - sizeof(ui64)); + return THashCalculator::GetHashResult(); + } + + template<class THasher> + ui64 T1ha0HashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, + const ui32 sectorSize) { + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(§orOffset, sizeof sectorOffset); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&magic, sizeof magic); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(sector, sectorSize - sizeof(ui64)); + + THasher hasher; + hasher.SetKey(sectorOffset ^ magic); + return hasher.Hash(sector, sectorSize - sizeof(ui64)); + } + + ui64 HashSector(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, + const ui32 sectorSize) { + if (UseT1ha0Hasher) { + return T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize); + } else { + return OldHashSector(sectorOffset, magic, sector, sectorSize); + } + } + + 
bool CheckSectorHash(const ui64 sectorOffset, const ui64 magic, const ui8 *sector, + const ui32 sectorSize, const ui64 sectorHash) { + // On production servers may be to versions. + // If by default used OldHash version, then use it first + // If by default used T1ha0NoAvx version, then use it + if (UseT1ha0Hasher) { + return sectorHash == T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize) + || sectorHash == OldHashSector(sectorOffset, magic, sector, sectorSize); + } else { + return sectorHash == OldHashSector(sectorOffset, magic, sector, sectorSize) + || sectorHash == T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sector, sectorSize); + } + } }; //////////////////////////////////////////////////////////////////////////// @@ -78,47 +78,47 @@ public: class TPDiskStreamCypher { TStreamCypher Impl; - const bool EnableEncryption = true; + const bool EnableEncryption = true; public: - - TPDiskStreamCypher(bool encryption) - : Impl() - , EnableEncryption(encryption) - {} - - void SetKey(const ui64 &key) { - if (EnableEncryption) { - Impl.SetKey(key); - } - } - - void StartMessage(ui64 nonce) { - if (EnableEncryption) { - Impl.StartMessage(nonce, 0); - } - } - - void EncryptZeroes(void* destination, ui32 size) { - if (EnableEncryption) { - Impl.EncryptZeroes(destination, size); - } else { - memset(destination, 0, size); - } - } - - void Encrypt(void* destination, const void* source, ui32 size) { - if (EnableEncryption) { - Impl.Encrypt(destination, source, size); - } else { - memcpy(destination, source, size); - } - } - - void InplaceEncrypt(void *source, ui32 size) { - if (EnableEncryption) { - Impl.InplaceEncrypt(source, size); - } - } + + TPDiskStreamCypher(bool encryption) + : Impl() + , EnableEncryption(encryption) + {} + + void SetKey(const ui64 &key) { + if (EnableEncryption) { + Impl.SetKey(key); + } + } + + void StartMessage(ui64 nonce) { + if (EnableEncryption) { + Impl.StartMessage(nonce, 0); + } + } + + void 
EncryptZeroes(void* destination, ui32 size) { + if (EnableEncryption) { + Impl.EncryptZeroes(destination, size); + } else { + memset(destination, 0, size); + } + } + + void Encrypt(void* destination, const void* source, ui32 size) { + if (EnableEncryption) { + Impl.Encrypt(destination, source, size); + } else { + memcpy(destination, source, size); + } + } + + void InplaceEncrypt(void *source, ui32 size) { + if (EnableEncryption) { + Impl.InplaceEncrypt(source, size); + } + } }; } // NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto_ut.cpp index 17e6abed67..1ca02078cf 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_crypto_ut.cpp @@ -8,73 +8,73 @@ namespace NKikimr { Y_UNIT_TEST_SUITE(TBlobStoragePDiskCrypto) { Y_UNIT_TEST(TestMixedStreamCypher) { - for (ui32 enableEnctyption = 0; enableEnctyption < 2; ++enableEnctyption) { - NPDisk::TPDiskStreamCypher cypher1(enableEnctyption); - NPDisk::TPDiskStreamCypher cypher2(enableEnctyption); - constexpr int SIZE = 5000; - alignas(16) ui8 in[SIZE]; - alignas(16) ui8 out[SIZE]; - for (ui32 i = 0; i < SIZE; ++i) { - in[i] = (ui8)i; - } - - ui64 key = 1; - ui64 nonce = 1; - cypher1.SetKey(key); - - for (ui32 size = 1; size < SIZE; ++size) { - ui32 in_offset = size / 7; - cypher1.StartMessage(nonce); - ui32 size1 = (size - in_offset) % 257; - ui32 size2 = (size - in_offset - size1) % 263; - ui32 size3 = size - size1 - size2 - in_offset; - cypher1.Encrypt(out, in + in_offset, size1); - cypher1.Encrypt(out + size1, in + in_offset + size1, size2); - cypher1.Encrypt(out + size1 + size2, in + in_offset + size1 + size2, size3); - - cypher2.SetKey(key); - cypher2.StartMessage(nonce); - cypher2.InplaceEncrypt(out, size - in_offset); - - for (ui32 i = 0; i < size - in_offset; ++i) { - UNIT_ASSERT(in[i + in_offset] == out[i]); - } + for (ui32 enableEnctyption = 0; enableEnctyption < 
2; ++enableEnctyption) { + NPDisk::TPDiskStreamCypher cypher1(enableEnctyption); + NPDisk::TPDiskStreamCypher cypher2(enableEnctyption); + constexpr int SIZE = 5000; + alignas(16) ui8 in[SIZE]; + alignas(16) ui8 out[SIZE]; + for (ui32 i = 0; i < SIZE; ++i) { + in[i] = (ui8)i; + } + + ui64 key = 1; + ui64 nonce = 1; + cypher1.SetKey(key); + + for (ui32 size = 1; size < SIZE; ++size) { + ui32 in_offset = size / 7; + cypher1.StartMessage(nonce); + ui32 size1 = (size - in_offset) % 257; + ui32 size2 = (size - in_offset - size1) % 263; + ui32 size3 = size - size1 - size2 - in_offset; + cypher1.Encrypt(out, in + in_offset, size1); + cypher1.Encrypt(out + size1, in + in_offset + size1, size2); + cypher1.Encrypt(out + size1 + size2, in + in_offset + size1 + size2, size3); + + cypher2.SetKey(key); + cypher2.StartMessage(nonce); + cypher2.InplaceEncrypt(out, size - in_offset); + + for (ui32 i = 0; i < size - in_offset; ++i) { + UNIT_ASSERT(in[i + in_offset] == out[i]); + } } } } Y_UNIT_TEST(TestInplaceStreamCypher) { - for (ui32 enableEnctyption = 0; enableEnctyption < 2; ++enableEnctyption) { - NPDisk::TPDiskStreamCypher cypher1(enableEnctyption); - NPDisk::TPDiskStreamCypher cypher2(enableEnctyption); - constexpr int SIZE = 5000; - ui8 in[SIZE]; - ui8 out[SIZE]; - for (ui32 i = 0; i < SIZE; ++i) { - in[i] = (ui8)i; - } - - ui64 key = 1; - ui64 nonce = 1; - - for (ui32 size = 1; size < SIZE; ++size) { - cypher1.SetKey(key); - cypher1.StartMessage(nonce); - cypher1.InplaceEncrypt(in, size); - - memcpy(out, in, size); - - cypher2.SetKey(key); - cypher2.StartMessage(nonce); - cypher2.InplaceEncrypt(out, size); - - for (ui32 i = 0; i < SIZE; ++i) { - in[i] = (ui8)i; - } - - for (ui32 i = 0; i < size; ++i) { - UNIT_ASSERT_C(in[i] == out[i], "Mismatch at " << i << " of " << size << Endl); - } + for (ui32 enableEnctyption = 0; enableEnctyption < 2; ++enableEnctyption) { + NPDisk::TPDiskStreamCypher cypher1(enableEnctyption); + NPDisk::TPDiskStreamCypher cypher2(enableEnctyption); 
+ constexpr int SIZE = 5000; + ui8 in[SIZE]; + ui8 out[SIZE]; + for (ui32 i = 0; i < SIZE; ++i) { + in[i] = (ui8)i; + } + + ui64 key = 1; + ui64 nonce = 1; + + for (ui32 size = 1; size < SIZE; ++size) { + cypher1.SetKey(key); + cypher1.StartMessage(nonce); + cypher1.InplaceEncrypt(in, size); + + memcpy(out, in, size); + + cypher2.SetKey(key); + cypher2.StartMessage(nonce); + cypher2.InplaceEncrypt(out, size); + + for (ui32 i = 0; i < SIZE; ++i) { + in[i] = (ui8)i; + } + + for (ui32 i = 0; i < size; ++i) { + UNIT_ASSERT_C(in[i] == out[i], "Mismatch at " << i << " of " << size << Endl); + } } } } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_data.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_data.h index 46809940f8..2b1350bff7 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_data.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_data.h @@ -3,7 +3,7 @@ #include "blobstorage_pdisk.h" #include "blobstorage_pdisk_crypto.h" #include "blobstorage_pdisk_defs.h" -#include "blobstorage_pdisk_state.h" +#include "blobstorage_pdisk_state.h" #include <ydb/core/util/text.h> @@ -14,16 +14,16 @@ namespace NPDisk { // PDisk On-disk structures //////////////////////////////////////////////////////////////////////////// -static_assert(sizeof(TOwner) == 1, "TOwner size mismatch."); -static_assert(sizeof(TLogSignature) == 1, "TSignature size mismatch."); +static_assert(sizeof(TOwner) == 1, "TOwner size mismatch."); +static_assert(sizeof(TLogSignature) == 1, "TSignature size mismatch."); const ui64 MagicNextLogChunkReferenceId = 0x709DA7A709DA7A11; const ui64 MagicLogChunkId = 0x11170915A71FE111; const ui64 MagicDataChunkId = 0xDA7AC8A2CDA7AC8A; const ui64 MagicSysLogChunkId = 0x5957095957095957; const ui64 MagicFormatChunkId = 0xF088A7F088A7F088; -constexpr ui64 MagicIncompleteFormat = 0x5b48add808b31984; -constexpr ui64 MagicIncompleteFormatSize = 512; // Bytes +constexpr ui64 MagicIncompleteFormat = 0x5b48add808b31984; +constexpr ui64 
MagicIncompleteFormatSize = 512; // Bytes const ui64 Canary = 0x0123456789abcdef; constexpr ui32 CanarySize = 8; @@ -38,12 +38,12 @@ constexpr ui32 RecordsInSysLog = 16; #define PDISK_FORMAT_VERSION 3 #define PDISK_DATA_VERSION 2 #define PDISK_DATA_VERSION_2 3 -#define PDISK_DATA_VERSION_3 4 +#define PDISK_DATA_VERSION_3 4 #define PDISK_SYS_LOG_RECORD_VERSION_2 2 #define PDISK_SYS_LOG_RECORD_VERSION_3 3 -#define PDISK_SYS_LOG_RECORD_VERSION_4 4 -// #define PDISK_SYS_LOG_RECORD_VERSION_5 5 // It was used in reverted commits, just avoid this version -#define PDISK_SYS_LOG_RECORD_VERSION_6 6 +#define PDISK_SYS_LOG_RECORD_VERSION_4 4 +// #define PDISK_SYS_LOG_RECORD_VERSION_5 5 // It was used in reverted commits, just avoid this version +#define PDISK_SYS_LOG_RECORD_VERSION_6 6 #define PDISK_SYS_LOG_RECORD_INCOMPATIBLE_VERSION_1000 1000 #define FORMAT_TEXT_SIZE 1024 @@ -123,10 +123,10 @@ struct TLogRecordHeader { TOwner OwnerId; ui8 A; ui8 B; - TLogSignature Signature; + TLogSignature Signature; ui64 OwnerLsn; - TLogRecordHeader(TOwner ownerId, TLogSignature signature, ui64 ownerLsn) + TLogRecordHeader(TOwner ownerId, TLogSignature signature, ui64 ownerLsn) : Version(PDISK_DATA_VERSION) , OwnerId(ownerId) , A('A') @@ -146,7 +146,7 @@ struct TFirstLogPageHeader { ui64 DataSize; TLogRecordHeader LogRecordHeader; - TFirstLogPageHeader(ui8 flags, ui32 size, ui64 dataSize, TOwner ownerId, TLogSignature signature, ui64 ownerLsn) + TFirstLogPageHeader(ui8 flags, ui32 size, ui64 dataSize, TOwner ownerId, TLogSignature signature, ui64 ownerLsn) : Version(PDISK_DATA_VERSION) , Flags(flags) , A('A') @@ -179,8 +179,8 @@ struct TNonceJumpLogPageHeader2 { ui8 A; ui8 B; ui64 PreviousNonce; - - // For debug only + + // For debug only ui32 PreviousLogTails[NONCE_JUMP_DLOG_RECORDS][NONCE_JUMP_DLOG_CHUNKS]; ui32 PreviousNonces[NONCE_JUMP_DLOG_RECORDS]; ui64 PreviousInstants[NONCE_JUMP_DLOG_RECORDS]; @@ -203,7 +203,7 @@ struct TNonceJumpLogPageHeader2 { } TNonceJumpLogPageHeader2(ui8 
flags, ui64 previousNonce, TNonceJumpLogPageHeader2 &prevHeader, - TList<TLogChunkInfo> &logChunkList) + TList<TLogChunkInfo> &logChunkList) : Version(PDISK_DATA_VERSION) , Flags(flags) , A('A') @@ -322,13 +322,13 @@ struct TSysLogRecord { // TODO: use atomics here ui64 Version; TNonceSet Nonces; - TChunkIdx LogHeadChunkIdx; + TChunkIdx LogHeadChunkIdx; ui32 Reserved1; ui64 LogHeadChunkPreviousNonce; TVDiskID OwnerVDisks[256]; TSysLogRecord() - : Version(PDISK_SYS_LOG_RECORD_VERSION_6) + : Version(PDISK_SYS_LOG_RECORD_VERSION_6) , LogHeadChunkIdx(0) , Reserved1(0) , LogHeadChunkPreviousNonce((ui64)-1) @@ -408,77 +408,77 @@ struct TChunkInfo { }; static_assert(sizeof(TOwner) == 1, "TOwner size is intended to be 1 byte (range 0..255)"); // Owner[256] -struct TChunkTrimInfo { - ui8 TrimMask; - - static constexpr ui64 ChunksPerRecord = 8; - - static ui64 RecordsForChunkCount(ui32 chunkCount) { - return (chunkCount + ChunksPerRecord - 1) / ChunksPerRecord; - } - - static ui64 SizeForChunkCount(ui32 chunkCount) { - // Write chunkCount in first 64 bits of chunk trim record - return RecordsForChunkCount(chunkCount) * sizeof(TChunkTrimInfo); - } - - TChunkTrimInfo(ui64 mask) - : TrimMask(mask) - {} - - void SetChunkTrimmed(ui8 idx) { - Y_VERIFY(idx < ChunksPerRecord); - TrimMask |= (1 << idx); - } - - void SetChunkUntrimmed(ui8 idx) { - Y_VERIFY(idx < ChunksPerRecord); - TrimMask &= ~(1 << idx); - } - - bool IsChunkTrimmed(ui8 idx) { - Y_VERIFY(idx < ChunksPerRecord); - return TrimMask & (1 << idx); - } -}; - +struct TChunkTrimInfo { + ui8 TrimMask; + + static constexpr ui64 ChunksPerRecord = 8; + + static ui64 RecordsForChunkCount(ui32 chunkCount) { + return (chunkCount + ChunksPerRecord - 1) / ChunksPerRecord; + } + + static ui64 SizeForChunkCount(ui32 chunkCount) { + // Write chunkCount in first 64 bits of chunk trim record + return RecordsForChunkCount(chunkCount) * sizeof(TChunkTrimInfo); + } + + TChunkTrimInfo(ui64 mask) + : TrimMask(mask) + {} + + void 
SetChunkTrimmed(ui8 idx) { + Y_VERIFY(idx < ChunksPerRecord); + TrimMask |= (1 << idx); + } + + void SetChunkUntrimmed(ui8 idx) { + Y_VERIFY(idx < ChunksPerRecord); + TrimMask &= ~(1 << idx); + } + + bool IsChunkTrimmed(ui8 idx) { + Y_VERIFY(idx < ChunksPerRecord); + return TrimMask & (1 << idx); + } +}; + struct TNextLogChunkReference2 { ui32 Version; - TChunkIdx NextChunk; + TChunkIdx NextChunk; TInstant CreatedAt; // Absent in Reference 1 TNextLogChunkReference2() : Version(PDISK_DATA_VERSION_2) , NextChunk(0) - , CreatedAt(TInstant::Now()) - {} + , CreatedAt(TInstant::Now()) + {} +}; + +struct TNextLogChunkReference3 : public TNextLogChunkReference2 { + // Version should be PDISK_DATA_VERSION_3 + + // In typical case should be zero + // In case of splicing means first nonce of next chunk + ui64 NextChunkFirstNonce; + // Should be zero + ui8 IsNotCompatible; + + TNextLogChunkReference3() { + Version = PDISK_DATA_VERSION_3; + IsNotCompatible = 0; + } }; -struct TNextLogChunkReference3 : public TNextLogChunkReference2 { - // Version should be PDISK_DATA_VERSION_3 - - // In typical case should be zero - // In case of splicing means first nonce of next chunk - ui64 NextChunkFirstNonce; - // Should be zero - ui8 IsNotCompatible; - - TNextLogChunkReference3() { - Version = PDISK_DATA_VERSION_3; - IsNotCompatible = 0; - } -}; - #pragma pack(pop) enum EFormatFlags { - FormatFlagErasureEncodeUserChunks = 1, // Deprecated, user chunks is never erasure encoded + FormatFlagErasureEncodeUserChunks = 1, // Deprecated, user chunks is never erasure encoded FormatFlagErasureEncodeUserLog = 1 << 1, // Deprecated, user log is never erasure encoded - FormatFlagErasureEncodeSysLog = 1 << 2, // Always on, flag is useless - FormatFlagErasureEncodeFormat = 1 << 3, // Always on, flag is useless - FormatFlagErasureEncodeNextChunkReference = 1 << 4, // Always on, flag is useless - FormatFlagEncryptFormat = 1 << 5, // Always on, flag is useless - FormatFlagEncryptData = 1 << 6, // Always 
on, flag is useless + FormatFlagErasureEncodeSysLog = 1 << 2, // Always on, flag is useless + FormatFlagErasureEncodeFormat = 1 << 3, // Always on, flag is useless + FormatFlagErasureEncodeNextChunkReference = 1 << 4, // Always on, flag is useless + FormatFlagEncryptFormat = 1 << 5, // Always on, flag is useless + FormatFlagEncryptData = 1 << 6, // Always on, flag is useless FormatFlagFormatInProgress = 1 << 7, // Not implemented (Must be OFF for a formatted disk) }; @@ -602,36 +602,36 @@ struct TDiskFormat { } } - ui64 Offset(TChunkIdx chunkIdx, ui32 sectorIdx, ui64 offset) const { - return (ui64)ChunkSize * chunkIdx + (ui64)SectorSize * sectorIdx + offset; + ui64 Offset(TChunkIdx chunkIdx, ui32 sectorIdx, ui64 offset) const { + return (ui64)ChunkSize * chunkIdx + (ui64)SectorSize * sectorIdx + offset; } - ui64 Offset(TChunkIdx chunkIdx, ui32 sectorIdx) const { - return (ui64)ChunkSize * chunkIdx + (ui64)SectorSize * sectorIdx; + ui64 Offset(TChunkIdx chunkIdx, ui32 sectorIdx) const { + return (ui64)ChunkSize * chunkIdx + (ui64)SectorSize * sectorIdx; } ui64 SectorPayloadSize() const { - return SectorSize - sizeof(TDataSectorFooter) - CanarySize; + return SectorSize - sizeof(TDataSectorFooter) - CanarySize; } ui32 DiskSizeChunks() const { - return DiskSize / ChunkSize; + return DiskSize / ChunkSize; } ui32 GetUserAccessibleChunkSize() const { - const ui32 userSectors = ChunkSize / SectorSize; - return userSectors * SectorPayloadSize(); + const ui32 userSectors = ChunkSize / SectorSize; + return userSectors * SectorPayloadSize(); } ui32 SysLogSectorsPerRecord() const { - ui32 sectorPayload = SectorSize - CanarySize - sizeof(TDataSectorFooter); - ui32 diskChunks = DiskSizeChunks(); - ui32 baseSysLogRecordSize = sizeof(TSysLogRecord) + ui32 sectorPayload = SectorSize - CanarySize - sizeof(TDataSectorFooter); + ui32 diskChunks = DiskSizeChunks(); + ui32 baseSysLogRecordSize = sizeof(TSysLogRecord) + diskChunks * sizeof(TChunkInfo) - + sizeof(TSysLogFirstNoncesToKeep) 
- + TChunkTrimInfo::SizeForChunkCount(diskChunks); - ui32 sysLogFirstSectorPayload = sectorPayload - sizeof(TFirstLogPageHeader); - ui32 sysLogExtraSectorPayload = sectorPayload - sizeof(TLogPageHeader); + + sizeof(TSysLogFirstNoncesToKeep) + + TChunkTrimInfo::SizeForChunkCount(diskChunks); + ui32 sysLogFirstSectorPayload = sectorPayload - sizeof(TFirstLogPageHeader); + ui32 sysLogExtraSectorPayload = sectorPayload - sizeof(TLogPageHeader); ui32 sysLogExtraSectorCount = 0; if (baseSysLogRecordSize > sysLogFirstSectorPayload) { ui32 extraSize = baseSysLogRecordSize - sysLogFirstSectorPayload; @@ -647,15 +647,15 @@ struct TDiskFormat { } void PrepareMagic(TKey &key, ui64 nonce, ui64 &magic) { - NPDisk::TPDiskStreamCypher cypher(true); - cypher.SetKey(key); - cypher.StartMessage(nonce); - cypher.InplaceEncrypt(&magic, sizeof(magic)); + NPDisk::TPDiskStreamCypher cypher(true); + cypher.SetKey(key); + cypher.StartMessage(nonce); + cypher.InplaceEncrypt(&magic, sizeof(magic)); } void InitMagic() { MagicFormatChunk = MagicFormatChunkId; - NPDisk::TPDiskHashCalculator hash(false); + NPDisk::TPDiskHashCalculator hash(false); hash.Hash(&Guid, sizeof(Guid)); hash.Hash(&MagicNextLogChunkReferenceId, sizeof(MagicNextLogChunkReferenceId)); MagicNextLogChunkReference = hash.GetHashResult(); @@ -682,7 +682,7 @@ struct TDiskFormat { } bool IsHashOk(ui64 bufferSize) const { - NPDisk::TPDiskHashCalculator hashCalculator(false); + NPDisk::TPDiskHashCalculator hashCalculator(false); if (Version == 2) { ui64 size = (char*)&HashVersion2 - (char*)this; hashCalculator.Hash(this, size); @@ -705,7 +705,7 @@ struct TDiskFormat { void SetHash() { // Set an invalid HashVersion2 to prevent Version2 code from trying to read incompatible disks { - NPDisk::TPDiskHashCalculator hashCalculator(false); + NPDisk::TPDiskHashCalculator hashCalculator(false); ui64 size = (char*)&HashVersion2 - (char*)this; hashCalculator.Hash(this, size); HashVersion2 = hashCalculator.GetHashResult(); @@ -714,7 +714,7 @@ 
struct TDiskFormat { } // Set Hash { - NPDisk::TPDiskHashCalculator hashCalculator(false); + NPDisk::TPDiskHashCalculator hashCalculator(false); Y_VERIFY(DiskFormatSize > sizeof(THash)); ui64 size = DiskFormatSize - sizeof(THash); hashCalculator.Hash(this, size); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_defs.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_defs.h index d2557d81ef..cc63207fc7 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_defs.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_defs.h @@ -3,37 +3,37 @@ #include <ydb/core/base/blobstorage.h> #include <ydb/core/protos/blobstorage.pb.h> -#include "blobstorage_pdisk_signature.h" - +#include "blobstorage_pdisk_signature.h" + #include <ydb/core/util/text.h> - + namespace NKikimr { namespace NPDisk { - struct TPrintable_ui8 { - ui8 Val; - - TPrintable_ui8(ui8 val = 0) - : Val(val) - {} - - // To make possible usage of class instances in array indexing - operator ui64() const { - return Val; - } - - TPrintable_ui8& operator--() { - Val--; - return *this; - } - - TPrintable_ui8& operator++() { - Val++; - return *this; - } - }; - - typedef TPrintable_ui8 TOwner; + struct TPrintable_ui8 { + ui8 Val; + + TPrintable_ui8(ui8 val = 0) + : Val(val) + {} + + // To make possible usage of class instances in array indexing + operator ui64() const { + return Val; + } + + TPrintable_ui8& operator--() { + Val--; + return *this; + } + + TPrintable_ui8& operator++() { + Val++; + return *this; + } + }; + + typedef TPrintable_ui8 TOwner; typedef ui64 TOwnerRound; typedef ui32 TStatusFlags; typedef ui64 TKey; @@ -49,57 +49,57 @@ namespace NKikimr { {} }; - // using TLogPosition = std::pair<TChunkIdx, ui32>; - struct TLogPosition { - TChunkIdx ChunkIdx = 0; - ui32 OffsetInChunk = 0; - - constexpr static TLogPosition Invalid() { - return {Max<TChunkIdx>(), Max<ui32>()}; - } - }; - - inline bool operator==(const TLogPosition& x, const TLogPosition& y) { - return x.ChunkIdx == y.ChunkIdx && 
x.OffsetInChunk == y.OffsetInChunk; - } - - inline bool operator!=(const TLogPosition& x, const TLogPosition& y) { - return !(x == y); - } - + // using TLogPosition = std::pair<TChunkIdx, ui32>; + struct TLogPosition { + TChunkIdx ChunkIdx = 0; + ui32 OffsetInChunk = 0; + + constexpr static TLogPosition Invalid() { + return {Max<TChunkIdx>(), Max<ui32>()}; + } + }; + + inline bool operator==(const TLogPosition& x, const TLogPosition& y) { + return x.ChunkIdx == y.ChunkIdx && x.OffsetInChunk == y.OffsetInChunk; + } + + inline bool operator!=(const TLogPosition& x, const TLogPosition& y) { + return !(x == y); + } + // Flag values for TStatusFlags ydb/core/protos/blobstorage.proto EStatusFlags - inline TString StatusFlagsToString(TStatusFlags flags) { - TStringStream str; - bool isFirst = true; - isFirst = NText::OutFlag(isFirst, flags == 0, "None", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusIsValid), "IsValid", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceCyan), "DiskSpaceCyan", str); + inline TString StatusFlagsToString(TStatusFlags flags) { + TStringStream str; + bool isFirst = true; + isFirst = NText::OutFlag(isFirst, flags == 0, "None", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusIsValid), "IsValid", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceCyan), "DiskSpaceCyan", str); isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceLightYellowMove), "DiskSpaceLightYellowMove", str); isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceYellowStop), "DiskSpaceYellowStop", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceLightOrange), "DiskSpaceLightOrange", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceOrange), "DiskSpaceOrange", str); - isFirst = 
NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceRed), "DiskSpaceRed", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceBlack), "DiskSpaceBlack", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusNewOwner), "NewOwner", str); - isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusNotEnoughDiskSpaceForOperation), "NotEnoughDiskSpaceForOperation", str); - NText::OutFlag(isFirst, isFirst, "Unknown", str); - return str.Str(); - } + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceLightOrange), "DiskSpaceLightOrange", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceOrange), "DiskSpaceOrange", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceRed), "DiskSpaceRed", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusDiskSpaceBlack), "DiskSpaceBlack", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusNewOwner), "NewOwner", str); + isFirst = NText::OutFlag(isFirst, flags & ui32(NKikimrBlobStorage::StatusNotEnoughDiskSpaceForOperation), "NotEnoughDiskSpaceForOperation", str); + NText::OutFlag(isFirst, isFirst, "Unknown", str); + return str.Str(); + } } // NPDisk } // NKikimr - -template<> -inline void Out<NKikimr::NPDisk::TLogPosition>(IOutputStream& os, const NKikimr::NPDisk::TLogPosition& pos) { - os << "{"; - os << " ChunkIdx# " << pos.ChunkIdx; - os << " OffsetInChunk# " << pos.OffsetInChunk; - os << "}"; -} - -template<> -inline void Out<NKikimr::NPDisk::TOwner>(IOutputStream& os, const NKikimr::NPDisk::TPrintable_ui8& x) { - os << static_cast<ui64>(x); -} - + +template<> +inline void Out<NKikimr::NPDisk::TLogPosition>(IOutputStream& os, const NKikimr::NPDisk::TLogPosition& pos) { + os << "{"; + os << " ChunkIdx# " << pos.ChunkIdx; + os << " OffsetInChunk# " << pos.OffsetInChunk; + os 
<< "}"; +} + +template<> +inline void Out<NKikimr::NPDisk::TOwner>(IOutputStream& os, const NKikimr::NPDisk::TPrintable_ui8& x) { + os << static_cast<ui64>(x); +} + diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.cpp index d6fe73d344..9a19772f59 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.cpp @@ -1,58 +1,58 @@ -#include "defs.h" - -#include "blobstorage_pdisk_drivedata.h" - +#include "defs.h" + +#include "blobstorage_pdisk_drivedata.h" + #include <ydb/core/base/blobstorage.h> #include <ydb/core/protos/blobstorage.pb.h> - -namespace NKikimr { -namespace NPDisk { - -TDriveData::TDriveData(const NKikimrBlobStorage::TDriveData& p) - : Path(p.GetPath()) - , SerialNumber(p.GetSerialNumber()) - , FirmwareRevision(p.GetFirmwareRevision()) - , ModelNumber(p.GetModelNumber()) - , DeviceType(PDiskTypeToPDiskType(p.GetDeviceType())) - , Size(p.HasSize() ? p.GetSize() : 0) - , IsMock(p.HasIsMock() ? p.GetIsMock() : false) -{} - -TString TDriveData::ToString(bool isMultiline) const { - const char *x = isMultiline ? 
"\n" : ""; - TStringStream str; - str << "{Path# " << Path.Quote() << x - << " IsWriteCacheValid# " << IsWriteCacheValid << x - << " IsWriteCacheEnabled# " << IsWriteCacheEnabled << x - << " ModelNumber# " << ModelNumber.Quote() << x - << " SerialNumber# " << SerialNumber.Quote() << x - << " FirmwareRevision# " << FirmwareRevision.Quote() << x - << " DeviceType# " << TPDiskCategory::DeviceTypeStr(DeviceType, true) << x - << "}" << x; - return str.Str(); -} - -void TDriveData::ToProto(NKikimrBlobStorage::TDriveData *p) const { - p->SetPath(Path); - p->SetSerialNumber(SerialNumber); - p->SetFirmwareRevision(FirmwareRevision); - p->SetModelNumber(ModelNumber); - p->SetDeviceType(PDiskTypeToPDiskType(DeviceType)); - p->SetSize(Size); - p->SetIsMock(IsMock); -} - -bool operator==(const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs) { - NKikimrBlobStorage::TDriveData l_p; - lhs.ToProto(&l_p); - TString l_s; - Y_PROTOBUF_SUPPRESS_NODISCARD l_p.SerializeToString(&l_s); - NKikimrBlobStorage::TDriveData r_p; - rhs.ToProto(&r_p); - TString r_s; - Y_PROTOBUF_SUPPRESS_NODISCARD r_p.SerializeToString(&r_s); - return l_s == r_s; -} - -} // NPDisk -} // NKikimr + +namespace NKikimr { +namespace NPDisk { + +TDriveData::TDriveData(const NKikimrBlobStorage::TDriveData& p) + : Path(p.GetPath()) + , SerialNumber(p.GetSerialNumber()) + , FirmwareRevision(p.GetFirmwareRevision()) + , ModelNumber(p.GetModelNumber()) + , DeviceType(PDiskTypeToPDiskType(p.GetDeviceType())) + , Size(p.HasSize() ? p.GetSize() : 0) + , IsMock(p.HasIsMock() ? p.GetIsMock() : false) +{} + +TString TDriveData::ToString(bool isMultiline) const { + const char *x = isMultiline ? 
"\n" : ""; + TStringStream str; + str << "{Path# " << Path.Quote() << x + << " IsWriteCacheValid# " << IsWriteCacheValid << x + << " IsWriteCacheEnabled# " << IsWriteCacheEnabled << x + << " ModelNumber# " << ModelNumber.Quote() << x + << " SerialNumber# " << SerialNumber.Quote() << x + << " FirmwareRevision# " << FirmwareRevision.Quote() << x + << " DeviceType# " << TPDiskCategory::DeviceTypeStr(DeviceType, true) << x + << "}" << x; + return str.Str(); +} + +void TDriveData::ToProto(NKikimrBlobStorage::TDriveData *p) const { + p->SetPath(Path); + p->SetSerialNumber(SerialNumber); + p->SetFirmwareRevision(FirmwareRevision); + p->SetModelNumber(ModelNumber); + p->SetDeviceType(PDiskTypeToPDiskType(DeviceType)); + p->SetSize(Size); + p->SetIsMock(IsMock); +} + +bool operator==(const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs) { + NKikimrBlobStorage::TDriveData l_p; + lhs.ToProto(&l_p); + TString l_s; + Y_PROTOBUF_SUPPRESS_NODISCARD l_p.SerializeToString(&l_s); + NKikimrBlobStorage::TDriveData r_p; + rhs.ToProto(&r_p); + TString r_s; + Y_PROTOBUF_SUPPRESS_NODISCARD r_p.SerializeToString(&r_s); + return l_s == r_s; +} + +} // NPDisk +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.h index 8610f09b9c..5d024f0351 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.h @@ -1,36 +1,36 @@ #pragma once #include <ydb/core/base/blobstorage_pdisk_category.h> -#include <util/generic/string.h> - -namespace NKikimrBlobStorage { -class TDriveData; -} - +#include <util/generic/string.h> + +namespace NKikimrBlobStorage { +class TDriveData; +} + namespace NKikimr { namespace NPDisk { struct TDriveData { - TString Path; - bool IsWriteCacheValid = false; - bool IsWriteCacheEnabled = false; + TString Path; + bool IsWriteCacheValid = false; + bool IsWriteCacheEnabled = false; TString SerialNumber; TString 
FirmwareRevision; TString ModelNumber; - TPDiskCategory::EDeviceType DeviceType = TPDiskCategory::DEVICE_TYPE_UNKNOWN; - ui64 Size = 0; - bool IsMock = false; + TPDiskCategory::EDeviceType DeviceType = TPDiskCategory::DEVICE_TYPE_UNKNOWN; + ui64 Size = 0; + bool IsMock = false; + + TDriveData() = default; - TDriveData() = default; - - TDriveData(const NKikimrBlobStorage::TDriveData& p); + TDriveData(const NKikimrBlobStorage::TDriveData& p); - TString ToString(bool isMultiline) const; - - void ToProto(NKikimrBlobStorage::TDriveData *p) const; + TString ToString(bool isMultiline) const; + + void ToProto(NKikimrBlobStorage::TDriveData *p) const; }; -bool operator==(const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs); - +bool operator==(const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs); + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.cpp index eb640b3bc6..3419eeda6f 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.cpp @@ -1,283 +1,283 @@ #include "blobstorage_pdisk_driveestimator.h" #include "blobstorage_pdisk_util_wcache.h" - + #include <ydb/library/pdisk_io/aio.h> - -#include <util/system/align.h> - -namespace NKikimr { -namespace NPDisk { - -//////////////////////////////////////////////////////////////////////////////// -// TLoadCompl -//////////////////////////////////////////////////////////////////////////////// - -TDriveEstimator::TLoadCompl::TLoadCompl(TDriveEstimator *estimator) - : Estimator(estimator) -{} - -void TDriveEstimator::TLoadCompl::Exec(TActorSystem *actorSystem) { - Y_UNUSED(actorSystem); - if (AtomicIncrement(Estimator->Counter) == Estimator->Repeats) { - TGuard<TMutex> grd(Estimator->Mtx); - if (AtomicGet(Estimator->Counter) == Estimator->Repeats) { - Estimator->CondVar.Signal(); - } - } - delete this; -} - -void 
TDriveEstimator::TLoadCompl::Release(TActorSystem *actorSystem) { - Y_UNUSED(actorSystem); - delete this; -} - -//////////////////////////////////////////////////////////////////////////////// -// TSeekCompl -//////////////////////////////////////////////////////////////////////////////// - -TDriveEstimator::TSeekCompl::TSeekCompl(TDriveEstimator *estimator, ui32 counter, NHPTimer::STime prevCompletionTime) - : Estimator(estimator) - , Counter(counter) - , PrevCompletionTime(prevCompletionTime) -{} - -void TDriveEstimator::TSeekCompl::Exec(TActorSystem *actorSystem) { - Y_UNUSED(actorSystem); - if (Counter == Estimator->Repeats) { - TGuard<TMutex> grd(Estimator->Mtx); - Estimator->CondVar.Signal(); - } else { - const NHPTimer::STime now = HPNow(); - if (Counter > 1) { - Estimator->Durations.push_back(now - PrevCompletionTime); - } - Estimator->Device->PwriteAsync(Estimator->Buffer->Data(), TDriveEstimator::SeekBufferSize, - TDriveEstimator::SeekBufferSize * (Counter + 1), new TSeekCompl(Estimator, Counter + 1, now), + +#include <util/system/align.h> + +namespace NKikimr { +namespace NPDisk { + +//////////////////////////////////////////////////////////////////////////////// +// TLoadCompl +//////////////////////////////////////////////////////////////////////////////// + +TDriveEstimator::TLoadCompl::TLoadCompl(TDriveEstimator *estimator) + : Estimator(estimator) +{} + +void TDriveEstimator::TLoadCompl::Exec(TActorSystem *actorSystem) { + Y_UNUSED(actorSystem); + if (AtomicIncrement(Estimator->Counter) == Estimator->Repeats) { + TGuard<TMutex> grd(Estimator->Mtx); + if (AtomicGet(Estimator->Counter) == Estimator->Repeats) { + Estimator->CondVar.Signal(); + } + } + delete this; +} + +void TDriveEstimator::TLoadCompl::Release(TActorSystem *actorSystem) { + Y_UNUSED(actorSystem); + delete this; +} + +//////////////////////////////////////////////////////////////////////////////// +// TSeekCompl 
+//////////////////////////////////////////////////////////////////////////////// + +TDriveEstimator::TSeekCompl::TSeekCompl(TDriveEstimator *estimator, ui32 counter, NHPTimer::STime prevCompletionTime) + : Estimator(estimator) + , Counter(counter) + , PrevCompletionTime(prevCompletionTime) +{} + +void TDriveEstimator::TSeekCompl::Exec(TActorSystem *actorSystem) { + Y_UNUSED(actorSystem); + if (Counter == Estimator->Repeats) { + TGuard<TMutex> grd(Estimator->Mtx); + Estimator->CondVar.Signal(); + } else { + const NHPTimer::STime now = HPNow(); + if (Counter > 1) { + Estimator->Durations.push_back(now - PrevCompletionTime); + } + Estimator->Device->PwriteAsync(Estimator->Buffer->Data(), TDriveEstimator::SeekBufferSize, + TDriveEstimator::SeekBufferSize * (Counter + 1), new TSeekCompl(Estimator, Counter + 1, now), TReqId(TReqId::EstimatorSeekCompl, 0), nullptr); - } - delete this; -} - -void TDriveEstimator::TSeekCompl::Release(TActorSystem *actorSystem) { - Y_UNUSED(actorSystem); - delete this; -} - -//////////////////////////////////////////////////////////////////////////////// -// TDriveEstimator -//////////////////////////////////////////////////////////////////////////////// - -ui64 TDriveEstimator::EstimateSeekTimeNs() { - TGuard<TMutex> grd(Mtx); - Counter = 0; - - NHPTimer::STime start = HPNow(); + } + delete this; +} + +void TDriveEstimator::TSeekCompl::Release(TActorSystem *actorSystem) { + Y_UNUSED(actorSystem); + delete this; +} + +//////////////////////////////////////////////////////////////////////////////// +// TDriveEstimator +//////////////////////////////////////////////////////////////////////////////// + +ui64 TDriveEstimator::EstimateSeekTimeNs() { + TGuard<TMutex> grd(Mtx); + Counter = 0; + + NHPTimer::STime start = HPNow(); Device->PwriteAsync(Buffer->Data(), SeekBufferSize, 0, new TSeekCompl(this, 0, start), TReqId(TReqId::EstimatorSeekTimeNs, 0), nullptr); - CondVar.WaitI(Mtx); - grd.Release(); - - NHPTimer::STime seekTime = 
Durations.front(); - for (ui32 i = 0; i < Durations.size(); ++i) { - seekTime = Min(seekTime, Durations[i]); - } - Durations.clear(); - - return HPNanoSeconds(seekTime); -} - -void TDriveEstimator::EstimateSpeed(const bool isAtDriveBegin, ui64 outSpeed[TDriveModel::OP_TYPE_COUNT]) { - NHPTimer::STime start = HPNow(); - constexpr ui64 operationSize = 1 << 20; - static_assert(operationSize <= BufferSize, "operationSize must be less than or equal to BufferSize"); - TGuard<TMutex> grd(Mtx); - Counter = 0; - - for (ui32 i = 0; i < Repeats; ++i) { - ui64 offset; - if (isAtDriveBegin) { - offset = i * operationSize; - } else { - offset = DriveSize - (Repeats - i) * operationSize; - offset = AlignDown<ui64>(offset, SectorSize); - } + CondVar.WaitI(Mtx); + grd.Release(); + + NHPTimer::STime seekTime = Durations.front(); + for (ui32 i = 0; i < Durations.size(); ++i) { + seekTime = Min(seekTime, Durations[i]); + } + Durations.clear(); + + return HPNanoSeconds(seekTime); +} + +void TDriveEstimator::EstimateSpeed(const bool isAtDriveBegin, ui64 outSpeed[TDriveModel::OP_TYPE_COUNT]) { + NHPTimer::STime start = HPNow(); + constexpr ui64 operationSize = 1 << 20; + static_assert(operationSize <= BufferSize, "operationSize must be less than or equal to BufferSize"); + TGuard<TMutex> grd(Mtx); + Counter = 0; + + for (ui32 i = 0; i < Repeats; ++i) { + ui64 offset; + if (isAtDriveBegin) { + offset = i * operationSize; + } else { + offset = DriveSize - (Repeats - i) * operationSize; + offset = AlignDown<ui64>(offset, SectorSize); + } Device->PwriteAsync(Buffer->Data(), operationSize, offset, new TLoadCompl(this), TReqId(TReqId::EstimatorSpeed1, 0), nullptr); - } - - CondVar.WaitI(Mtx); - - double elapsed = HPSecondsFloat(HPNow() - start); - outSpeed[TDriveModel::OP_TYPE_WRITE] = (double)operationSize * Repeats / elapsed; - - Counter = 0; - start = HPNow(); - - for (ui32 i = 0; i < Repeats; ++i) { - ui64 offset; - if (isAtDriveBegin) { - offset = i * operationSize; - } else { - offset = 
DriveSize - (Repeats - i) * operationSize; - offset = AlignDown<ui64>(offset, SectorSize); - } + } + + CondVar.WaitI(Mtx); + + double elapsed = HPSecondsFloat(HPNow() - start); + outSpeed[TDriveModel::OP_TYPE_WRITE] = (double)operationSize * Repeats / elapsed; + + Counter = 0; + start = HPNow(); + + for (ui32 i = 0; i < Repeats; ++i) { + ui64 offset; + if (isAtDriveBegin) { + offset = i * operationSize; + } else { + offset = DriveSize - (Repeats - i) * operationSize; + offset = AlignDown<ui64>(offset, SectorSize); + } Device->PreadAsync(Buffer->Data(), operationSize, offset, new TLoadCompl(this), TReqId(TReqId::EstimatorSpeed2, 0), nullptr); - } - - CondVar.WaitI(Mtx); - - elapsed = HPSecondsFloat(HPNow() - start); - outSpeed[TDriveModel::OP_TYPE_READ] = (double)operationSize * Repeats / elapsed; - outSpeed[TDriveModel::OP_TYPE_AVG] = (outSpeed[TDriveModel::OP_TYPE_READ] + outSpeed[TDriveModel::OP_TYPE_WRITE]) / 2; -} - -ui64 TDriveEstimator::EstimateTrimSpeed() { - constexpr ui64 trimSize = 1ull << 20; - static_assert(trimSize <= BufferSize, "trimSize must be less than or equal to BufferSize"); - TGuard<TMutex> grd(Mtx); - Counter = 0; - - for (ui32 i = 0; i < Repeats; ++i) { - Device->PwriteAsync(Buffer->Data(), trimSize, i * trimSize, + } + + CondVar.WaitI(Mtx); + + elapsed = HPSecondsFloat(HPNow() - start); + outSpeed[TDriveModel::OP_TYPE_READ] = (double)operationSize * Repeats / elapsed; + outSpeed[TDriveModel::OP_TYPE_AVG] = (outSpeed[TDriveModel::OP_TYPE_READ] + outSpeed[TDriveModel::OP_TYPE_WRITE]) / 2; +} + +ui64 TDriveEstimator::EstimateTrimSpeed() { + constexpr ui64 trimSize = 1ull << 20; + static_assert(trimSize <= BufferSize, "trimSize must be less than or equal to BufferSize"); + TGuard<TMutex> grd(Mtx); + Counter = 0; + + for (ui32 i = 0; i < Repeats; ++i) { + Device->PwriteAsync(Buffer->Data(), trimSize, i * trimSize, new TLoadCompl(this), TReqId(TReqId::EstimatorTrimSpeed1, 0), nullptr); - } - CondVar.WaitI(Mtx); - Counter = 0; - - NHPTimer::STime 
start = HPNow(); - for (ui32 i = 0; i < Repeats; ++i) { - Device->TrimSync(trimSize, i * trimSize); - } - - if (Device->GetIsTrimEnabled()) { - const double elapsed = HPSecondsFloat(HPNow() - start); - return (double)trimSize * Repeats / elapsed; - } else { - return 0; - } -} - -ui64 TDriveEstimator::MeasureOperationDuration(const ui32 type, const ui64 size) { - constexpr ui32 eventsToSkip = Repeats / 4; - - TStackVec<TLoadCompl*, Repeats> completions; - for (ui32 repeat = 0; repeat < Repeats; ++repeat) { - completions.push_back(new TLoadCompl(this)); - } - - TGuard<TMutex> grd(Mtx); - NHPTimer::STime start = HPNow(); - Counter = 0; - for (ui32 repeat = 0; repeat < Repeats; ++repeat) { - switch (type) { - case TDriveModel::OP_TYPE_READ: - Device->PreadAsync(Buffer->Data(), size, repeat * size, completions[repeat], + } + CondVar.WaitI(Mtx); + Counter = 0; + + NHPTimer::STime start = HPNow(); + for (ui32 i = 0; i < Repeats; ++i) { + Device->TrimSync(trimSize, i * trimSize); + } + + if (Device->GetIsTrimEnabled()) { + const double elapsed = HPSecondsFloat(HPNow() - start); + return (double)trimSize * Repeats / elapsed; + } else { + return 0; + } +} + +ui64 TDriveEstimator::MeasureOperationDuration(const ui32 type, const ui64 size) { + constexpr ui32 eventsToSkip = Repeats / 4; + + TStackVec<TLoadCompl*, Repeats> completions; + for (ui32 repeat = 0; repeat < Repeats; ++repeat) { + completions.push_back(new TLoadCompl(this)); + } + + TGuard<TMutex> grd(Mtx); + NHPTimer::STime start = HPNow(); + Counter = 0; + for (ui32 repeat = 0; repeat < Repeats; ++repeat) { + switch (type) { + case TDriveModel::OP_TYPE_READ: + Device->PreadAsync(Buffer->Data(), size, repeat * size, completions[repeat], TReqId(TReqId::EstimatorDurationRead, 0), nullptr); - break; - case TDriveModel::OP_TYPE_WRITE: - Device->PwriteAsync(Buffer->Data(), size, repeat * size, completions[repeat], + break; + case TDriveModel::OP_TYPE_WRITE: + Device->PwriteAsync(Buffer->Data(), size, repeat * size, 
completions[repeat], TReqId(TReqId::EstimatorDurationWrite, 0), nullptr); - break; - default: - Y_FAIL(); - } - if (repeat == eventsToSkip) { - start = HPNow(); - } - } - NHPTimer::STime now = HPNow(); - CondVar.WaitI(Mtx); - return HPNanoSeconds(now - start) / (Repeats - 1 - eventsToSkip); -} - -void TDriveEstimator::EstimateGlueingDeadline(ui32 outGlueingDeadline[TDriveModel::OP_TYPE_COUNT]) { - constexpr ui32 maxSize = 128u << 10; - - for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_WRITE; ++type) { - TStackVec<ui64, 32> durations; - durations.push_back(Max<ui64>()); - for (ui32 sizeIdx = 1; sizeIdx <= maxSize / SeekBufferSize; ++sizeIdx) { - const ui64 size = sizeIdx * SeekBufferSize; - const ui64 durationNs = MeasureOperationDuration(type, size); - durations.push_back(durationNs); - } - - const ui64 minDuration = *MinElement(durations.begin(), durations.end()); - const ui64 threshold = minDuration * 2; - ui32 lowerSize = SeekBufferSize; - ui32 upperSize = SeekBufferSize; - for (ui32 i = 1; i < durations.size(); ++i) { - if (durations[i] < threshold) { - if (lowerSize == SeekBufferSize) { - lowerSize = i * SeekBufferSize; - } - upperSize = i * SeekBufferSize; - } - } - durations.clear(); - - ui64 glueingDeadlineMin = Max<ui64>(); - for (ui32 size = lowerSize; size <= upperSize; size += 512) { - const ui64 durationNs = MeasureOperationDuration(type, size); - glueingDeadlineMin = Min(glueingDeadlineMin, durationNs); - } - outGlueingDeadline[type] = glueingDeadlineMin; - } - outGlueingDeadline[TDriveModel::OP_TYPE_AVG] = (outGlueingDeadline[TDriveModel::OP_TYPE_READ] - + outGlueingDeadline[TDriveModel::OP_TYPE_WRITE]) / 2; -} - -TDriveEstimator::TDriveEstimator(const TString filename) + break; + default: + Y_FAIL(); + } + if (repeat == eventsToSkip) { + start = HPNow(); + } + } + NHPTimer::STime now = HPNow(); + CondVar.WaitI(Mtx); + return HPNanoSeconds(now - start) / (Repeats - 1 - eventsToSkip); +} + +void 
TDriveEstimator::EstimateGlueingDeadline(ui32 outGlueingDeadline[TDriveModel::OP_TYPE_COUNT]) { + constexpr ui32 maxSize = 128u << 10; + + for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_WRITE; ++type) { + TStackVec<ui64, 32> durations; + durations.push_back(Max<ui64>()); + for (ui32 sizeIdx = 1; sizeIdx <= maxSize / SeekBufferSize; ++sizeIdx) { + const ui64 size = sizeIdx * SeekBufferSize; + const ui64 durationNs = MeasureOperationDuration(type, size); + durations.push_back(durationNs); + } + + const ui64 minDuration = *MinElement(durations.begin(), durations.end()); + const ui64 threshold = minDuration * 2; + ui32 lowerSize = SeekBufferSize; + ui32 upperSize = SeekBufferSize; + for (ui32 i = 1; i < durations.size(); ++i) { + if (durations[i] < threshold) { + if (lowerSize == SeekBufferSize) { + lowerSize = i * SeekBufferSize; + } + upperSize = i * SeekBufferSize; + } + } + durations.clear(); + + ui64 glueingDeadlineMin = Max<ui64>(); + for (ui32 size = lowerSize; size <= upperSize; size += 512) { + const ui64 durationNs = MeasureOperationDuration(type, size); + glueingDeadlineMin = Min(glueingDeadlineMin, durationNs); + } + outGlueingDeadline[type] = glueingDeadlineMin; + } + outGlueingDeadline[TDriveModel::OP_TYPE_AVG] = (outGlueingDeadline[TDriveModel::OP_TYPE_READ] + + outGlueingDeadline[TDriveModel::OP_TYPE_WRITE]) / 2; +} + +TDriveEstimator::TDriveEstimator(const TString filename) : Filename(filename) , Counters(new NMonitoring::TDynamicCounters()) , PDiskMon(Counters, 0, nullptr) - , ActorSystemCreator(new TActorSystemCreator) - , ActorSystem(ActorSystemCreator->GetActorSystem()) - , QueueDepth(4) - , Device(CreateRealBlockDevice(filename, 0, PDiskMon, 50, 0, QueueDepth, TDeviceMode::LockFile, 128, nullptr)) - , BufferPool(CreateBufferPool(BufferSize, 1, false, {})) - , Buffer(BufferPool->Pop()) -{ + , ActorSystemCreator(new TActorSystemCreator) + , ActorSystem(ActorSystemCreator->GetActorSystem()) + , QueueDepth(4) + , 
Device(CreateRealBlockDevice(filename, 0, PDiskMon, 50, 0, QueueDepth, TDeviceMode::LockFile, 128, nullptr)) + , BufferPool(CreateBufferPool(BufferSize, 1, false, {})) + , Buffer(BufferPool->Pop()) +{ memset(Buffer->Data(), 7, Buffer->Size()); // Initialize the buffer so that Valgrind does not complain - bool isBlockDevice = false; - ActorSystem->AppData<TAppData>()->IoContextFactory->DetectFileParameters(filename, DriveSize, isBlockDevice); - Y_VERIFY(Buffer->Size() * Repeats < DriveSize); - Device->Initialize(ActorSystem, {}); - Y_VERIFY_S(Device->IsGood(), "Cannot Initialize TBlockDevice"); -} - -TDriveModel TDriveEstimator::EstimateDriveModel() { - TDriveModel model; - for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_AVG; ++type) { - model.OptimalQueueDepth[type] = QueueDepth; - } - model.SeekTimeNsec = EstimateSeekTimeNs(); - ui64 speedBpsMin[TDriveModel::OP_TYPE_COUNT]; - ui64 speedBpsMax[TDriveModel::OP_TYPE_COUNT]; - EstimateSpeed(false, speedBpsMin); - EstimateSpeed(true, speedBpsMax); - - for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_AVG; ++type) { - model.SpeedBpsMin[type] = Min(speedBpsMin[type], speedBpsMax[type]); - model.SpeedBpsMax[type] = Max(speedBpsMin[type], speedBpsMax[type]); - model.SpeedBps[type] = (model.SpeedBpsMax[type] + model.SpeedBpsMin[type]) / 2; - } - model.TrimSpeedBps = EstimateTrimSpeed(); - EstimateGlueingDeadline(model.GlueingDeadline); + bool isBlockDevice = false; + ActorSystem->AppData<TAppData>()->IoContextFactory->DetectFileParameters(filename, DriveSize, isBlockDevice); + Y_VERIFY(Buffer->Size() * Repeats < DriveSize); + Device->Initialize(ActorSystem, {}); + Y_VERIFY_S(Device->IsGood(), "Cannot Initialize TBlockDevice"); +} + +TDriveModel TDriveEstimator::EstimateDriveModel() { + TDriveModel model; + for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_AVG; ++type) { + model.OptimalQueueDepth[type] = QueueDepth; + } + model.SeekTimeNsec = 
EstimateSeekTimeNs(); + ui64 speedBpsMin[TDriveModel::OP_TYPE_COUNT]; + ui64 speedBpsMax[TDriveModel::OP_TYPE_COUNT]; + EstimateSpeed(false, speedBpsMin); + EstimateSpeed(true, speedBpsMax); + + for (ui32 type = TDriveModel::OP_TYPE_READ; type <= TDriveModel::OP_TYPE_AVG; ++type) { + model.SpeedBpsMin[type] = Min(speedBpsMin[type], speedBpsMax[type]); + model.SpeedBpsMax[type] = Max(speedBpsMin[type], speedBpsMax[type]); + model.SpeedBps[type] = (model.SpeedBpsMax[type] + model.SpeedBpsMin[type]) / 2; + } + model.TrimSpeedBps = EstimateTrimSpeed(); + EstimateGlueingDeadline(model.GlueingDeadline); // Get the metadata - TDriveData driveData = Device->GetDriveData(); - model.ModelSource = NKikimrBlobStorage::TDriveModel::SourceLocalMeasure; - model.SourceSerialNumber = driveData.SerialNumber; - model.SourceFirmwareRevision = driveData.FirmwareRevision; - model.SourceModelNumber = driveData.ModelNumber; - model.IsSourceSharedWithOs = false; // TODO(cthulhu): obtain this data - model.IsSourceWriteCacheEnabled = driveData.IsWriteCacheEnabled; - - return model; -} - -} -} + TDriveData driveData = Device->GetDriveData(); + model.ModelSource = NKikimrBlobStorage::TDriveModel::SourceLocalMeasure; + model.SourceSerialNumber = driveData.SerialNumber; + model.SourceFirmwareRevision = driveData.FirmwareRevision; + model.SourceModelNumber = driveData.ModelNumber; + model.IsSourceSharedWithOs = false; // TODO(cthulhu): obtain this data + model.IsSourceWriteCacheEnabled = driveData.IsWriteCacheEnabled; + + return model; +} + +} +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.h index 3ea6c203af..8855da6c21 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_driveestimator.h @@ -1,78 +1,78 @@ -#pragma once - -#include "defs.h" - -#include "blobstorage_pdisk_blockdevice.h" +#pragma once + +#include "defs.h" + +#include 
"blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_drivemodel.h" -#include "blobstorage_pdisk_actorsystem_creator.h" -#include "blobstorage_pdisk_mon.h" - -#include <util/system/hp_timer.h> -#include <util/system/atomic.h> -#include <util/system/condvar.h> +#include "blobstorage_pdisk_drivemodel.h" +#include "blobstorage_pdisk_actorsystem_creator.h" +#include "blobstorage_pdisk_mon.h" + +#include <util/system/hp_timer.h> +#include <util/system/atomic.h> +#include <util/system/condvar.h> #include <library/cpp/actors/core/actorsystem.h> - -namespace NKikimr { -namespace NPDisk { - -class TDriveEstimator { + +namespace NKikimr { +namespace NPDisk { + +class TDriveEstimator { TString Filename; - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - TPDiskMon PDiskMon; - std::unique_ptr<TActorSystemCreator> ActorSystemCreator; - TActorSystem *ActorSystem; - const ui32 QueueDepth; - THolder<IBlockDevice> Device; - ui64 DriveSize; - THolder<TBufferPool> BufferPool; - TBuffer::TPtr Buffer; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + TPDiskMon PDiskMon; + std::unique_ptr<TActorSystemCreator> ActorSystemCreator; + TActorSystem *ActorSystem; + const ui32 QueueDepth; + THolder<IBlockDevice> Device; + ui64 DriveSize; + THolder<TBufferPool> BufferPool; + TBuffer::TPtr Buffer; TAtomic Counter = 0; - TMutex Mtx; - TCondVar CondVar; - static constexpr ui32 Repeats = 128; - static constexpr ui32 BufferSize = 128 << 20; - static constexpr ui32 SectorSize = 4096; - static constexpr ui32 SeekBufferSize = 4096; - TVector<NHPTimer::STime> Durations; - - struct TLoadCompl : public NPDisk::TCompletionAction { - TDriveEstimator *Estimator; - - TLoadCompl(TDriveEstimator *estimator); - - void Exec(TActorSystem *actorSystem) override; - - void Release(TActorSystem *actorSystem) override; - }; - - struct TSeekCompl : public NPDisk::TCompletionAction { - TDriveEstimator *Estimator; - ui32 Counter; - NHPTimer::STime 
PrevCompletionTime; - - TSeekCompl(TDriveEstimator *estimator, ui32 counter, NHPTimer::STime prevCompletionTime); - - void Exec(TActorSystem *actorSystem) override; - - void Release(TActorSystem *actorSystem) override; - }; - - ui64 EstimateSeekTimeNs(); - - void EstimateSpeed(const bool isAtDriveBegin, ui64 outSpeed[TDriveModel::OP_TYPE_COUNT]); - - ui64 MeasureOperationDuration(const ui32 type, const ui64 size); - - void EstimateGlueingDeadline(ui32 outGlueingDeadlin[TDriveModel::OP_TYPE_COUNT]); - - ui64 EstimateTrimSpeed(); - -public: - TDriveEstimator(const TString filename); - - TDriveModel EstimateDriveModel(); -}; - -} -} + TMutex Mtx; + TCondVar CondVar; + static constexpr ui32 Repeats = 128; + static constexpr ui32 BufferSize = 128 << 20; + static constexpr ui32 SectorSize = 4096; + static constexpr ui32 SeekBufferSize = 4096; + TVector<NHPTimer::STime> Durations; + + struct TLoadCompl : public NPDisk::TCompletionAction { + TDriveEstimator *Estimator; + + TLoadCompl(TDriveEstimator *estimator); + + void Exec(TActorSystem *actorSystem) override; + + void Release(TActorSystem *actorSystem) override; + }; + + struct TSeekCompl : public NPDisk::TCompletionAction { + TDriveEstimator *Estimator; + ui32 Counter; + NHPTimer::STime PrevCompletionTime; + + TSeekCompl(TDriveEstimator *estimator, ui32 counter, NHPTimer::STime prevCompletionTime); + + void Exec(TActorSystem *actorSystem) override; + + void Release(TActorSystem *actorSystem) override; + }; + + ui64 EstimateSeekTimeNs(); + + void EstimateSpeed(const bool isAtDriveBegin, ui64 outSpeed[TDriveModel::OP_TYPE_COUNT]); + + ui64 MeasureOperationDuration(const ui32 type, const ui64 size); + + void EstimateGlueingDeadline(ui32 outGlueingDeadlin[TDriveModel::OP_TYPE_COUNT]); + + ui64 EstimateTrimSpeed(); + +public: + TDriveEstimator(const TString filename); + + TDriveModel EstimateDriveModel(); +}; + +} +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivemodel.h 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivemodel.h index 0737e11c88..2a0678dbd2 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivemodel.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivemodel.h @@ -1,7 +1,7 @@ #pragma once #include "defs.h" #include <ydb/core/protos/drivemodel.pb.h> -#include <cmath> +#include <cmath> namespace NKikimr { namespace NPDisk { @@ -11,26 +11,26 @@ namespace NPDisk { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class TDriveModel : public TThrRefBase { public: - enum EOperationType { - OP_TYPE_READ = 0, - OP_TYPE_WRITE = 1, - OP_TYPE_AVG = 2, - OP_TYPE_COUNT = 3, - }; - + enum EOperationType { + OP_TYPE_READ = 0, + OP_TYPE_WRITE = 1, + OP_TYPE_AVG = 2, + OP_TYPE_COUNT = 3, + }; + protected: // Model data ui64 SeekTimeNsec; ui64 BulkReadBlockSizeBytes; ui64 BulkWriteBlockSizeBytes; ui64 TrimSpeedBps; - ui32 TotalChunksCount; - ui64 SpeedBps[OP_TYPE_COUNT]; - ui64 SpeedBpsMin[OP_TYPE_COUNT]; - ui64 SpeedBpsMax[OP_TYPE_COUNT]; - TVector<ui64> ChunkSpeedBps[OP_TYPE_COUNT]; - ui32 OptimalQueueDepth[OP_TYPE_COUNT]; - ui32 GlueingDeadline[OP_TYPE_COUNT]; + ui32 TotalChunksCount; + ui64 SpeedBps[OP_TYPE_COUNT]; + ui64 SpeedBpsMin[OP_TYPE_COUNT]; + ui64 SpeedBpsMax[OP_TYPE_COUNT]; + TVector<ui64> ChunkSpeedBps[OP_TYPE_COUNT]; + ui32 OptimalQueueDepth[OP_TYPE_COUNT]; + ui32 GlueingDeadline[OP_TYPE_COUNT]; // Model metadata NKikimrBlobStorage::TDriveModel::EModelSource ModelSource; TString SourceModelNumber; @@ -38,13 +38,13 @@ protected: TString SourceSerialNumber; bool IsSourceWriteCacheEnabled; bool IsSourceSharedWithOs; - + friend class TDriveEstimator; public: - TDriveModel() { - Clear(); - } - + TDriveModel() { + Clear(); + } + TDriveModel(ui64 seekTimeNs, ui64 speedBps, ui64 bulkWriteBlockSize, ui64 trimSpeedBps, ui64 speedBpsMin, ui64 speedBpsMax, ui32 queueDepth) { Clear(); @@ -52,46 +52,46 @@ public: BulkReadBlockSizeBytes = 
bulkWriteBlockSize; BulkWriteBlockSizeBytes = bulkWriteBlockSize; TrimSpeedBps = trimSpeedBps; - for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_WRITE; ++type) { - SpeedBps[type] = speedBps; - SpeedBpsMin[type] = speedBpsMin > 100ull ? speedBpsMin : 100ull; - SpeedBpsMax[type] = speedBpsMax < 1000000000000ull ? speedBpsMax : 1000000000000ull; - OptimalQueueDepth[type] = queueDepth; - } + for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_WRITE; ++type) { + SpeedBps[type] = speedBps; + SpeedBpsMin[type] = speedBpsMin > 100ull ? speedBpsMin : 100ull; + SpeedBpsMax[type] = speedBpsMax < 1000000000000ull ? speedBpsMax : 1000000000000ull; + OptimalQueueDepth[type] = queueDepth; + } CalculateAvgs(); } void CalculateAvgs() { - SpeedBps[OP_TYPE_AVG] = (SpeedBps[OP_TYPE_READ] + SpeedBps[OP_TYPE_WRITE]) / 2; - SpeedBpsMin[OP_TYPE_AVG] = (SpeedBpsMin[OP_TYPE_READ] + SpeedBpsMin[OP_TYPE_WRITE]) / 2; - SpeedBpsMax[OP_TYPE_AVG] = (SpeedBpsMax[OP_TYPE_READ] + SpeedBpsMax[OP_TYPE_WRITE]) / 2; - OptimalQueueDepth[OP_TYPE_AVG] = (OptimalQueueDepth[OP_TYPE_READ] + OptimalQueueDepth[OP_TYPE_WRITE]) / 2; - GlueingDeadline[OP_TYPE_AVG] = (GlueingDeadline[OP_TYPE_READ] + GlueingDeadline[OP_TYPE_WRITE]) / 2; - } - - void Clear() { - SeekTimeNsec = 0; + SpeedBps[OP_TYPE_AVG] = (SpeedBps[OP_TYPE_READ] + SpeedBps[OP_TYPE_WRITE]) / 2; + SpeedBpsMin[OP_TYPE_AVG] = (SpeedBpsMin[OP_TYPE_READ] + SpeedBpsMin[OP_TYPE_WRITE]) / 2; + SpeedBpsMax[OP_TYPE_AVG] = (SpeedBpsMax[OP_TYPE_READ] + SpeedBpsMax[OP_TYPE_WRITE]) / 2; + OptimalQueueDepth[OP_TYPE_AVG] = (OptimalQueueDepth[OP_TYPE_READ] + OptimalQueueDepth[OP_TYPE_WRITE]) / 2; + GlueingDeadline[OP_TYPE_AVG] = (GlueingDeadline[OP_TYPE_READ] + GlueingDeadline[OP_TYPE_WRITE]) / 2; + } + + void Clear() { + SeekTimeNsec = 0; BulkReadBlockSizeBytes = 0; - BulkWriteBlockSizeBytes = 0; - TrimSpeedBps = 0; - TotalChunksCount = 0; - for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_AVG; ++type) { - SpeedBps[type] = 0; - SpeedBpsMin[type] = 0; - SpeedBpsMax[type] = 0; 
- ChunkSpeedBps[type].clear(); - OptimalQueueDepth[type] = 0; - GlueingDeadline[type] = 0; - } + BulkWriteBlockSizeBytes = 0; + TrimSpeedBps = 0; + TotalChunksCount = 0; + for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_AVG; ++type) { + SpeedBps[type] = 0; + SpeedBpsMin[type] = 0; + SpeedBpsMax[type] = 0; + ChunkSpeedBps[type].clear(); + OptimalQueueDepth[type] = 0; + GlueingDeadline[type] = 0; + } ModelSource = NKikimrBlobStorage::TDriveModel::SourceClear; SourceModelNumber = ""; SourceFirmwareRevision = ""; SourceSerialNumber = ""; IsSourceWriteCacheEnabled = false; IsSourceSharedWithOs = false; - } - + } + void Apply(const NKikimrBlobStorage::TDriveModel *cfg) { if (!cfg) { return; @@ -151,43 +151,43 @@ public: outCfg->SetIsSourceSharedWithOs(IsSourceSharedWithOs); } - void SetTotalChunksCount(ui32 totalChunksCount) { - TotalChunksCount = totalChunksCount; - for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_AVG; ++type) { - ChunkSpeedBps[type].resize(totalChunksCount); - double SpeedPerChunkSqr; - for (ui32 i = 0; i < totalChunksCount; ++i) { - SpeedPerChunkSqr = ((double)SpeedBpsMax[type] * SpeedBpsMax[type] - - (double)SpeedBpsMin[type] * SpeedBpsMin[type]) / totalChunksCount; - ChunkSpeedBps[type][i] = std::sqrt((double)SpeedBpsMax[type] * SpeedBpsMax[type] - i * SpeedPerChunkSqr); - } - } - } - + void SetTotalChunksCount(ui32 totalChunksCount) { + TotalChunksCount = totalChunksCount; + for (ui32 type = OP_TYPE_READ; type <= OP_TYPE_AVG; ++type) { + ChunkSpeedBps[type].resize(totalChunksCount); + double SpeedPerChunkSqr; + for (ui32 i = 0; i < totalChunksCount; ++i) { + SpeedPerChunkSqr = ((double)SpeedBpsMax[type] * SpeedBpsMax[type] - + (double)SpeedBpsMin[type] * SpeedBpsMin[type]) / totalChunksCount; + ChunkSpeedBps[type][i] = std::sqrt((double)SpeedBpsMax[type] * SpeedBpsMax[type] - i * SpeedPerChunkSqr); + } + } + } + ui64 SeekTimeNs() const { return SeekTimeNsec; } - ui64 Speed(EOperationType type) const { - return SpeedBps[type]; - } - - ui64 Speed(ui32 
chunkIdx, EOperationType type) const { - if (chunkIdx < TotalChunksCount) { - return ChunkSpeedBps[type][chunkIdx]; - } else { - return SpeedBps[type]; - } - } - - ui64 TimeForSizeNs(ui64 bytesToProcess, EOperationType type) const { - return (ui64)bytesToProcess * 1000000000ull / SpeedBps[type]; - } - - ui64 TimeForSizeNs(ui64 bytesToProcess, ui32 chunkIdx, EOperationType type) const { - return (ui64)bytesToProcess * 1000000000ull / Speed(chunkIdx, type); - } - + ui64 Speed(EOperationType type) const { + return SpeedBps[type]; + } + + ui64 Speed(ui32 chunkIdx, EOperationType type) const { + if (chunkIdx < TotalChunksCount) { + return ChunkSpeedBps[type][chunkIdx]; + } else { + return SpeedBps[type]; + } + } + + ui64 TimeForSizeNs(ui64 bytesToProcess, EOperationType type) const { + return (ui64)bytesToProcess * 1000000000ull / SpeedBps[type]; + } + + ui64 TimeForSizeNs(ui64 bytesToProcess, ui32 chunkIdx, EOperationType type) const { + return (ui64)bytesToProcess * 1000000000ull / Speed(chunkIdx, type); + } + ui64 TrimTimeForSizeNs(ui64 bytesToTrim) const { return (ui64)bytesToTrim * 1000000000ull / TrimSpeedBps; } @@ -196,8 +196,8 @@ public: return TrimSpeedBps > 0; } - ui64 SizeForTimeNs(ui64 durationNs, ui32 chunkIdx, EOperationType type) const { - return Speed(chunkIdx, type) * durationNs / 1000000000ull; + ui64 SizeForTimeNs(ui64 durationNs, ui32 chunkIdx, EOperationType type) const { + return Speed(chunkIdx, type) * durationNs / 1000000000ull; } ui64 BulkWriteBlockSize() const { @@ -216,19 +216,19 @@ public: TStringStream str; const char *x = isMultiline ? 
"\n" : ""; str << "{TDriveModel" << x; - str << " SeekTimeNsec# " << SeekTimeNsec << x; - str << " TrimSpeedBps# " << TrimSpeedBps << x; + str << " SeekTimeNsec# " << SeekTimeNsec << x; + str << " TrimSpeedBps# " << TrimSpeedBps << x; str << " BulkWriteBlockSizeBytes# " << BulkWriteBlockSizeBytes << x; - str << " SpeedBps[OP_TYPE_READ]# " << SpeedBps[OP_TYPE_READ] << x; - str << " SpeedBps[OP_TYPE_WRITE]# " << SpeedBps[OP_TYPE_WRITE] << x; - str << " SpeedBpsMin[OP_TYPE_READ]# " << SpeedBpsMin[OP_TYPE_READ] << x; - str << " SpeedBpsMin[OP_TYPE_WRITE]# " << SpeedBpsMin[OP_TYPE_WRITE] << x; - str << " SpeedBpsMax[OP_TYPE_READ]# " << SpeedBpsMax[OP_TYPE_READ] << x; - str << " SpeedBpsMax[OP_TYPE_WRITE]# " << SpeedBpsMax[OP_TYPE_WRITE] << x; - str << " OptimalQueueDepth[OP_TYPE_READ]# " << OptimalQueueDepth[OP_TYPE_READ] << x; - str << " OptimalQueueDepth[OP_TYPE_WRITE]# " << OptimalQueueDepth[OP_TYPE_WRITE] << x; - str << " GlueingDeadline[OP_TYPE_READ]# " << GlueingDeadline[OP_TYPE_READ] << x; - str << " GlueingDeadline[OP_TYPE_WRITE]# " << GlueingDeadline[OP_TYPE_WRITE] << x; + str << " SpeedBps[OP_TYPE_READ]# " << SpeedBps[OP_TYPE_READ] << x; + str << " SpeedBps[OP_TYPE_WRITE]# " << SpeedBps[OP_TYPE_WRITE] << x; + str << " SpeedBpsMin[OP_TYPE_READ]# " << SpeedBpsMin[OP_TYPE_READ] << x; + str << " SpeedBpsMin[OP_TYPE_WRITE]# " << SpeedBpsMin[OP_TYPE_WRITE] << x; + str << " SpeedBpsMax[OP_TYPE_READ]# " << SpeedBpsMax[OP_TYPE_READ] << x; + str << " SpeedBpsMax[OP_TYPE_WRITE]# " << SpeedBpsMax[OP_TYPE_WRITE] << x; + str << " OptimalQueueDepth[OP_TYPE_READ]# " << OptimalQueueDepth[OP_TYPE_READ] << x; + str << " OptimalQueueDepth[OP_TYPE_WRITE]# " << OptimalQueueDepth[OP_TYPE_WRITE] << x; + str << " GlueingDeadline[OP_TYPE_READ]# " << GlueingDeadline[OP_TYPE_READ] << x; + str << " GlueingDeadline[OP_TYPE_WRITE]# " << GlueingDeadline[OP_TYPE_WRITE] << x; str << " ModelSource# " << (ui64)ModelSource << x; str << " SourceModelNumber# \"" << SourceModelNumber << "\"" << x; 
str << " SourceFirmwareRevision# \"" << SourceFirmwareRevision << "\"" << x; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp index ae070cb8b5..d00fc0f220 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp @@ -2,15 +2,15 @@ #include "blobstorage_pdisk_chunk_id_formatter.h" #include "blobstorage_pdisk_completion_impl.h" -#include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_request_id.h" - +#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_request_id.h" + #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/protos/blobstorage.pb.h> #include <ydb/core/blobstorage/crypto/secured_block.h> #include <ydb/library/schlab/schine/job_kind.h> - -#include <util/system/unaligned_mem.h> + +#include <util/system/unaligned_mem.h> namespace NKikimr { namespace NPDisk { @@ -21,17 +21,17 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); // Initialization //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -TPDisk::TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) +TPDisk::TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) : PDiskId(cfg->PDiskId) - , Mon(counters, PDiskId, cfg.Get()) + , Mon(counters, PDiskId, cfg.Get()) , DriveModel(cfg->DriveModelSeekTimeNs, cfg->DriveModelSpeedBps, cfg->DriveModelBulkWrieBlockSize, - cfg->DriveModelTrimSpeedBps, - cfg->DriveModelSpeedBpsMin, - cfg->DriveModelSpeedBpsMax, - cfg->DeviceInFlight) - , ReqCreator(PDiskId, &Mon, &DriveModel, &EstimatedLogChunkIdx) + cfg->DriveModelTrimSpeedBps, + cfg->DriveModelSpeedBpsMin, + cfg->DriveModelSpeedBpsMax, + cfg->DeviceInFlight) + , ReqCreator(PDiskId, &Mon, &DriveModel, &EstimatedLogChunkIdx) , ReorderingMs(cfg->ReorderingMs) , 
LogSeekCostLoop(2) , ActorSystem(nullptr) @@ -42,14 +42,14 @@ TPDisk::TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonit , Keeper(Mon, cfg) , CostLimitNs(cfg->CostLimitNs) , PDiskThread(*this) - , BlockDevice(CreateRealBlockDevice(cfg->GetDevicePath(), cfg->PDiskId, Mon, - HPCyclesMs(ReorderingMs), DriveModel.SeekTimeNs(), cfg->DeviceInFlight, - TDeviceMode::LockFile | (cfg->UseSpdkNvmeDriver ? TDeviceMode::UseSpdk : 0), - cfg->MaxQueuedCompletionActions, cfg->SectorMap)) + , BlockDevice(CreateRealBlockDevice(cfg->GetDevicePath(), cfg->PDiskId, Mon, + HPCyclesMs(ReorderingMs), DriveModel.SeekTimeNs(), cfg->DeviceInFlight, + TDeviceMode::LockFile | (cfg->UseSpdkNvmeDriver ? TDeviceMode::UseSpdk : 0), + cfg->MaxQueuedCompletionActions, cfg->SectorMap)) , Cfg(cfg) - , CreationTime(TInstant::Now()) + , CreationTime(TInstant::Now()) , ExpectedSlotCount(cfg->ExpectedSlotCount) - , UseHugePages(cfg->UseSpdkNvmeDriver) + , UseHugePages(cfg->UseSpdkNvmeDriver) { SlowdownAddLatencyNs = TControlWrapper(0, 0, 100'000'000'000ll); EnableForsetiBinLog = TControlWrapper(0, 0, 1); @@ -69,9 +69,9 @@ TPDisk::TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonit Format.Clear(); *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Initial; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Booting; - ErrorStr = "PDisk is initializing now"; - + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Booting; + ErrorStr = "PDisk is initializing now"; + for (ui32 i = 0; i < NonceCount; ++i) { SysLogRecord.Nonces.Value[i] = 1; LoggedNonces.Value[i] = 1; @@ -102,7 +102,7 @@ TString TPDisk::DynamicStateToString(bool isMultiline) { bool TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { TGuard<TMutex> guard(StateMutex); - TPDiskStreamCypher cypher(true); // Format record is always encrypted + TPDiskStreamCypher cypher(true); // Format record is always encrypted cypher.SetKey(mainKey); Format.SectorSize = FormatSectorSize; @@ -114,7 +114,7 @@ bool 
TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { ui64 sectorOffset = i * FormatSectorSize; ui8* formatSector = formatSectors + sectorOffset; TDataSectorFooter *footer = (TDataSectorFooter*) - (formatSector + FormatSectorSize - sizeof(TDataSectorFooter)); + (formatSector + FormatSectorSize - sizeof(TDataSectorFooter)); cypher.StartMessage(footer->Nonce); alignas(16) TDiskFormat diskFormat; @@ -123,15 +123,15 @@ bool TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { isBad[i] = !diskFormat.IsHashOk(FormatSectorSize); if (!isBad[i]) { Format.UpgradeFrom(diskFormat); - if (Format.IsErasureEncodeUserChunks()) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Read from disk Format has FormatFlagErasureEncodeUserChunks set, " - << " but current version of PDisk can't work with it" - << " Format# " << Format.ToString() - << " Marker# BPD80"); - Y_FAIL_S("PDiskId# " << PDiskId - << "Unable to run PDisk on disk with FormatFlagErasureEncodeUserChunks set"); - } + if (Format.IsErasureEncodeUserChunks()) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Read from disk Format has FormatFlagErasureEncodeUserChunks set, " + << " but current version of PDisk can't work with it" + << " Format# " << Format.ToString() + << " Marker# BPD80"); + Y_FAIL_S("PDiskId# " << PDiskId + << "Unable to run PDisk on disk with FormatFlagErasureEncodeUserChunks set"); + } if (Format.IsErasureEncodeUserLog()) { LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " Read from disk Format has FormatFlagErasureEncodeUserLog set, " @@ -142,7 +142,7 @@ bool TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { << "Unable to run PDisk on disk with FormatFlagErasureEncodeUserLog set"); } lastGoodIdx = i; - *Mon.TotalSpaceBytes = Format.DiskSize; + *Mon.TotalSpaceBytes = Format.DiskSize; } else { isBadPresent = true; } @@ -154,7 +154,7 @@ bool 
TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { if (isBadPresent) { for (ui32 i = 0; i < ReplicationFactor; ++i) { if (isBad[i]) { - TBuffer* buffer = BufferPool->Pop(); + TBuffer* buffer = BufferPool->Pop(); Y_VERIFY(FormatSectorSize <= buffer->Size()); memcpy(buffer->Data(), formatSector, FormatSectorSize); ui64 targetOffset = i * FormatSectorSize; @@ -176,34 +176,34 @@ bool TPDisk::ReadChunk0Format(ui8* formatSectors, const TKey& mainKey) { return false; } -bool TPDisk::IsFormatMagicValid(ui8 *magicData8, ui32 magicDataSize) { - Y_VERIFY_S(magicDataSize % sizeof(ui64) == 0, "Magic data size# "<< magicDataSize - << " must be a multiple of sizeof(ui64)"); - Y_VERIFY_S(magicDataSize >= FormatSectorSize, "Magic data size# "<< magicDataSize - << " must greater or equals to FormatSectorSize# " << FormatSectorSize); - ui64 magicOr = 0ull; - ui64 isIncompleteFormatMagicPresent = true; - ui64 *magicData64 = reinterpret_cast<ui64 *>(magicData8); - for (ui32 i = 0; i < magicDataSize / sizeof(ui64); ++i) { - magicOr |= magicData64[i]; - if (i < MagicIncompleteFormatSize / sizeof(ui64) && MagicIncompleteFormat != magicData64[i]) { - isIncompleteFormatMagicPresent = false; - } - } - return magicOr == 0ull || isIncompleteFormatMagicPresent; -} - -bool TPDisk::CheckGuid(TString *outReason) { - const bool ok = Format.Guid == ExpectedDiskGuid; - if (!ok && outReason) { - *outReason = TStringBuilder() << "expected# " << ExpectedDiskGuid << " on-disk# " << Format.Guid; - } - - return ok; +bool TPDisk::IsFormatMagicValid(ui8 *magicData8, ui32 magicDataSize) { + Y_VERIFY_S(magicDataSize % sizeof(ui64) == 0, "Magic data size# "<< magicDataSize + << " must be a multiple of sizeof(ui64)"); + Y_VERIFY_S(magicDataSize >= FormatSectorSize, "Magic data size# "<< magicDataSize + << " must greater or equals to FormatSectorSize# " << FormatSectorSize); + ui64 magicOr = 0ull; + ui64 isIncompleteFormatMagicPresent = true; + ui64 *magicData64 = reinterpret_cast<ui64 
*>(magicData8); + for (ui32 i = 0; i < magicDataSize / sizeof(ui64); ++i) { + magicOr |= magicData64[i]; + if (i < MagicIncompleteFormatSize / sizeof(ui64) && MagicIncompleteFormat != magicData64[i]) { + isIncompleteFormatMagicPresent = false; + } + } + return magicOr == 0ull || isIncompleteFormatMagicPresent; +} + +bool TPDisk::CheckGuid(TString *outReason) { + const bool ok = Format.Guid == ExpectedDiskGuid; + if (!ok && outReason) { + *outReason = TStringBuilder() << "expected# " << ExpectedDiskGuid << " on-disk# " << Format.Guid; + } + + return ok; } bool TPDisk::CheckFormatComplete() { - return !Format.IsFormatInProgress(); + return !Format.IsFormatInProgress(); } void TPDisk::InitFreeChunks() { @@ -211,13 +211,13 @@ void TPDisk::InitFreeChunks() { for (ui32 i = 0; i < ChunkState.size(); ++i) { if (ChunkState[i].OwnerId == OwnerUnallocated) { Keeper.InitialPushFree(i); - } else if (ChunkState[i].OwnerId == OwnerUnallocatedTrimmed) { + } else if (ChunkState[i].OwnerId == OwnerUnallocatedTrimmed) { Keeper.InitialPushTrimmed(i); } } - if (Cfg->FeatureFlags.GetTrimEntireDeviceOnStartup()) { - TrimAllUntrimmedChunks(); - } + if (Cfg->FeatureFlags.GetTrimEntireDeviceOnStartup()) { + TrimAllUntrimmedChunks(); + } } @@ -234,10 +234,10 @@ TString TPDisk::StartupOwnerInfo() { if (data.VDiskId != TVDiskID::InvalidId) { str << "{OwnerId: " << (ui32)owner; str << " VDiskId: " << data.VDiskId.ToString(); - str << " ChunkWrites: " << data.InFlight->ChunkWrites.load(); - str << " ChunkReads: " << data.InFlight->ChunkReads.load(); - str << " LogWrites: " << data.InFlight->LogWrites.load(); - str << " LogReader: " << (bool)data.LogReader; + str << " ChunkWrites: " << data.InFlight->ChunkWrites.load(); + str << " ChunkReads: " << data.InFlight->ChunkReads.load(); + str << " LogWrites: " << data.InFlight->LogWrites.load(); + str << " LogReader: " << (bool)data.LogReader; str << " CurrentFirstLsnToKeep: " << data.CurrentFirstLsnToKeep; str << " FirstNonceToKeep: " << 
SysLogFirstNoncesToKeep.FirstNonceToKeep[owner]; str << " StartingPoints: {"; @@ -245,15 +245,15 @@ TString TPDisk::StartupOwnerInfo() { str << it->second.ToString(); } str << "}"; - str << " Owned chunkIds: "; - TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[owner]); + str << " Owned chunkIds: "; + TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[owner]); str << "}"; } } - str << " PDisk system/log ChunkIds: "; - TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[OwnerSystem]); - str << " Free ChunkIds: "; - TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[OwnerUnallocated]); + str << " PDisk system/log ChunkIds: "; + TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[OwnerSystem]); + str << " Free ChunkIds: "; + TChunkIdFormatter(str).PrintBracedChunksList(ownedChunks[OwnerUnallocated]); return str.Str(); } @@ -276,13 +276,13 @@ void TPDisk::Stop() { return; } AtomicSet(IsStarted, false); - PDiskThread.Stop(); - PDiskThread.Join(); + PDiskThread.Stop(); + PDiskThread.Join(); - if (ActorSystem) { - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " shutdown owner info# " << StartupOwnerInfo()); - } + if (ActorSystem) { + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " shutdown owner info# " << StartupOwnerInfo()); + } BlockDevice->Stop(); // BlockDevice is stopped, the data will NOT hit the disk. 
@@ -297,16 +297,16 @@ void TPDisk::Stop() { TRequestBase* req = static_cast<TRequestBase*>(job->Payload); ForsetiTimeNs += job->Cost + 1; ForsetiScheduler.CompleteJob(ForsetiTimeNs, job); - TRequestBase::AbortDelete(req, ActorSystem); + TRequestBase::AbortDelete(req, ActorSystem); } } - for (TRequestBase* req : JointLogReads) { + for (TRequestBase* req : JointLogReads) { delete req; } JointLogReads.clear(); - for (auto& req : JointChunkReads) { - TRequestBase::AbortDelete(req.Get(), ActorSystem); + for (auto& req : JointChunkReads) { + TRequestBase::AbortDelete(req.Get(), ActorSystem); } JointChunkReads.clear(); for (TRequestBase* req : JointChunkWrites) { @@ -323,7 +323,7 @@ void TPDisk::Stop() { delete req; break; default: - Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkWrites"); + Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkWrites"); } } JointChunkWrites.clear(); @@ -332,9 +332,9 @@ void TPDisk::Stop() { } JointLogWrites.clear(); JointCommits.clear(); - for (const auto& req : FastOperationsQueue) { + for (const auto& req : FastOperationsQueue) { TRequestBase::AbortDelete(req.get(), ActorSystem); - } + } FastOperationsQueue.clear(); for (TRequestBase* req : PausedQueue) { TRequestBase::AbortDelete(req, ActorSystem); @@ -363,51 +363,51 @@ ui32 TPDisk::GetUserAccessibleChunkSize() const { } ui32 TPDisk::GetChunkAppendBlockSize() const { - return Format.SectorPayloadSize(); + return Format.SectorPayloadSize(); } ui32 TPDisk::SystemChunkSize(const TDiskFormat& format, ui32 userAccessibleChunkSizeBytes, ui32 sectorSizeBytes) const { ui32 usableSectorBytes = format.SectorPayloadSize(); ui32 userSectors = (userAccessibleChunkSizeBytes + usableSectorBytes - 1) / usableSectorBytes; - ui32 minChunkSize = userSectors * sectorSizeBytes; + ui32 minChunkSize = userSectors * sectorSizeBytes; const ui32 chunkSizeAlignment = (2 << 20); ui32 alignedChunkSize = ((minChunkSize + chunkSizeAlignment - 1) / 
chunkSizeAlignment) * chunkSizeAlignment; return alignedChunkSize; } -void ParsePayloadFromSectorOffset(const TDiskFormat& format, ui64 firstSector, ui64 lastSector, ui64 currentSector, - ui64 *outPayloadBytes, ui64 *outPayloadOffset) { - Y_VERIFY_S(firstSector <= currentSector && currentSector <= lastSector, firstSector << " <= " << currentSector - << " <= " << lastSector); - - *outPayloadBytes = (lastSector + 1 - currentSector) * format.SectorPayloadSize(); - *outPayloadOffset = (currentSector - firstSector) * format.SectorPayloadSize(); -} - -bool ParseSectorOffset(const TDiskFormat& format, TActorSystem *actorSystem, ui32 pDiskId, ui64 offset, ui64 size, - ui64 &outSectorIdx, ui64 &outLastSectorIdx, ui64 &outSectorOffset) { - const ui64 chunkSizeUsableSectors = format.ChunkSize / format.SectorSize; - const ui64 sectorPayloadSize = format.SectorPayloadSize(); - Y_VERIFY(sectorPayloadSize > 0); - ui64 lastSectorIdx = (offset + size + sectorPayloadSize - 1) / sectorPayloadSize - 1; - outLastSectorIdx = lastSectorIdx; - - ui64 sectorIdx = offset / sectorPayloadSize; - outSectorIdx = sectorIdx; - - if (outSectorIdx >= chunkSizeUsableSectors || outLastSectorIdx >= chunkSizeUsableSectors) { - if (outSectorIdx >= chunkSizeUsableSectors) { +void ParsePayloadFromSectorOffset(const TDiskFormat& format, ui64 firstSector, ui64 lastSector, ui64 currentSector, + ui64 *outPayloadBytes, ui64 *outPayloadOffset) { + Y_VERIFY_S(firstSector <= currentSector && currentSector <= lastSector, firstSector << " <= " << currentSector + << " <= " << lastSector); + + *outPayloadBytes = (lastSector + 1 - currentSector) * format.SectorPayloadSize(); + *outPayloadOffset = (currentSector - firstSector) * format.SectorPayloadSize(); +} + +bool ParseSectorOffset(const TDiskFormat& format, TActorSystem *actorSystem, ui32 pDiskId, ui64 offset, ui64 size, + ui64 &outSectorIdx, ui64 &outLastSectorIdx, ui64 &outSectorOffset) { + const ui64 chunkSizeUsableSectors = format.ChunkSize / format.SectorSize; 
+ const ui64 sectorPayloadSize = format.SectorPayloadSize(); + Y_VERIFY(sectorPayloadSize > 0); + ui64 lastSectorIdx = (offset + size + sectorPayloadSize - 1) / sectorPayloadSize - 1; + outLastSectorIdx = lastSectorIdx; + + ui64 sectorIdx = offset / sectorPayloadSize; + outSectorIdx = sectorIdx; + + if (outSectorIdx >= chunkSizeUsableSectors || outLastSectorIdx >= chunkSizeUsableSectors) { + if (outSectorIdx >= chunkSizeUsableSectors) { LOG_ERROR(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " outSectorIdx# %" PRIu32 - " >= chunkSizeUsableSectors# %" PRIu32 " with offset# %" PRIu32 " size# %" PRIu32, - (ui32)pDiskId, (ui32)outSectorIdx, (ui32)chunkSizeUsableSectors, (ui32)offset, (ui32)size); + " >= chunkSizeUsableSectors# %" PRIu32 " with offset# %" PRIu32 " size# %" PRIu32, + (ui32)pDiskId, (ui32)outSectorIdx, (ui32)chunkSizeUsableSectors, (ui32)offset, (ui32)size); } else { LOG_ERROR(*actorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " outLastSectorIdx# %" PRIu32 - " >= chunkSizeUsableSectors# %" PRIu32 " with offset# %" PRIu32 " size# %" PRIu32, - (ui32)pDiskId, (ui32)outLastSectorIdx, (ui32)chunkSizeUsableSectors, (ui32)offset, (ui32)size); + " >= chunkSizeUsableSectors# %" PRIu32 " with offset# %" PRIu32 " size# %" PRIu32, + (ui32)pDiskId, (ui32)outLastSectorIdx, (ui32)chunkSizeUsableSectors, (ui32)offset, (ui32)size); } return false; } - outSectorOffset = offset - sectorIdx * sectorPayloadSize; + outSectorOffset = offset - sectorIdx * sectorPayloadSize; return true; } @@ -418,125 +418,125 @@ ui64 TPDisk::UsableSectorsPerLogChunk() const { return maxBaseSectors; } -void TPDisk::CheckLogCanary(ui8* sectorData, ui32 chunkIdx, ui64 sectorIdx) const { - if (CanarySize) { - const ui64 readCanary = ReadUnaligned<ui64>( - sectorData + Format.SectorSize - CanarySize - sizeof(TDataSectorFooter)); - if (readCanary != Canary) { - TStringStream ss; - ss << "PDiskId# " << PDiskId << " Failed log canary at chunkIdx# " << chunkIdx - << " sectorIdx# 
" << sectorIdx << " sectorOffset# " << Format.Offset(chunkIdx, sectorIdx) - << " read canary# " << readCanary << " expected canary# " << Canary; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, ss.Str()); - Y_FAIL_S(ss.Str()); - } +void TPDisk::CheckLogCanary(ui8* sectorData, ui32 chunkIdx, ui64 sectorIdx) const { + if (CanarySize) { + const ui64 readCanary = ReadUnaligned<ui64>( + sectorData + Format.SectorSize - CanarySize - sizeof(TDataSectorFooter)); + if (readCanary != Canary) { + TStringStream ss; + ss << "PDiskId# " << PDiskId << " Failed log canary at chunkIdx# " << chunkIdx + << " sectorIdx# " << sectorIdx << " sectorOffset# " << Format.Offset(chunkIdx, sectorIdx) + << " read canary# " << readCanary << " expected canary# " << Canary; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, ss.Str()); + Y_FAIL_S(ss.Str()); + } } } -TLogPosition TPDisk::LogPosition(TChunkIdx chunkIdx, ui64 sectorIdx, ui64 offsetInSector) const { - ui64 offsetBytes = sectorIdx * Format.SectorSize + offsetInSector; - Y_VERIFY(offsetBytes <= Max<ui32>()); - return {chunkIdx, static_cast<ui32>(offsetBytes)}; +TLogPosition TPDisk::LogPosition(TChunkIdx chunkIdx, ui64 sectorIdx, ui64 offsetInSector) const { + ui64 offsetBytes = sectorIdx * Format.SectorSize + offsetInSector; + Y_VERIFY(offsetBytes <= Max<ui32>()); + return {chunkIdx, static_cast<ui32>(offsetBytes)}; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Common operations //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// The goal of this function is to find the first row of unused log chunks and release them. -// There are two possible cases: -// 1. Row starts from the first log chunk, trivial case, just set first non-empty chunk as new head -// 2. 
Row is located between used log chunks -bool TPDisk::ReleaseUnusedLogChunks(TCompletionEventSender *completion) { +// The goal of this function is to find the first row of unused log chunks and release them. +// There are two possible cases: +// 1. Row starts from the first log chunk, trivial case, just set first non-empty chunk as new head +// 2. Row is located between used log chunks +bool TPDisk::ReleaseUnusedLogChunks(TCompletionEventSender *completion) { TGuard<TMutex> guard(StateMutex); - if (IsLogChunksReleaseInflight) { - return false; - } - - // Both gapStart end gapEnd point to non-empty log chunks around empty chunk region - TMaybe<TLogChunkInfo> gapStart; - TMaybe<TLogChunkInfo> gapEnd; - auto it = LogChunks.begin(); - - // Gap search requires whole LogChunks list traversal - if (LogChunks.size() > 3 && KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { - while (it != LogChunks.end() && ChunkState[it->ChunkIdx].CommitState == TChunkState::LOG_COMMITTED - && it->CurrentUserCount != 0) { - gapStart = *it; - ++it; - } - } - - PrintLogChunksInfo("before log cut"); - TVector<TChunkIdx> chunksToRelease; - while (it != LogChunks.end() && it != std::prev(LogChunks.end()) - && ChunkState[it->ChunkIdx].CommitState == TChunkState::LOG_COMMITTED && it->CurrentUserCount == 0) { - // Clear all info about a chunk, but do not add the chunk to a free list to prevent reuse - // of the chunk before NextChunkReference is written to device - const ui32 chunkIdx = it->ChunkIdx; - chunksToRelease.push_back(chunkIdx); - TChunkState &state = ChunkState[chunkIdx]; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " chunkIdx# " << chunkIdx << " released as unused, ownerId# " << ui32(state.OwnerId) - << " -> " << ui32(OwnerUnallocated) << " Marker# BPD55"); - Y_VERIFY_S(state.OwnerId == OwnerSystem, "PDiskId# " << PDiskId - << " Unexpected ownerId# " << ui32(state.OwnerId)); - state.CommitState = TChunkState::FREE; - state.OwnerId = OwnerUnallocated; - 
Mon.LogChunks->Dec(); - - auto curr = it; - ++it; - LogChunks.erase(curr); - } - if (it != LogChunks.end()) { - if (gapStart) { - it->IsEndOfSplice = true; - } - gapEnd = *it; - } - - // Nothing to release - if (chunksToRelease.empty()) { - return false; - // Case 1: Chunks to be deleted located at the start of LogChunks list - } else if (!gapStart && gapEnd) { - IsLogChunksReleaseInflight = true; - completion->Req = THolder<TRequestBase>(ReqCreator.CreateFromArgs<TReleaseChunks>(std::move(chunksToRelease))); - SysLogRecord.LogHeadChunkIdx = gapEnd->ChunkIdx; - SysLogRecord.LogHeadChunkPreviousNonce = ChunkState[gapEnd->ChunkIdx].PreviousNonce; - PrintLogChunksInfo("cut tail log"); + if (IsLogChunksReleaseInflight) { + return false; + } + + // Both gapStart end gapEnd point to non-empty log chunks around empty chunk region + TMaybe<TLogChunkInfo> gapStart; + TMaybe<TLogChunkInfo> gapEnd; + auto it = LogChunks.begin(); + + // Gap search requires whole LogChunks list traversal + if (LogChunks.size() > 3 && KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { + while (it != LogChunks.end() && ChunkState[it->ChunkIdx].CommitState == TChunkState::LOG_COMMITTED + && it->CurrentUserCount != 0) { + gapStart = *it; + ++it; + } + } + + PrintLogChunksInfo("before log cut"); + TVector<TChunkIdx> chunksToRelease; + while (it != LogChunks.end() && it != std::prev(LogChunks.end()) + && ChunkState[it->ChunkIdx].CommitState == TChunkState::LOG_COMMITTED && it->CurrentUserCount == 0) { + // Clear all info about a chunk, but do not add the chunk to a free list to prevent reuse + // of the chunk before NextChunkReference is written to device + const ui32 chunkIdx = it->ChunkIdx; + chunksToRelease.push_back(chunkIdx); + TChunkState &state = ChunkState[chunkIdx]; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " chunkIdx# " << chunkIdx << " released as unused, ownerId# " << ui32(state.OwnerId) + << " -> " << ui32(OwnerUnallocated) << " Marker# BPD55"); + 
Y_VERIFY_S(state.OwnerId == OwnerSystem, "PDiskId# " << PDiskId + << " Unexpected ownerId# " << ui32(state.OwnerId)); + state.CommitState = TChunkState::FREE; + state.OwnerId = OwnerUnallocated; + Mon.LogChunks->Dec(); + + auto curr = it; + ++it; + LogChunks.erase(curr); + } + if (it != LogChunks.end()) { + if (gapStart) { + it->IsEndOfSplice = true; + } + gapEnd = *it; + } + + // Nothing to release + if (chunksToRelease.empty()) { + return false; + // Case 1: Chunks to be deleted located at the start of LogChunks list + } else if (!gapStart && gapEnd) { + IsLogChunksReleaseInflight = true; + completion->Req = THolder<TRequestBase>(ReqCreator.CreateFromArgs<TReleaseChunks>(std::move(chunksToRelease))); + SysLogRecord.LogHeadChunkIdx = gapEnd->ChunkIdx; + SysLogRecord.LogHeadChunkPreviousNonce = ChunkState[gapEnd->ChunkIdx].PreviousNonce; + PrintLogChunksInfo("cut tail log"); return true; - - // Case 2: Chunks to be deleted located in the middle of LogChunksList - } else if (gapStart && gapEnd) { - Y_VERIFY(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); - IsLogChunksReleaseInflight = true; - Mon.SplicedLogChunks->Add(chunksToRelease.size()); - auto *releaseReq = ReqCreator.CreateFromArgs<TReleaseChunks>(*gapStart, *gapEnd, std::move(chunksToRelease)); - + + // Case 2: Chunks to be deleted located in the middle of LogChunksList + } else if (gapStart && gapEnd) { + Y_VERIFY(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); + IsLogChunksReleaseInflight = true; + Mon.SplicedLogChunks->Add(chunksToRelease.size()); + auto *releaseReq = ReqCreator.CreateFromArgs<TReleaseChunks>(*gapStart, *gapEnd, std::move(chunksToRelease)); + auto flushAction = MakeHolder<TCompletionEventSender>(this, THolder<TReleaseChunks>(releaseReq)); - TReqId reqId(AtomicIncrement(ReqCreator.LastReqId)); - WriteSysLogRestorePoint(flushAction.Release(), reqId, {}); - PrintLogChunksInfo("log splice"); - return false; - } else { - TStringStream ss; - ss << "Impossible situation - we have non empty 
chunksToRelease vector and cannot release them"; - ss << " gapStart# "; - if (gapStart) { - ss << gapStart->ChunkIdx; - } else { - ss << "null"; - } - ss << " gapEnd# "; - if (gapEnd) { - ss << gapEnd->ChunkIdx; - } else { - ss << "null"; - } - Y_FAIL_S(ss.Str()); - return false; + TReqId reqId(AtomicIncrement(ReqCreator.LastReqId)); + WriteSysLogRestorePoint(flushAction.Release(), reqId, {}); + PrintLogChunksInfo("log splice"); + return false; + } else { + TStringStream ss; + ss << "Impossible situation - we have non empty chunksToRelease vector and cannot release them"; + ss << " gapStart# "; + if (gapStart) { + ss << gapStart->ChunkIdx; + } else { + ss << "null"; + } + ss << " gapEnd# "; + if (gapEnd) { + ss << gapEnd->ChunkIdx; + } else { + ss << "null"; + } + Y_FAIL_S(ss.Str()); + return false; } } @@ -555,7 +555,7 @@ ui32 TPDisk::GetTotalChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) ui32 TPDisk::GetFreeChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) const { Y_UNUSED(ownerGroupType); // TODO(cthulhu): use ownerGroupType for logs - return Max<i64>(0, Keeper.GetOwnerFree(ownerId)); + return Max<i64>(0, Keeper.GetOwnerFree(ownerId)); } ui32 TPDisk::GetUsedChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) const { @@ -596,7 +596,7 @@ NPDisk::TStatusFlags TPDisk::NotEnoughDiskSpaceStatusFlags(ui32 ownerId, const E void TPDisk::SendCutLog(TAskForCutLog &request) { TGuard<TMutex> guard(StateMutex); if (request.Owner < OwnerData.size()) { - const TOwnerData &data = OwnerData[request.Owner]; + const TOwnerData &data = OwnerData[request.Owner]; if (data.VDiskId != TVDiskID::InvalidId) { AskVDisksToCutLogs(request.Owner, true); } @@ -609,7 +609,7 @@ void TPDisk::AskVDisksToCutLogs(TOwner ownerFilter, bool doForce) { ui32 activeOwners = 0; for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { - const TOwnerData &data = OwnerData[owner]; + const TOwnerData &data = OwnerData[owner]; if (data.VDiskId != TVDiskID::InvalidId) { 
++activeOwners; } @@ -618,11 +618,11 @@ void TPDisk::AskVDisksToCutLogs(TOwner ownerFilter, bool doForce) { size_t cutThreshold = (size_t)Max(Cfg->MinLogChunksTotal, (ui64)(ui64(activeOwners) * Cfg->MaxLogChunksPerOwnerMultiplier + Cfg->MaxLogChunksPerOwnerDivisor - 1ull) / Cfg->MaxLogChunksPerOwnerDivisor); - if (logChunkCount > cutThreshold * Cfg->WarningLogChunksMultiplier) { - *Mon.TooMuchLogChunks = 1; - } else { - *Mon.TooMuchLogChunks = 0; - } + if (logChunkCount > cutThreshold * Cfg->WarningLogChunksMultiplier) { + *Mon.TooMuchLogChunks = 1; + } else { + *Mon.TooMuchLogChunks = 0; + } if (logChunkCount > cutThreshold || doForce) { if (logChunkCount > cutThreshold * Cfg->InsaneLogChunksMultiplier) { if (InsaneLogChunks == 0) { @@ -638,10 +638,10 @@ void TPDisk::AskVDisksToCutLogs(TOwner ownerFilter, bool doForce) { str << " at PDiskId# " << (ui32)PDiskId; str << " cutThreshold# " << cutThreshold; for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { - const TOwnerData &data = OwnerData[owner]; + const TOwnerData &data = OwnerData[owner]; if (data.VDiskId != TVDiskID::InvalidId) { str << " OwnerId# " << (ui32)owner; - str << " { VDiskId# " << data.VDiskId.ToStringWOGeneration(); + str << " { VDiskId# " << data.VDiskId.ToStringWOGeneration(); str << " CutLogId# " << data.CutLogId.ToString(); str << " WhiteboardProxyId# " << data.WhiteboardProxyId; str << " CurLsnToKeep# " << data.CurrentFirstLsnToKeep; @@ -655,11 +655,11 @@ void TPDisk::AskVDisksToCutLogs(TOwner ownerFilter, bool doForce) { auto chunkIt = LogChunks.begin(); str << chunkIt->ToString(); str << "}"; - str << " CommitState# "; - str << ChunkState[LogChunks.begin()->ChunkIdx].CommitState; + str << " CommitState# "; + str << ChunkState[LogChunks.begin()->ChunkIdx].CommitState; str << " details# "; OutputHtmlLogChunksDetails(str); - Y_FAIL_S(str.Str()); + Y_FAIL_S(str.Str()); } } } else { @@ -777,10 +777,10 @@ bool TPDisk::ChunkWritePiece(TChunkWrite *evChunkWrite, ui32 pieceShift, ui32 pi return 
true; } TGuard<TMutex> guard(StateMutex); - Y_VERIFY(pieceShift % Format.SectorPayloadSize() == 0); - Y_VERIFY_S(pieceSize % Format.SectorPayloadSize() == 0 || pieceShift + pieceSize == evChunkWrite->TotalSize, - "pieceShift# " << pieceShift << " pieceSize# " << pieceSize - << " evChunkWrite->TotalSize# " << evChunkWrite->TotalSize); + Y_VERIFY(pieceShift % Format.SectorPayloadSize() == 0); + Y_VERIFY_S(pieceSize % Format.SectorPayloadSize() == 0 || pieceShift + pieceSize == evChunkWrite->TotalSize, + "pieceShift# " << pieceShift << " pieceSize# " << pieceSize + << " evChunkWrite->TotalSize# " << evChunkWrite->TotalSize); ui32 chunkIdx = evChunkWrite->ChunkIdx; @@ -790,32 +790,32 @@ bool TPDisk::ChunkWritePiece(TChunkWrite *evChunkWrite, ui32 pieceShift, ui32 pi ui64 desiredSectorIdx = 0; ui64 sectorOffset = 0; ui64 lastSectorIdx; - if (!ParseSectorOffset(Format, ActorSystem, PDiskId, evChunkWrite->Offset + evChunkWrite->BytesWritten, + if (!ParseSectorOffset(Format, ActorSystem, PDiskId, evChunkWrite->Offset + evChunkWrite->BytesWritten, evChunkWrite->TotalSize - evChunkWrite->BytesWritten, desiredSectorIdx, lastSectorIdx, sectorOffset)) { guard.Release(); TString err = Sprintf("PDiskId# %" PRIu32 " Can't write chunk: incorrect offset/size offset# %" PRIu32 " size# %" PRIu32 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, (ui32)PDiskId, (ui32)evChunkWrite->Offset, (ui32)evChunkWrite->TotalSize, (ui32)chunkIdx, (ui32)evChunkWrite->Owner); LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, "%s", err.c_str()); - SendChunkWriteError(*evChunkWrite, err, NKikimrProto::ERROR); + SendChunkWriteError(*evChunkWrite, err, NKikimrProto::ERROR); return true; } - TChunkState &state = ChunkState[chunkIdx]; + TChunkState &state = ChunkState[chunkIdx]; state.CurrentNonce = state.Nonce + (ui64)desiredSectorIdx; - ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; - TChunkWriter writer(Mon, *BlockDevice.Get(), Format, state.CurrentNonce, Format.ChunkKey, BufferPool.Get(), 
- desiredSectorIdx, dataChunkSizeSectors, Format.MagicDataChunk, chunkIdx, nullptr, desiredSectorIdx, - nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption); + ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; + TChunkWriter writer(Mon, *BlockDevice.Get(), Format, state.CurrentNonce, Format.ChunkKey, BufferPool.Get(), + desiredSectorIdx, dataChunkSizeSectors, Format.MagicDataChunk, chunkIdx, nullptr, desiredSectorIdx, + nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption); guard.Release(); - LWTRACK(PDiskChunkWritePieceSendToDevice, evChunkWrite->Orbit, PDiskId, evChunkWrite->Owner, chunkIdx, - pieceShift, pieceSize); - + LWTRACK(PDiskChunkWritePieceSendToDevice, evChunkWrite->Orbit, PDiskId, evChunkWrite->Owner, chunkIdx, + pieceShift, pieceSize); + ui32 bytesAvailable = pieceSize; Y_VERIFY(evChunkWrite->BytesWritten == pieceShift); - const ui32 count = evChunkWrite->PartsPtr->Size(); + const ui32 count = evChunkWrite->PartsPtr->Size(); for (ui32 partIdx = evChunkWrite->CurrentPart; partIdx < count; ++partIdx) { ui32 remainingPartSize = (*evChunkWrite->PartsPtr)[partIdx].second - evChunkWrite->CurrentPartOffset; if (bytesAvailable < remainingPartSize) { @@ -859,62 +859,62 @@ bool TPDisk::ChunkWritePiece(TChunkWrite *evChunkWrite, ui32 pieceShift, ui32 pi LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " chunkIdx# " << (ui32)chunkIdx - << " ownerId# " << evChunkWrite->Owner + << " ownerId# " << evChunkWrite->Owner << " ChunkWrite writer at sectorIdx# " << writer.SectorIdx << " Marker# BPD79"); - if (!writer.IsEmptySector()) { - *Mon.BandwidthPChunkPadding += writer.SectorBytesFree; - writer.WriteZeroes(writer.SectorBytesFree, evChunkWrite->ReqId, &evChunkWrite->TraceId); + if (!writer.IsEmptySector()) { + *Mon.BandwidthPChunkPadding += writer.SectorBytesFree; + writer.WriteZeroes(writer.SectorBytesFree, 
evChunkWrite->ReqId, &evChunkWrite->TraceId); LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " chunkIdx# %" PRIu32 " was zero-padded after writing", (ui32)PDiskId, (ui32)chunkIdx); } - evChunkWrite->Completion->Orbit = std::move(evChunkWrite->Orbit); + evChunkWrite->Completion->Orbit = std::move(evChunkWrite->Orbit); writer.Flush(evChunkWrite->ReqId, &evChunkWrite->TraceId, evChunkWrite->Completion.Release()); - - - evChunkWrite->IsReplied = true; + + + evChunkWrite->IsReplied = true; return true; } -void TPDisk::SendChunkWriteError(TChunkWrite &chunkWrite, const TString &errorReason, - NKikimrProto::EReplyStatus status) { - Y_VERIFY_DEBUG(errorReason); - Y_VERIFY_DEBUG(status != NKikimrProto::OK); - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, errorReason); - Y_VERIFY(!chunkWrite.IsReplied); - NPDisk::TStatusFlags flags = status == NKikimrProto::OUT_OF_SPACE - ? NotEnoughDiskSpaceStatusFlags(chunkWrite.Owner, chunkWrite.OwnerGroupType) - : GetStatusFlags(chunkWrite.Owner, chunkWrite.OwnerGroupType); - ActorSystem->Send(chunkWrite.Sender, new NPDisk::TEvChunkWriteResult(status, - chunkWrite.ChunkIdx, chunkWrite.Cookie, flags, errorReason)); - Mon.GetWriteCounter(chunkWrite.PriorityClass)->CountResponse(); - chunkWrite.IsReplied = true; +void TPDisk::SendChunkWriteError(TChunkWrite &chunkWrite, const TString &errorReason, + NKikimrProto::EReplyStatus status) { + Y_VERIFY_DEBUG(errorReason); + Y_VERIFY_DEBUG(status != NKikimrProto::OK); + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, errorReason); + Y_VERIFY(!chunkWrite.IsReplied); + NPDisk::TStatusFlags flags = status == NKikimrProto::OUT_OF_SPACE + ? 
NotEnoughDiskSpaceStatusFlags(chunkWrite.Owner, chunkWrite.OwnerGroupType) + : GetStatusFlags(chunkWrite.Owner, chunkWrite.OwnerGroupType); + ActorSystem->Send(chunkWrite.Sender, new NPDisk::TEvChunkWriteResult(status, + chunkWrite.ChunkIdx, chunkWrite.Cookie, flags, errorReason)); + Mon.GetWriteCounter(chunkWrite.PriorityClass)->CountResponse(); + chunkWrite.IsReplied = true; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk reading //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::SendChunkReadError(const TIntrusivePtr<TChunkRead>& read, TStringStream& error, NKikimrProto::EReplyStatus status) { - error << " for owner# " << read->Owner << " can't read chunkIdx# " << read->ChunkIdx; - Y_VERIFY(status != NKikimrProto::OK); - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, error.Str()); - +void TPDisk::SendChunkReadError(const TIntrusivePtr<TChunkRead>& read, TStringStream& error, NKikimrProto::EReplyStatus status) { + error << " for owner# " << read->Owner << " can't read chunkIdx# " << read->ChunkIdx; + Y_VERIFY(status != NKikimrProto::OK); + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, error.Str()); + THolder<NPDisk::TEvChunkReadResult> result = MakeHolder<NPDisk::TEvChunkReadResult>(status, - read->ChunkIdx, read->Offset, read->Cookie, GetStatusFlags(read->Owner, read->OwnerGroupType), error.Str()); - ActorSystem->Send(read->Sender, result.Release()); - read->IsReplied = true; - Mon.GetReadCounter(read->PriorityClass)->CountResponse(); + read->ChunkIdx, read->Offset, read->Cookie, GetStatusFlags(read->Owner, read->OwnerGroupType), error.Str()); + ActorSystem->Send(read->Sender, result.Release()); + read->IsReplied = true; + Mon.GetReadCounter(read->PriorityClass)->CountResponse(); } -TPDisk::EChunkReadPieceResult TPDisk::ChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, 
- ui64 pieceSizeLimit, ui64 *reallyReadDiskBytes) { +TPDisk::EChunkReadPieceResult TPDisk::ChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, + ui64 pieceSizeLimit, ui64 *reallyReadDiskBytes) { if (read->IsReplied) { return ReadPieceResultOk; } - Y_VERIFY_S(pieceCurrentSector == read->CurrentSector, pieceCurrentSector << " != " << read->CurrentSector); + Y_VERIFY_S(pieceCurrentSector == read->CurrentSector, pieceCurrentSector << " != " << read->CurrentSector); ui64 sectorsCount = read->LastSector - read->FirstSector + 1; ui64 sectorsToRead = sectorsCount - read->CurrentSector; ui64 bytesToRead = sectorsToRead * Format.SectorSize; @@ -923,50 +923,50 @@ TPDisk::EChunkReadPieceResult TPDisk::ChunkReadPiece(TIntrusivePtr<TChunkRead> & bytesToRead = sectorsToRead * Format.SectorSize; } - Y_VERIFY(sectorsToRead); - - if (reallyReadDiskBytes) { - *reallyReadDiskBytes = bytesToRead; - } - + Y_VERIFY(sectorsToRead); + + if (reallyReadDiskBytes) { + *reallyReadDiskBytes = bytesToRead; + } + ui64 firstSector; ui64 lastSector; ui64 sectorOffset; - bool isOk = ParseSectorOffset(Format, ActorSystem, PDiskId, - read->Offset, read->Size, firstSector, lastSector, sectorOffset); - Y_VERIFY(isOk); - + bool isOk = ParseSectorOffset(Format, ActorSystem, PDiskId, + read->Offset, read->Size, firstSector, lastSector, sectorOffset); + Y_VERIFY(isOk); + ui64 currentSectorOffset = (ui64)read->CurrentSector * (ui64)Format.SectorSize; - bool isTheFirstPart = read->CurrentSector == 0; - bool isTheLastPart = read->FirstSector + read->CurrentSector + sectorsToRead > read->LastSector; - - ui64 payloadBytesToRead; - ui64 payloadOffset; - ParsePayloadFromSectorOffset(Format, read->FirstSector, read->FirstSector + read->CurrentSector + sectorsToRead - 1, - read->FirstSector + read->CurrentSector, &payloadBytesToRead, &payloadOffset); - - if (!isTheFirstPart) { - payloadOffset -= sectorOffset; - } - - //Adjust read size only if there is more than one piece - if (isTheFirstPart && 
!isTheLastPart) { - payloadBytesToRead -= sectorOffset; - } - if (!isTheFirstPart && isTheLastPart) { - payloadBytesToRead += sectorOffset; - } - - payloadBytesToRead = Min(payloadBytesToRead, read->RemainingSize); - - read->CurrentSector += sectorsToRead; - read->RemainingSize -= payloadBytesToRead; - AtomicAdd(InFlightChunkRead, (ui64)bytesToRead); - - if (isTheLastPart) { - Y_VERIFY(read->RemainingSize == 0); - } - + bool isTheFirstPart = read->CurrentSector == 0; + bool isTheLastPart = read->FirstSector + read->CurrentSector + sectorsToRead > read->LastSector; + + ui64 payloadBytesToRead; + ui64 payloadOffset; + ParsePayloadFromSectorOffset(Format, read->FirstSector, read->FirstSector + read->CurrentSector + sectorsToRead - 1, + read->FirstSector + read->CurrentSector, &payloadBytesToRead, &payloadOffset); + + if (!isTheFirstPart) { + payloadOffset -= sectorOffset; + } + + //Adjust read size only if there is more than one piece + if (isTheFirstPart && !isTheLastPart) { + payloadBytesToRead -= sectorOffset; + } + if (!isTheFirstPart && isTheLastPart) { + payloadBytesToRead += sectorOffset; + } + + payloadBytesToRead = Min(payloadBytesToRead, read->RemainingSize); + + read->CurrentSector += sectorsToRead; + read->RemainingSize -= payloadBytesToRead; + AtomicAdd(InFlightChunkRead, (ui64)bytesToRead); + + if (isTheLastPart) { + Y_VERIFY(read->RemainingSize == 0); + } + ui64 footerTotalSize = sectorsToRead * sizeof(TDataSectorFooter); *Mon.BandwidthPChunkReadPayload += bytesToRead - footerTotalSize; *Mon.BandwidthPChunkReadSectorFooter += footerTotalSize; @@ -974,241 +974,241 @@ TPDisk::EChunkReadPieceResult TPDisk::ChunkReadPiece(TIntrusivePtr<TChunkRead> & ui64 readOffset = Format.Offset(read->ChunkIdx, read->FirstSector, currentSectorOffset); // TODO: Get this from the drive WILSON_TRACE(*ActorSystem, &read->TraceId, AsyncReadScheduled, DiskOffset = readOffset, Size = bytesToRead); - THolder<TCompletionChunkReadPart> completion(new TCompletionChunkReadPart(this, 
read, bytesToRead, - payloadBytesToRead, payloadOffset, read->FinalCompletion, isTheLastPart, Cfg->UseT1ha0HashInFooter)); - completion->CostNs = DriveModel.TimeForSizeNs(bytesToRead, read->ChunkIdx, TDriveModel::OP_TYPE_READ); - Y_VERIFY(bytesToRead <= completion->GetBuffer()->Size()); - ui8 *data = completion->GetBuffer()->Data(); - BlockDevice->PreadAsync(data, bytesToRead, readOffset, completion.Release(), + THolder<TCompletionChunkReadPart> completion(new TCompletionChunkReadPart(this, read, bytesToRead, + payloadBytesToRead, payloadOffset, read->FinalCompletion, isTheLastPart, Cfg->UseT1ha0HashInFooter)); + completion->CostNs = DriveModel.TimeForSizeNs(bytesToRead, read->ChunkIdx, TDriveModel::OP_TYPE_READ); + Y_VERIFY(bytesToRead <= completion->GetBuffer()->Size()); + ui8 *data = completion->GetBuffer()->Data(); + BlockDevice->PreadAsync(data, bytesToRead, readOffset, completion.Release(), read->ReqId, &read->TraceId); // TODO: align the data on SectorSize, not PAGE_SIZE // TODO: use the BLKSSZGET ioctl to obtain a backing store's sector size - return isTheLastPart ? ReadPieceResultOk : ReadPieceResultInProgress; + return isTheLastPart ? 
ReadPieceResultOk : ReadPieceResultInProgress; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Chunk locking -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Chunk locking +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void TPDisk::ChunksLockByRange(TFreeChunks &freeChunks, ui32 begin, ui32 end, TVector<ui32> &lockedChunks) { - ui32 freeChunksCount = freeChunks.Size(); + ui32 freeChunksCount = freeChunks.Size(); TVector<ui32> tmpChunks; - tmpChunks.reserve(freeChunksCount); - for (ui32 i = 0; i < freeChunksCount; ++i) { - ui32 idx = freeChunks.Pop(); - tmpChunks.push_back(idx); - } - for (ui32 i = 0; i < freeChunksCount; ++i) { - ui32 idx = tmpChunks.back(); - tmpChunks.pop_back(); - if (begin <= idx && idx < end) { - ChunkState[idx].OwnerId = OwnerLocked; - ChunkState[idx].CommitState = TChunkState::FREE; - lockedChunks.push_back(idx); - Mon.LockedChunks->Inc(); - } else { - freeChunks.Push(idx); - } - } -} - + tmpChunks.reserve(freeChunksCount); + for (ui32 i = 0; i < freeChunksCount; ++i) { + ui32 idx = freeChunks.Pop(); + tmpChunks.push_back(idx); + } + for (ui32 i = 0; i < freeChunksCount; ++i) { + ui32 idx = tmpChunks.back(); + tmpChunks.pop_back(); + if (begin <= idx && idx < end) { + ChunkState[idx].OwnerId = OwnerLocked; + ChunkState[idx].CommitState = TChunkState::FREE; + lockedChunks.push_back(idx); + Mon.LockedChunks->Inc(); + } else { + freeChunks.Push(idx); + } + } +} + void TPDisk::ChunksLockByNumber(ui32 begin, ui32 count, TVector<ui32> &lockedChunks) { - begin = begin > 0 ? begin : 1; + begin = begin > 0 ? 
begin : 1; TVector<ui32> tmpChunks; tmpChunks.reserve(Keeper.GetFreeChunkCount()); TString errorReason; while (ui32 idx = Keeper.PopFreeChunkHack(errorReason)) { - tmpChunks.push_back(idx); - } - Sort(tmpChunks.begin(), tmpChunks.end()); - for (ui32 i = 0; i < tmpChunks.size(); ++i) { - ui32 idx = tmpChunks[i]; - if (begin <= idx && lockedChunks.size() < count) { - ChunkState[idx].OwnerId = OwnerLocked; - ChunkState[idx].CommitState = TChunkState::FREE; - lockedChunks.push_back(idx); - Mon.LockedChunks->Inc(); - } else { + tmpChunks.push_back(idx); + } + Sort(tmpChunks.begin(), tmpChunks.end()); + for (ui32 i = 0; i < tmpChunks.size(); ++i) { + ui32 idx = tmpChunks[i]; + if (begin <= idx && lockedChunks.size() < count) { + ChunkState[idx].OwnerId = OwnerLocked; + ChunkState[idx].CommitState = TChunkState::FREE; + lockedChunks.push_back(idx); + Mon.LockedChunks->Inc(); + } else { Keeper.PushFreeChunkHack(idx); - } - } -} - -void TPDisk::ChunksLock(TChunksLock &evChunksLock) { + } + } +} + +void TPDisk::ChunksLock(TChunksLock &evChunksLock) { TVector<ui32> lockedChunks; - if (evChunksLock.LockByRange) { - if (evChunksLock.Begin < evChunksLock.End) { - lockedChunks.reserve(evChunksLock.End - evChunksLock.Begin); - } - } else { - lockedChunks.reserve(evChunksLock.Count); - } - TGuard<TMutex> guard(StateMutex); - if (evChunksLock.LockByRange) { + if (evChunksLock.LockByRange) { + if (evChunksLock.Begin < evChunksLock.End) { + lockedChunks.reserve(evChunksLock.End - evChunksLock.Begin); + } + } else { + lockedChunks.reserve(evChunksLock.Count); + } + TGuard<TMutex> guard(StateMutex); + if (evChunksLock.LockByRange) { // TODO(cthulhu): Implement lock by range for owned chunks //ChunksLockByRange(TrimmedFreeChunks, evChunksLock.Begin, evChunksLock.End, lockedChunks); //ChunksLockByRange(UntrimmedFreeChunks, evChunksLock.Begin, evChunksLock.End, lockedChunks); - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Locked %" PRIu32 \ - " chunks in range 
[%" PRIu32 ", %" PRIu32 ")", (ui32)PDiskId, (ui32)lockedChunks.size(), - (ui32)evChunksLock.Begin, (ui32)evChunksLock.End); - } else { - ChunksLockByNumber(evChunksLock.Begin, evChunksLock.Count, lockedChunks); - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Locked %" PRIu32 \ - " chunks starting from %" PRIu32 "", (ui32)PDiskId, (ui32)lockedChunks.size(), (ui32)evChunksLock.Begin); - } + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Locked %" PRIu32 \ + " chunks in range [%" PRIu32 ", %" PRIu32 ")", (ui32)PDiskId, (ui32)lockedChunks.size(), + (ui32)evChunksLock.Begin, (ui32)evChunksLock.End); + } else { + ChunksLockByNumber(evChunksLock.Begin, evChunksLock.Count, lockedChunks); + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Locked %" PRIu32 \ + " chunks starting from %" PRIu32 "", (ui32)PDiskId, (ui32)lockedChunks.size(), (ui32)evChunksLock.Begin); + } ui32 availableChunkCount = Keeper.GetFreeChunkCount(); - guard.Release(); - ActorSystem->Send(evChunksLock.Sender, new NPDisk::TEvChunksLockResult(NKikimrProto::OK, lockedChunks, - availableChunkCount)); -} - -void TPDisk::ChunksUnlock(TChunksUnlock &evChunksUnlock) { - TGuard<TMutex> guard(StateMutex); - ui32 unlockedChunks = 0; - for (ui32 chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { - if (ChunkState[chunkIdx].OwnerId == OwnerLocked) { - TChunkState &state = ChunkState[chunkIdx]; - state.OwnerId = OwnerUnallocated; - state.CommitState = TChunkState::FREE; - Mon.LockedChunks->Dec(); + guard.Release(); + ActorSystem->Send(evChunksLock.Sender, new NPDisk::TEvChunksLockResult(NKikimrProto::OK, lockedChunks, + availableChunkCount)); +} + +void TPDisk::ChunksUnlock(TChunksUnlock &evChunksUnlock) { + TGuard<TMutex> guard(StateMutex); + ui32 unlockedChunks = 0; + for (ui32 chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { + if (ChunkState[chunkIdx].OwnerId == OwnerLocked) { + TChunkState &state = ChunkState[chunkIdx]; + 
state.OwnerId = OwnerUnallocated; + state.CommitState = TChunkState::FREE; + Mon.LockedChunks->Dec(); Keeper.PushFreeChunkHack(chunkIdx); - } - ++unlockedChunks; - } - guard.Release(); - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Unlocked %" PRIu32 "", - (ui32)PDiskId, (ui32)unlockedChunks); - ActorSystem->Send(evChunksUnlock.Sender, new NPDisk::TEvChunksUnlockResult(NKikimrProto::OK, unlockedChunks)); - return; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + } + ++unlockedChunks; + } + guard.Release(); + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Unlocked %" PRIu32 "", + (ui32)PDiskId, (ui32)unlockedChunks); + ActorSystem->Send(evChunksUnlock.Sender, new NPDisk::TEvChunksUnlockResult(NKikimrProto::OK, unlockedChunks)); + return; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk reservation //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -TVector<TChunkIdx> TPDisk::AllocateChunkForOwner(const TRequestBase *req, const ui32 count, TString &errorReason) { - // chunkIdx = 0 is deprecated and will not be soon removed +TVector<TChunkIdx> TPDisk::AllocateChunkForOwner(const TRequestBase *req, const ui32 count, TString &errorReason) { + // chunkIdx = 0 is deprecated and will not be soon removed TGuard<TMutex> guard(StateMutex); - Y_VERIFY_DEBUG(IsOwnerUser(req->Owner)); - - const ui32 sharedFree = Keeper.GetFreeChunkCount() - 1; - i64 ownerFree = Keeper.GetOwnerFree(req->Owner); - auto color = Keeper.EstimateSpaceColor(req->Owner, count); - - auto makeError = [&](TString info) { + Y_VERIFY_DEBUG(IsOwnerUser(req->Owner)); + + const ui32 sharedFree = Keeper.GetFreeChunkCount() - 1; + i64 ownerFree = Keeper.GetOwnerFree(req->Owner); + auto color = Keeper.EstimateSpaceColor(req->Owner, count); + 
+ auto makeError = [&](TString info) { guard.Release(); TStringStream str; - str << "PDiskId# " << PDiskId - << " Can't reserve " << count << " chunks" - << " for ownerId# " << req->Owner - << " sharedFree# " << sharedFree - << " ownerFree# " << ownerFree - << " estimatedColor after allocation# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(color) - << " " << info + str << "PDiskId# " << PDiskId + << " Can't reserve " << count << " chunks" + << " for ownerId# " << req->Owner + << " sharedFree# " << sharedFree + << " ownerFree# " << ownerFree + << " estimatedColor after allocation# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(color) + << " " << info << " Marker# BPD20"; - errorReason = str.Str(); - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, str.Str()); - }; - - if (sharedFree <= count || color == NKikimrBlobStorage::TPDiskSpaceColor::BLACK) { - makeError(""); - return {}; - } - - TVector<TChunkIdx> chunks = Keeper.PopOwnerFreeChunks(req->Owner, count, errorReason); - if (chunks.empty()) { - makeError("PopOwnerFreeChunks failed"); - return {}; - } - - const ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; - for (TChunkIdx chunkIdx : chunks) { + errorReason = str.Str(); + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, str.Str()); + }; + + if (sharedFree <= count || color == NKikimrBlobStorage::TPDiskSpaceColor::BLACK) { + makeError(""); + return {}; + } + + TVector<TChunkIdx> chunks = Keeper.PopOwnerFreeChunks(req->Owner, count, errorReason); + if (chunks.empty()) { + makeError("PopOwnerFreeChunks failed"); + return {}; + } + + const ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; + for (TChunkIdx chunkIdx : chunks) { ui64 chunkNonce = SysLogRecord.Nonces.Value[NonceData]; SysLogRecord.Nonces.Value[NonceData] += dataChunkSizeSectors; - OnNonceChange(NonceData, req->ReqId, &req->TraceId); + OnNonceChange(NonceData, req->ReqId, &req->TraceId); // Remember who owns the sector, save chunk Nonce in order to be able 
to continue writing the chunk TChunkState &state = ChunkState[chunkIdx]; - Y_VERIFY_S(state.OwnerId == OwnerUnallocated || state.OwnerId == OwnerUnallocatedTrimmed, - "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << " state# " << state.ToString()); + Y_VERIFY_S(state.OwnerId == OwnerUnallocated || state.OwnerId == OwnerUnallocatedTrimmed, + "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << " state# " << state.ToString()); state.Nonce = chunkNonce; state.CurrentNonce = chunkNonce; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << - " allocated, ownerId# " << state.OwnerId << " -> " << req->Owner); - state.OwnerId = req->Owner; - state.CommitState = TChunkState::DATA_RESERVED; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << + " allocated, ownerId# " << state.OwnerId << " -> " << req->Owner); + state.OwnerId = req->Owner; + state.CommitState = TChunkState::DATA_RESERVED; Mon.UncommitedDataChunks->Inc(); } - return chunks; -} - -void TPDisk::ChunkReserve(TChunkReserve &evChunkReserve) { - TStringStream errorReason; - TGuard<TMutex> guard(StateMutex); - - THolder<NPDisk::TEvChunkReserveResult> result; - TString allocateError; - TVector<TChunkIdx> chunks = AllocateChunkForOwner(&evChunkReserve, evChunkReserve.SizeChunks, allocateError); - errorReason << allocateError; - - if (chunks.empty()) { + return chunks; +} + +void TPDisk::ChunkReserve(TChunkReserve &evChunkReserve) { + TStringStream errorReason; + TGuard<TMutex> guard(StateMutex); + + THolder<NPDisk::TEvChunkReserveResult> result; + TString allocateError; + TVector<TChunkIdx> chunks = AllocateChunkForOwner(&evChunkReserve, evChunkReserve.SizeChunks, allocateError); + errorReason << allocateError; + + if (chunks.empty()) { result = MakeHolder<NPDisk::TEvChunkReserveResult>(NKikimrProto::OUT_OF_SPACE, - NotEnoughDiskSpaceStatusFlags(evChunkReserve.Owner, evChunkReserve.OwnerGroupType), - 
errorReason.Str()); - } else { + NotEnoughDiskSpaceStatusFlags(evChunkReserve.Owner, evChunkReserve.OwnerGroupType), + errorReason.Str()); + } else { result = MakeHolder<NPDisk::TEvChunkReserveResult>(NKikimrProto::OK, 0); - result->ChunkIds = std::move(chunks); - result->StatusFlags = GetStatusFlags(evChunkReserve.Owner, evChunkReserve.OwnerGroupType); - } - + result->ChunkIds = std::move(chunks); + result->StatusFlags = GetStatusFlags(evChunkReserve.Owner, evChunkReserve.OwnerGroupType); + } + guard.Release(); ActorSystem->Send(evChunkReserve.Sender, result.Release()); - Mon.ChunkReserve.CountResponse(); + Mon.ChunkReserve.CountResponse(); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Report to Whiteboard -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { - TEvWhiteboardReportResult *reportResult = whiteboardReport.Response.Release(); - { - TGuard<TMutex> guard(StateMutex); - const ui64 totalSize = Format.DiskSize; - const ui64 availableSize = (ui64)Format.ChunkSize * Keeper.GetFreeChunkCount(); - *Mon.FreeSpaceBytes = availableSize; - *Mon.UsedSpaceBytes = totalSize - *Mon.FreeSpaceBytes; - NKikimrWhiteboard::TPDiskStateInfo& pdiskState = reportResult->PDiskState->Record; - pdiskState.SetPDiskId(PDiskId); - pdiskState.SetPath(Cfg->GetDevicePath()); - pdiskState.SetSerialNumber(Cfg->ExpectedSerial); - pdiskState.SetAvailableSize(availableSize); - pdiskState.SetTotalSize(totalSize); - const auto& state = static_cast<NKikimrBlobStorage::TPDiskState::E>(Mon.PDiskState->Val()); - pdiskState.SetState(state); - +// Report to Whiteboard +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { + TEvWhiteboardReportResult 
*reportResult = whiteboardReport.Response.Release(); + { + TGuard<TMutex> guard(StateMutex); + const ui64 totalSize = Format.DiskSize; + const ui64 availableSize = (ui64)Format.ChunkSize * Keeper.GetFreeChunkCount(); + *Mon.FreeSpaceBytes = availableSize; + *Mon.UsedSpaceBytes = totalSize - *Mon.FreeSpaceBytes; + NKikimrWhiteboard::TPDiskStateInfo& pdiskState = reportResult->PDiskState->Record; + pdiskState.SetPDiskId(PDiskId); + pdiskState.SetPath(Cfg->GetDevicePath()); + pdiskState.SetSerialNumber(Cfg->ExpectedSerial); + pdiskState.SetAvailableSize(availableSize); + pdiskState.SetTotalSize(totalSize); + const auto& state = static_cast<NKikimrBlobStorage::TPDiskState::E>(Mon.PDiskState->Val()); + pdiskState.SetState(state); + reportResult->DiskMetrics = MakeHolder<TEvBlobStorage::TEvControllerUpdateDiskStatus>(); - i64 minSlotSize = Max<i64>(); - for (const auto& [vdiskId, owner] : VDiskOwners) { - const TOwnerData &data = OwnerData[owner]; - // May be less than 0 if owner exceeded his quota - i64 ownerFree = Max<i64>(0, Keeper.GetOwnerFree(owner)) * Format.ChunkSize; - i64 ownerAllocated = (i64)Keeper.GetOwnerUsed(owner) * Format.ChunkSize; - minSlotSize = Min(minSlotSize, Keeper.GetOwnerHardLimit(owner) * Format.ChunkSize); + i64 minSlotSize = Max<i64>(); + for (const auto& [vdiskId, owner] : VDiskOwners) { + const TOwnerData &data = OwnerData[owner]; + // May be less than 0 if owner exceeded his quota + i64 ownerFree = Max<i64>(0, Keeper.GetOwnerFree(owner)) * Format.ChunkSize; + i64 ownerAllocated = (i64)Keeper.GetOwnerUsed(owner) * Format.ChunkSize; + minSlotSize = Min(minSlotSize, Keeper.GetOwnerHardLimit(owner) * Format.ChunkSize); reportResult->VDiskStateVect.emplace_back(data.WhiteboardProxyId, NKikimrWhiteboard::TVDiskStateInfo()); - auto& vdiskInfo = std::get<1>(reportResult->VDiskStateVect.back()); - vdiskInfo.SetAvailableSize(ownerFree); - vdiskInfo.SetAllocatedSize(ownerAllocated); - - NKikimrBlobStorage::TVDiskMetrics* vdiskMetrics = 
reportResult->DiskMetrics->Record.AddVDisksMetrics(); - VDiskIDFromVDiskID(vdiskId, vdiskMetrics->MutableVDiskId()); - vdiskMetrics->MutableVDiskId()->ClearGroupGeneration(); - vdiskMetrics->SetAvailableSize(ownerFree); - vdiskMetrics->SetAllocatedSize(ownerAllocated); - vdiskMetrics->SetStatusFlags(Keeper.GetSpaceStatusFlags(owner)); + auto& vdiskInfo = std::get<1>(reportResult->VDiskStateVect.back()); + vdiskInfo.SetAvailableSize(ownerFree); + vdiskInfo.SetAllocatedSize(ownerAllocated); + + NKikimrBlobStorage::TVDiskMetrics* vdiskMetrics = reportResult->DiskMetrics->Record.AddVDisksMetrics(); + VDiskIDFromVDiskID(vdiskId, vdiskMetrics->MutableVDiskId()); + vdiskMetrics->MutableVDiskId()->ClearGroupGeneration(); + vdiskMetrics->SetAvailableSize(ownerFree); + vdiskMetrics->SetAllocatedSize(ownerAllocated); + vdiskMetrics->SetStatusFlags(Keeper.GetSpaceStatusFlags(owner)); auto *vslotId = vdiskMetrics->MutableVSlotId(); vslotId->SetNodeId(ActorSystem->NodeId); vslotId->SetPDiskId(PDiskId); vslotId->SetVSlotId(data.VDiskSlotId); - } + } NKikimrBlobStorage::TPDiskMetrics& pDiskMetrics = *reportResult->DiskMetrics->Record.AddPDisksMetrics(); pDiskMetrics.SetPDiskId(PDiskId); pDiskMetrics.SetTotalSize(Format.DiskSize); @@ -1218,114 +1218,114 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { pDiskMetrics.SetNonRealTimeMs(AtomicGet(NonRealTimeMs)); pDiskMetrics.SetSlowDeviceMs(Max((ui64)AtomicGet(SlowDeviceMs), (ui64)*Mon.DeviceNonperformanceMs)); pDiskMetrics.SetMaxIOPS(DriveModel.IOPS()); - if (minSlotSize != Max<i64>()) { - pDiskMetrics.SetEnforcedDynamicSlotSize(minSlotSize); - } + if (minSlotSize != Max<i64>()) { + pDiskMetrics.SetEnforcedDynamicSlotSize(minSlotSize); + } pDiskMetrics.SetState(state); - } - - ActorSystem->Send(whiteboardReport.Sender, reportResult); - // Update VDisk's state Solomon metrics - - i64 atLeastOneVDiskNotLogged = 0; - for (const TOwnerData& data : OwnerData) { - if (data.VDiskId != TVDiskID::InvalidId && data.Status 
!= TOwnerData::VDISK_STATUS_LOGGED) { - atLeastOneVDiskNotLogged = 1; - break; - } - } - *Mon.AtLeastOneVDiskNotLogged = atLeastOneVDiskNotLogged; - if (Cfg->SectorMap) { - *Mon.SectorMapAllocatedBytes = Cfg->SectorMap->AllocatedBytes.load(); - } - -} - -void TPDisk::EventUndelivered(TUndelivered &req) { - switch (req.Event->SourceType) { - case TEvCutLog::EventType: - { + } + + ActorSystem->Send(whiteboardReport.Sender, reportResult); + // Update VDisk's state Solomon metrics + + i64 atLeastOneVDiskNotLogged = 0; + for (const TOwnerData& data : OwnerData) { + if (data.VDiskId != TVDiskID::InvalidId && data.Status != TOwnerData::VDISK_STATUS_LOGGED) { + atLeastOneVDiskNotLogged = 1; + break; + } + } + *Mon.AtLeastOneVDiskNotLogged = atLeastOneVDiskNotLogged; + if (Cfg->SectorMap) { + *Mon.SectorMapAllocatedBytes = Cfg->SectorMap->AllocatedBytes.load(); + } + +} + +void TPDisk::EventUndelivered(TUndelivered &req) { + switch (req.Event->SourceType) { + case TEvCutLog::EventType: + { for (ui32 i = OwnerBeginUser; i < OwnerEndUser; ++i) { - if (OwnerData[i].CutLogId == req.Sender) { - LOG_CRIT_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " TEvCutLog was undelivered to VDiskId# " << OwnerData[i].VDiskId.ToStringWOGeneration() + if (OwnerData[i].CutLogId == req.Sender) { + LOG_CRIT_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " TEvCutLog was undelivered to VDiskId# " << OwnerData[i].VDiskId.ToStringWOGeneration() << " Marker# BPD24"); - return; - } - - } - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + return; + } + + } + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " TEvCutLog was undelivered to unknown VDisk Marker# BPD25"); - return; - } - default: - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + return; + } + default: + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << 
(ui32)PDiskId << "Event#" << req.Event->ToString() << " was undelivered to ActorID# " << req.Sender << " Marker# BPD26"); - return; - } -} + return; + } +} void TPDisk::CommitLogChunks(TCommitLogChunks &req) { TGuard<TMutex> guard(StateMutex); for (auto it = req.CommitedLogChunks.begin(); it != req.CommitedLogChunks.end(); ++it) { Y_VERIFY_S(ChunkState[*it].OwnerId == OwnerSystem, "Unexpected chunkIdx# " << *it << " ownerId# " << (ui32)ChunkState[*it].OwnerId << " in CommitLogChunks PDiskId# " << PDiskId); - Y_VERIFY_DEBUG(ChunkState[*it].CommitState == TChunkState::LOG_RESERVED); - ChunkState[*it].CommitState = TChunkState::LOG_COMMITTED; + Y_VERIFY_DEBUG(ChunkState[*it].CommitState == TChunkState::LOG_RESERVED); + ChunkState[*it].CommitState = TChunkState::LOG_COMMITTED; } } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // PDisk formatting //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void TPDisk::WriteApplyFormatRecord(TDiskFormat format, const TKey &mainKey) { - // Use temporal copy of format to restore from that copy because we can damage 'format' variable - // by writing Magic in first bytes - const TDiskFormat originalFormat = format; + // Use temporal copy of format to restore from that copy because we can damage 'format' variable + // by writing Magic in first bytes + const TDiskFormat originalFormat = format; Format = format; Format.ChunkSize = FormatSectorSize * ReplicationFactor; Format.SectorSize = FormatSectorSize; - { - + { + // Encrypt chunk0 format record using mainKey - ui64 nonce = 1; + ui64 nonce = 1; bool encrypt = true; // Always write encrypter format because some tests use wrong main key to initiate errors TSysLogWriter formatWriter(Mon, *BlockDevice.Get(), Format, nonce, mainKey, 
BufferPool.Get(), - 0, ReplicationFactor, Format.MagicFormatChunk, 0, nullptr, 0, nullptr, ActorSystem, PDiskId, - &DriveModel, Cfg->UseT1ha0HashInFooter, encrypt); - - if (format.IsFormatInProgress()) { - // Fill first bytes with magic pattern - ui64 *formatBegin = reinterpret_cast<ui64*>(&format); - ui64 *formatMagicEnd = reinterpret_cast<ui64*>((ui8*)&format + MagicIncompleteFormatSize); - Y_VERIFY((ui8*)formatMagicEnd - (ui8*)formatBegin <= (intptr_t)sizeof(format)); - Fill(formatBegin, formatMagicEnd, MagicIncompleteFormat); - } + 0, ReplicationFactor, Format.MagicFormatChunk, 0, nullptr, 0, nullptr, ActorSystem, PDiskId, + &DriveModel, Cfg->UseT1ha0HashInFooter, encrypt); + + if (format.IsFormatInProgress()) { + // Fill first bytes with magic pattern + ui64 *formatBegin = reinterpret_cast<ui64*>(&format); + ui64 *formatMagicEnd = reinterpret_cast<ui64*>((ui8*)&format + MagicIncompleteFormatSize); + Y_VERIFY((ui8*)formatMagicEnd - (ui8*)formatBegin <= (intptr_t)sizeof(format)); + Fill(formatBegin, formatMagicEnd, MagicIncompleteFormat); + } formatWriter.Write(&format, sizeof(TDiskFormat), TReqId(TReqId::WriteApplyFormatRecordWrite, 0), {}); - TSignalEvent doneEvent; + TSignalEvent doneEvent; formatWriter.Flush(TReqId(TReqId::WriteApplyFormatRecordFlush, 0), {}, - new TCompletionSignal(&doneEvent)); - doneEvent.WaitI(); - } - - Format = originalFormat; + new TCompletionSignal(&doneEvent)); + doneEvent.WaitI(); + } + + Format = originalFormat; } -void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, +void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, const ui64 &diskGuid, const TKey &chunkKey, const TKey &logKey, const TKey &sysLogKey, const TKey &mainKey, - TString textMessage, const bool isErasureEncodeUserLog, const bool trimEntireDevice) { + TString textMessage, const bool isErasureEncodeUserLog, const bool trimEntireDevice) { TGuard<TMutex> 
guard(StateMutex); // Prepare format record alignas(16) TDiskFormat format; format.Clear(); format.DiskSize = diskSizeBytes; format.SectorSize = sectorSizeBytes; - ui64 erasureFlags = FormatFlagErasureEncodeUserLog; - format.FormatFlags = (format.FormatFlags & (~erasureFlags)) | (isErasureEncodeUserLog ? erasureFlags : 0); + ui64 erasureFlags = FormatFlagErasureEncodeUserLog; + format.FormatFlags = (format.FormatFlags & (~erasureFlags)) | (isErasureEncodeUserLog ? erasureFlags : 0); format.ChunkSize = SystemChunkSize(format, userAccessibleChunkSizeBytes, sectorSizeBytes); format.Guid = diskGuid; format.ChunkKey = chunkKey; @@ -1340,29 +1340,29 @@ void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 user // Check disk size { ui32 diskSizeChunks = format.DiskSizeChunks(); - Y_VERIFY_S(diskSizeChunks > format.SystemChunkCount + 2, - "Incorrect disk parameters! Total chunks# " << diskSizeChunks - << ", System chunks needed# " << format.SystemChunkCount << ", cant run with < 3 free chunks!" - << " Debug format# " << format.ToString()); + Y_VERIFY_S(diskSizeChunks > format.SystemChunkCount + 2, + "Incorrect disk parameters! Total chunks# " << diskSizeChunks + << ", System chunks needed# " << format.SystemChunkCount << ", cant run with < 3 free chunks!" 
+ << " Debug format# " << format.ToString()); } - // Trim the entire device - if (trimEntireDevice && DriveModel.IsTrimSupported()) { + // Trim the entire device + if (trimEntireDevice && DriveModel.IsTrimSupported()) { LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "Trim of the entire device started Marker# BPD28"); - NHPTimer::STime start = HPNow(); - TReqId reqId(TReqId::FormatTrim, AtomicIncrement(ReqCreator.LastReqId)); - BlockDevice->TrimSync(diskSizeBytes, 0); - for (ui32 i = 0; i < ChunkState.size(); ++i) { - if (ChunkState[i].OwnerId == OwnerUnallocated) { - ChunkState[i].OwnerId = OwnerUnallocatedTrimmed; - } - } - double trimDurationSec = HPSecondsFloat(HPNow() - start); - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "Trim of the entire device done, spent " << + NHPTimer::STime start = HPNow(); + TReqId reqId(TReqId::FormatTrim, AtomicIncrement(ReqCreator.LastReqId)); + BlockDevice->TrimSync(diskSizeBytes, 0); + for (ui32 i = 0; i < ChunkState.size(); ++i) { + if (ChunkState[i].OwnerId == OwnerUnallocated) { + ChunkState[i].OwnerId = OwnerUnallocatedTrimmed; + } + } + double trimDurationSec = HPSecondsFloat(HPNow() - start); + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "Trim of the entire device done, spent " << trimDurationSec << " seconds, trim speed " << diskSizeBytes / (1u << 20) / trimDurationSec << " MiB/s" << " Marker# BPD29"); - } - - // Write and apply format record with magic in first bytes + } + + // Write and apply format record with magic in first bytes format.SetFormatInProgress(true); format.SetHash(); WriteApplyFormatRecord(format, mainKey); @@ -1370,7 +1370,7 @@ void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 user // Prepare initial SysLogRecord memset(&SysLogRecord, 0, sizeof(SysLogRecord)); SysLogRecord.LogHeadChunkIdx = format.SystemChunkCount; - FirstLogChunkToParseCommits = format.SystemChunkCount; + FirstLogChunkToParseCommits = format.SystemChunkCount; for (ui32 i = 0; i < 
NonceCount; ++i) { SysLogRecord.Nonces.Value[i] = 1; LoggedNonces.Value[i] = 1; @@ -1381,18 +1381,18 @@ void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 user } // Fill the cyclic log with initial SysLogRecords - SysLogger.Reset(new TSysLogWriter(Mon, *BlockDevice.Get(), Format, SysLogRecord.Nonces.Value[NonceSysLog], - Format.SysLogKey, BufferPool.Get(), firstSectorIdx, endSectorIdx, Format.MagicSysLogChunk, 0, - nullptr, firstSectorIdx, nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, - Cfg->EnableSectorEncryption)); + SysLogger.Reset(new TSysLogWriter(Mon, *BlockDevice.Get(), Format, SysLogRecord.Nonces.Value[NonceSysLog], + Format.SysLogKey, BufferPool.Get(), firstSectorIdx, endSectorIdx, Format.MagicSysLogChunk, 0, + nullptr, firstSectorIdx, nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, + Cfg->EnableSectorEncryption)); bool isFull = false; - while (!isFull) { - ui64 sectorIdx = SysLogger->SectorIdx; - TSignalEvent doneEvent; - WriteSysLogRestorePoint(new TCompletionSignal(&doneEvent), TReqId(TReqId::FormatFillSysLog, 0), {}); - doneEvent.WaitI(); - isFull = SysLogger->SectorIdx < sectorIdx; + while (!isFull) { + ui64 sectorIdx = SysLogger->SectorIdx; + TSignalEvent doneEvent; + WriteSysLogRestorePoint(new TCompletionSignal(&doneEvent), TReqId(TReqId::FormatFillSysLog, 0), {}); + doneEvent.WaitI(); + isFull = SysLogger->SectorIdx < sectorIdx; } // Write and apply format record @@ -1406,163 +1406,163 @@ void TPDisk::WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 user //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void TPDisk::ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str) { - TStringStream error; - error << "PDiskId# " << PDiskId << " YardInit error for VDiskId# " << evYardInit.VDisk.ToStringWOGeneration() - << " reason# " << str; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, 
error.Str()); + TStringStream error; + error << "PDiskId# " << PDiskId << " YardInit error for VDiskId# " << evYardInit.VDisk.ToStringWOGeneration() + << " reason# " << str; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, error.Str()); ui64 writeBlockSize = ForsetiOpPieceSizeCached; ui64 readBlockSize = ForsetiOpPieceSizeCached; ActorSystem->Send(evYardInit.Sender, new NPDisk::TEvYardInitResult(NKikimrProto::ERROR, - DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), + DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize, DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), OwnerSystem, 0, - GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), TVector<TChunkIdx>(), error.Str())); + GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), TVector<TChunkIdx>(), error.Str())); Mon.YardInit.CountResponse(); } -TOwner TPDisk::FindNextOwnerId() { - const TOwner start = LastOwnerId; - do { - ++LastOwnerId; - Mon.OwnerIdsIssued->Inc(); - if (LastOwnerId == OwnerEndUser) { - LastOwnerId = OwnerBeginUser; - } - if (LastOwnerId == start) { - return 0; - } - } while (OwnerData[LastOwnerId].VDiskId != TVDiskID::InvalidId); - - *Mon.LastOwnerId = LastOwnerId; - return LastOwnerId; -} - -bool TPDisk::YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner) { - // Just register cut log id and reply with starting points. 
- TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); - - TOwnerData &ownerData = OwnerData[owner]; - - ownerData.OwnerRound = evYardInit.OwnerRound; - TOwnerRound ownerRound = evYardInit.OwnerRound; - TVector<TChunkIdx> ownedChunks; - ownedChunks.reserve(ChunkState.size()); - for (TChunkIdx chunkId = 0; chunkId < ChunkState.size(); ++chunkId) { - if (ChunkState[chunkId].OwnerId == owner) { - ownedChunks.push_back(chunkId); - } - } - ui64 writeBlockSize = ForsetiOpPieceSizeCached; - ui64 readBlockSize = ForsetiOpPieceSizeCached; - THolder<NPDisk::TEvYardInitResult> result(new NPDisk::TEvYardInitResult(NKikimrProto::OK, - DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), - DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize, - DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner, - ownerRound, GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), ownedChunks, nullptr)); - GetStartingPoints(owner, result->StartingPoints); - ownerData.VDiskId = vDiskId; - ownerData.CutLogId = evYardInit.CutLogId; +TOwner TPDisk::FindNextOwnerId() { + const TOwner start = LastOwnerId; + do { + ++LastOwnerId; + Mon.OwnerIdsIssued->Inc(); + if (LastOwnerId == OwnerEndUser) { + LastOwnerId = OwnerBeginUser; + } + if (LastOwnerId == start) { + return 0; + } + } while (OwnerData[LastOwnerId].VDiskId != TVDiskID::InvalidId); + + *Mon.LastOwnerId = LastOwnerId; + return LastOwnerId; +} + +bool TPDisk::YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner) { + // Just register cut log id and reply with starting points. 
+ TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); + + TOwnerData &ownerData = OwnerData[owner]; + + ownerData.OwnerRound = evYardInit.OwnerRound; + TOwnerRound ownerRound = evYardInit.OwnerRound; + TVector<TChunkIdx> ownedChunks; + ownedChunks.reserve(ChunkState.size()); + for (TChunkIdx chunkId = 0; chunkId < ChunkState.size(); ++chunkId) { + if (ChunkState[chunkId].OwnerId == owner) { + ownedChunks.push_back(chunkId); + } + } + ui64 writeBlockSize = ForsetiOpPieceSizeCached; + ui64 readBlockSize = ForsetiOpPieceSizeCached; + THolder<NPDisk::TEvYardInitResult> result(new NPDisk::TEvYardInitResult(NKikimrProto::OK, + DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), + DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize, + DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner, + ownerRound, GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType), ownedChunks, nullptr)); + GetStartingPoints(owner, result->StartingPoints); + ownerData.VDiskId = vDiskId; + ownerData.CutLogId = evYardInit.CutLogId; ownerData.WhiteboardProxyId = evYardInit.WhiteboardProxyId; ownerData.VDiskSlotId = evYardInit.SlotId; - ownerData.LogRecordsConsequentlyRead = 0; - ownerData.LastSeenLsn = 0; - ownerData.HasAlreadyLoggedThisIncarnation = false; - ownerData.HasReadTheWholeLog = false; - ownerData.LogStartPosition = TLogPosition{0, 0}; - ownerData.Status = TOwnerData::VDISK_STATUS_SENT_INIT; - - AddCbsSet(owner); - - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " registered known VDisk# " << vDiskId << " as ownerId# " << owner - << " ownerRound# " << ownerRound << " Marker# BPD30"); - - ActorSystem->Send(evYardInit.Sender, result.Release()); - Mon.YardInit.CountResponse(); - AskVDisksToCutLogs(owner, false); - return true; -} - -bool TPDisk::YardInitStart(TYardInit &evYardInit) { + ownerData.LogRecordsConsequentlyRead = 0; + ownerData.LastSeenLsn = 0; + 
ownerData.HasAlreadyLoggedThisIncarnation = false; + ownerData.HasReadTheWholeLog = false; + ownerData.LogStartPosition = TLogPosition{0, 0}; + ownerData.Status = TOwnerData::VDISK_STATUS_SENT_INIT; + + AddCbsSet(owner); + + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " registered known VDisk# " << vDiskId << " as ownerId# " << owner + << " ownerRound# " << ownerRound << " Marker# BPD30"); + + ActorSystem->Send(evYardInit.Sender, result.Release()); + Mon.YardInit.CountResponse(); + AskVDisksToCutLogs(owner, false); + return true; +} + +bool TPDisk::YardInitStart(TYardInit &evYardInit) { if (evYardInit.VDisk == TVDiskID::InvalidId) { - ReplyErrorYardInitResult(evYardInit, "VDisk == InvalidId. Marker# BPD03"); - return false; + ReplyErrorYardInitResult(evYardInit, "VDisk == InvalidId. Marker# BPD03"); + return false; } if (evYardInit.PDiskGuid != Format.Guid) { TStringStream str; - str << "incorrect guid. User-provided# " << evYardInit.PDiskGuid - << " on-disk# " << Format.Guid + str << "incorrect guid. User-provided# " << evYardInit.PDiskGuid + << " on-disk# " << Format.Guid << " Marker# BPD04"; ReplyErrorYardInitResult(evYardInit, str.Str()); - return false; + return false; } // Make sure owner round is always higher than the NextOwnerRound if (evYardInit.OwnerRound <= NextOwnerRound) { TStringStream str; - str << "requested OwnerRound# " << evYardInit.OwnerRound - << " <= minExpectedOwnerRound# " << NextOwnerRound + str << "requested OwnerRound# " << evYardInit.OwnerRound + << " <= minExpectedOwnerRound# " << NextOwnerRound << " OwnerRound seems to be older than the PDisk. 
Marker# BPD05"; ReplyErrorYardInitResult(evYardInit, str.Str()); - return false; + return false; } - TOwner owner; + TOwner owner; - TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); - auto it = VDiskOwners.find(vDiskId); - if (it != VDiskOwners.end()) { - // Owner is already known, but use next ownerRound to decrease probability of errors - owner = it->second; - } else { - owner = FindNextOwnerId(); + TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); + auto it = VDiskOwners.find(vDiskId); + if (it != VDiskOwners.end()) { + // Owner is already known, but use next ownerRound to decrease probability of errors + owner = it->second; + } else { + owner = FindNextOwnerId(); if (owner == 0) { - ReplyErrorYardInitResult(evYardInit, "owner limit is reached. Marker# BPD07"); - return false; - } - // TODO REPLY ERROR - TOwnerData &data = OwnerData[owner]; - Y_VERIFY_S(!data.HaveRequestsInFlight(), "owner# " << owner); - } - evYardInit.Owner = owner; - - TOwnerData &ownerData = OwnerData[owner]; - ui64 prevOwnerRound = ownerData.OwnerRound; - if (prevOwnerRound >= evYardInit.OwnerRound) { - TStringStream str; - str << "requested OwnerRound# " << evYardInit.OwnerRound - << " <= prevoiuslyUsedOwnerRound# " << prevOwnerRound - << " OwnerRound may never decrease and can only be used once for YardInit. Marker# BPD13"; - ReplyErrorYardInitResult(evYardInit, str.Str()); - return false; - } - - // Update round and wait for all pending requests of old owner to finish - ownerData.OwnerRound = evYardInit.OwnerRound; - return true; -} - -void TPDisk::YardInitFinish(TYardInit &evYardInit) { - TOwner owner = evYardInit.Owner; - TOwnerRound ownerRound = evYardInit.OwnerRound; - { - TGuard<TMutex> guard(StateMutex); - TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); - - auto it = VDiskOwners.find(vDiskId); - if (it != VDiskOwners.end()) { - YardInitForKnownVDisk(evYardInit, it->second); + ReplyErrorYardInitResult(evYardInit, "owner limit is reached. 
Marker# BPD07"); + return false; + } + // TODO REPLY ERROR + TOwnerData &data = OwnerData[owner]; + Y_VERIFY_S(!data.HaveRequestsInFlight(), "owner# " << owner); + } + evYardInit.Owner = owner; + + TOwnerData &ownerData = OwnerData[owner]; + ui64 prevOwnerRound = ownerData.OwnerRound; + if (prevOwnerRound >= evYardInit.OwnerRound) { + TStringStream str; + str << "requested OwnerRound# " << evYardInit.OwnerRound + << " <= prevoiuslyUsedOwnerRound# " << prevOwnerRound + << " OwnerRound may never decrease and can only be used once for YardInit. Marker# BPD13"; + ReplyErrorYardInitResult(evYardInit, str.Str()); + return false; + } + + // Update round and wait for all pending requests of old owner to finish + ownerData.OwnerRound = evYardInit.OwnerRound; + return true; +} + +void TPDisk::YardInitFinish(TYardInit &evYardInit) { + TOwner owner = evYardInit.Owner; + TOwnerRound ownerRound = evYardInit.OwnerRound; + { + TGuard<TMutex> guard(StateMutex); + TVDiskID vDiskId = evYardInit.VDiskIdWOGeneration(); + + auto it = VDiskOwners.find(vDiskId); + if (it != VDiskOwners.end()) { + YardInitForKnownVDisk(evYardInit, it->second); return; } - - // Make sure owner round never decreases - // Allocate quota for the owner - // TODO(cthulhu): don't allocate more owners than expected - Keeper.AddOwner(owner, vDiskId); - - OwnerData[owner] = TOwnerData{}; - + + // Make sure owner round never decreases + // Allocate quota for the owner + // TODO(cthulhu): don't allocate more owners than expected + Keeper.AddOwner(owner, vDiskId); + + OwnerData[owner] = TOwnerData{}; + // A new owner is created. 
AtomicIncrement(TotalOwners); @@ -1574,15 +1574,15 @@ void TPDisk::YardInitFinish(TYardInit &evYardInit) { OwnerData[owner].VDiskSlotId = evYardInit.SlotId; OwnerData[owner].OwnerRound = evYardInit.OwnerRound; VDiskOwners[vDiskId] = owner; - OwnerData[owner].Status = TOwnerData::VDISK_STATUS_SENT_INIT; + OwnerData[owner].Status = TOwnerData::VDISK_STATUS_SENT_INIT; SysLogRecord.OwnerVDisks[owner] = vDiskId; ownerRound = OwnerData[owner].OwnerRound; AddCbsSet(owner); - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " new owner is created. ownerId# " << owner - << " vDiskId# " << vDiskId.ToStringWOGeneration() + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " new owner is created. ownerId# " << owner + << " vDiskId# " << vDiskId.ToStringWOGeneration() << " FirstNonceToKeep# " << SysLogFirstNoncesToKeep.FirstNonceToKeep[owner] << " CutLogId# " << OwnerData[owner].CutLogId << " ownerRound# " << OwnerData[owner].OwnerRound @@ -1593,12 +1593,12 @@ void TPDisk::YardInitFinish(TYardInit &evYardInit) { ui64 writeBlockSize = ForsetiOpPieceSizeCached; ui64 readBlockSize = ForsetiOpPieceSizeCached; - + THolder<NPDisk::TEvYardInitResult> result(new NPDisk::TEvYardInitResult( NKikimrProto::OK, - DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), + DriveModel.SeekTimeNs() / 1000ull, DriveModel.Speed(TDriveModel::OP_TYPE_READ), DriveModel.Speed(TDriveModel::OP_TYPE_WRITE), readBlockSize, writeBlockSize, - DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner, ownerRound, + DriveModel.BulkWriteBlockSize(), GetUserAccessibleChunkSize(), GetChunkAppendBlockSize(), owner, ownerRound, GetStatusFlags(OwnerSystem, evYardInit.OwnerGroupType) | ui32(NKikimrBlobStorage::StatusNewOwner), TVector<TChunkIdx>(), nullptr)); GetStartingPoints(result->PDiskParams->Owner, result->StartingPoints); @@ -1615,20 +1615,20 @@ void TPDisk::ConfigureCbs(ui32 ownerId, 
EGate gate, ui64 weight) { } } -void TPDisk::SchedulerConfigure(const TConfigureScheduler &reqCfg) { +void TPDisk::SchedulerConfigure(const TConfigureScheduler &reqCfg) { // TODO(cthulhu): Check OwnerRound here - const TPDiskSchedulerConfig& cfg = reqCfg.SchedulerCfg; - ui32 ownerId = reqCfg.OwnerId; - ui64 bytesTotalWeight = cfg.LogWeight + cfg.FreshWeight + cfg.CompWeight; - ConfigureCbs(ownerId, GateLog, cfg.LogWeight * cfg.BytesSchedulerWeight); - ConfigureCbs(ownerId, GateFresh, cfg.FreshWeight * cfg.BytesSchedulerWeight); - ConfigureCbs(ownerId, GateComp, cfg.CompWeight * cfg.BytesSchedulerWeight); - ConfigureCbs(ownerId, GateFastRead, cfg.FastReadWeight * bytesTotalWeight); - ConfigureCbs(ownerId, GateOtherRead, cfg.OtherReadWeight * bytesTotalWeight); - ConfigureCbs(ownerId, GateLoad, cfg.LoadWeight * bytesTotalWeight); - ConfigureCbs(ownerId, GateHuge, cfg.HugeWeight * bytesTotalWeight); - ConfigureCbs(ownerId, GateSyncLog, cfg.SyncLogWeight * bytesTotalWeight); - ConfigureCbs(ownerId, GateLow, cfg.LowReadWeight); + const TPDiskSchedulerConfig& cfg = reqCfg.SchedulerCfg; + ui32 ownerId = reqCfg.OwnerId; + ui64 bytesTotalWeight = cfg.LogWeight + cfg.FreshWeight + cfg.CompWeight; + ConfigureCbs(ownerId, GateLog, cfg.LogWeight * cfg.BytesSchedulerWeight); + ConfigureCbs(ownerId, GateFresh, cfg.FreshWeight * cfg.BytesSchedulerWeight); + ConfigureCbs(ownerId, GateComp, cfg.CompWeight * cfg.BytesSchedulerWeight); + ConfigureCbs(ownerId, GateFastRead, cfg.FastReadWeight * bytesTotalWeight); + ConfigureCbs(ownerId, GateOtherRead, cfg.OtherReadWeight * bytesTotalWeight); + ConfigureCbs(ownerId, GateLoad, cfg.LoadWeight * bytesTotalWeight); + ConfigureCbs(ownerId, GateHuge, cfg.HugeWeight * bytesTotalWeight); + ConfigureCbs(ownerId, GateSyncLog, cfg.SyncLogWeight * bytesTotalWeight); + ConfigureCbs(ownerId, GateLow, cfg.LowReadWeight); ForsetiScheduler.UpdateTotalWeight(); } @@ -1644,7 +1644,7 @@ void TPDisk::CheckSpace(TCheckSpace &evCheckSpace) { 
GetUsedChunks(evCheckSpace.Owner, evCheckSpace.OwnerGroupType), TString())); ActorSystem->Send(evCheckSpace.Sender, result.Release()); - Mon.CheckSpace.CountResponse(); + Mon.CheckSpace.CountResponse(); return; } @@ -1652,96 +1652,96 @@ void TPDisk::CheckSpace(TCheckSpace &evCheckSpace) { // Owner self-destruction //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::ForceDeleteChunk(TChunkIdx chunkIdx) { - TGuard<TMutex> guard(StateMutex); - TChunkState &state = ChunkState[chunkIdx]; - TOwner owner = state.OwnerId; - Y_VERIFY_S(!state.HasAnyOperationsInProgress(), "PDiskId# " << PDiskId << " ForceDeleteChunk, ownerId# " << owner - << " chunkIdx# " << chunkIdx << " has operationsInProgress, state# " << state.ToString()); - - switch (state.CommitState) { - case TChunkState::DATA_ON_QUARANTINE: - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " chunkIdx# " << chunkIdx << " owned by owner# " << state.OwnerId - << " is released from quarantine and marked as free at ForceDeleteChunk"); - [[fallthrough]]; - case TChunkState::DATA_RESERVED: - [[fallthrough]]; - case TChunkState::DATA_COMMITTED: - state.OwnerId = OwnerUnallocated; - state.CommitState = TChunkState::FREE; - state.Nonce = 0; - state.CurrentNonce = 0; - Keeper.PushFreeOwnerChunk(owner, chunkIdx); - break; - case TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS: - [[fallthrough]]; - case TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS: - // Chunk will be freed in TPDisk::DeleteChunk() - break; - default: - Y_FAIL_S("PDiskId# " << PDiskId << " ForceDeleteChunk, ownerId# " << owner - << " chunkIdx# " << chunkIdx << " unexpected commitState# " << state.CommitState); - break; - } -} - +void TPDisk::ForceDeleteChunk(TChunkIdx chunkIdx) { + TGuard<TMutex> guard(StateMutex); + TChunkState &state = ChunkState[chunkIdx]; + TOwner owner = state.OwnerId; + Y_VERIFY_S(!state.HasAnyOperationsInProgress(), "PDiskId# " 
<< PDiskId << " ForceDeleteChunk, ownerId# " << owner + << " chunkIdx# " << chunkIdx << " has operationsInProgress, state# " << state.ToString()); + + switch (state.CommitState) { + case TChunkState::DATA_ON_QUARANTINE: + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " chunkIdx# " << chunkIdx << " owned by owner# " << state.OwnerId + << " is released from quarantine and marked as free at ForceDeleteChunk"); + [[fallthrough]]; + case TChunkState::DATA_RESERVED: + [[fallthrough]]; + case TChunkState::DATA_COMMITTED: + state.OwnerId = OwnerUnallocated; + state.CommitState = TChunkState::FREE; + state.Nonce = 0; + state.CurrentNonce = 0; + Keeper.PushFreeOwnerChunk(owner, chunkIdx); + break; + case TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS: + [[fallthrough]]; + case TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS: + // Chunk will be freed in TPDisk::DeleteChunk() + break; + default: + Y_FAIL_S("PDiskId# " << PDiskId << " ForceDeleteChunk, ownerId# " << owner + << " chunkIdx# " << chunkIdx << " unexpected commitState# " << state.CommitState); + break; + } +} + void TPDisk::KillOwner(TOwner owner, TOwnerRound killOwnerRound, TCompletionEventSender *completionAction) { Y_UNUSED(killOwnerRound); { TGuard<TMutex> guard(StateMutex); - bool pushedOwnerIntoQuarantine = false; + bool pushedOwnerIntoQuarantine = false; for (ui32 i = 0; i < ChunkState.size(); ++i) { - TChunkState &state = ChunkState[i]; - if (state.OwnerId == owner) { - if (state.CommitState == TChunkState::DATA_RESERVED) { - Mon.UncommitedDataChunks->Dec(); - } else if (state.CommitState == TChunkState::DATA_COMMITTED) { - Mon.CommitedDataChunks->Dec(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + TChunkState &state = ChunkState[i]; + if (state.OwnerId == owner) { + if (state.CommitState == TChunkState::DATA_RESERVED) { + Mon.UncommitedDataChunks->Dec(); + } else if (state.CommitState == TChunkState::DATA_COMMITTED) { + 
Mon.CommitedDataChunks->Dec(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " Marker# BPD84", - (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)i); - } - if (state.HasAnyOperationsInProgress()) { - if (state.CommitState == TChunkState::DATA_RESERVED - || state.CommitState == TChunkState::DATA_COMMITTED) { - state.CommitState = TChunkState::DATA_ON_QUARANTINE; - } - QuarantineChunks.push_back(i); - - if (!pushedOwnerIntoQuarantine) { - pushedOwnerIntoQuarantine = true; - QuarantineOwners.push_back(owner); - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " push owner# " << owner << " into quarantine"); - } + (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)i); + } + if (state.HasAnyOperationsInProgress()) { + if (state.CommitState == TChunkState::DATA_RESERVED + || state.CommitState == TChunkState::DATA_COMMITTED) { + state.CommitState = TChunkState::DATA_ON_QUARANTINE; + } + QuarantineChunks.push_back(i); + + if (!pushedOwnerIntoQuarantine) { + pushedOwnerIntoQuarantine = true; + QuarantineOwners.push_back(owner); + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " push owner# " << owner << " into quarantine"); + } } else { - ForceDeleteChunk(i); + ForceDeleteChunk(i); } } } - if (!pushedOwnerIntoQuarantine) { - Keeper.RemoveOwner(owner); - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " removed owner# " << owner << " from chunks Keeper"); - } - + if (!pushedOwnerIntoQuarantine) { + Keeper.RemoveOwner(owner); + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " removed owner# " << owner << " from chunks Keeper"); + } + TryTrimChunk(false, 0); ui64 lastSeenLsn = 0; - auto it = LogChunks.begin(); + auto it = LogChunks.begin(); while (it != LogChunks.end()) { if 
(it->OwnerLsnRange.size() > owner && it->OwnerLsnRange[owner].IsPresent) { Y_VERIFY(it->CurrentUserCount > 0); it->CurrentUserCount--; it->OwnerLsnRange[owner].IsPresent = false; it->OwnerLsnRange[owner].FirstLsn = 0; - lastSeenLsn = Max(it->OwnerLsnRange[owner].LastLsn, lastSeenLsn); + lastSeenLsn = Max(it->OwnerLsnRange[owner].LastLsn, lastSeenLsn); it->OwnerLsnRange[owner].LastLsn = 0; } ++it; } - ReleaseUnusedLogChunks(completionAction); + ReleaseUnusedLogChunks(completionAction); TVDiskID vDiskId = SysLogRecord.OwnerVDisks[owner]; vDiskId.GroupGeneration = -1; // Since it might be non-zero. @@ -1752,13 +1752,13 @@ void TPDisk::KillOwner(TOwner owner, TOwnerRound killOwnerRound, TCompletionEven AtomicDecrement(TotalOwners); TOwnerRound ownerRound = OwnerData[owner].OwnerRound; - OwnerData[owner] = TOwnerData{}; + OwnerData[owner] = TOwnerData{}; OwnerData[owner].OwnerRound = ownerRound; VDiskOwners.erase(vDiskId); - - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " KillOwner, ownerId# " << owner << " ownerRound# " << ownerRound - << " VDiskId# " << vDiskId.ToStringWOGeneration() << " lastSeenLsn# " << lastSeenLsn << " Marker# BPD12"); + + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " KillOwner, ownerId# " << owner << " ownerRound# " << ownerRound + << " VDiskId# " << vDiskId.ToStringWOGeneration() << " lastSeenLsn# " << lastSeenLsn << " Marker# BPD12"); } WriteSysLogRestorePoint(completionAction, TReqId(TReqId::KillOwnerSysLog, 0), {}); @@ -1769,7 +1769,7 @@ void TPDisk::Harakiri(THarakiri &evHarakiri) { new TCompletionEventSender(this, evHarakiri.Sender, new NPDisk::TEvHarakiriResult(NKikimrProto::OK, GetStatusFlags(evHarakiri.Owner, evHarakiri.OwnerGroupType), TString()), - Mon.Harakiri.Results)); + Mon.Harakiri.Results)); } void TPDisk::Slay(TSlay &evSlay) { @@ -1777,10 +1777,10 @@ void TPDisk::Slay(TSlay &evSlay) { TGuard<TMutex> guard(StateMutex); TVDiskID vDiskId = evSlay.VDiskId; 
vDiskId.GroupGeneration = -1; - auto it = VDiskOwners.find(vDiskId); + auto it = VDiskOwners.find(vDiskId); if (it == VDiskOwners.end()) { TStringStream str; - str << "PDiskId# " << (ui32)PDiskId << " Can't slay VDiskId# " << evSlay.VDiskId; + str << "PDiskId# " << (ui32)PDiskId << " Can't slay VDiskId# " << evSlay.VDiskId; str << " as it is not created yet or is already slain" << " Marker# BPD31"; LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, "%s", str.Str().c_str()); @@ -1796,7 +1796,7 @@ void TPDisk::Slay(TSlay &evSlay) { TOwnerRound ownerRound = OwnerData[owner].OwnerRound; if (evSlay.SlayOwnerRound <= ownerRound) { TStringStream str; - str << "PDiskId# " << (ui32)PDiskId << " Can't slay VDiskId# " << evSlay.VDiskId; + str << "PDiskId# " << (ui32)PDiskId << " Can't slay VDiskId# " << evSlay.VDiskId; str << " as SlayOwnerRound# " << evSlay.SlayOwnerRound << " <= ownerRound# " << ownerRound << " Marker# BPD32"; LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, "%s", str.Str().c_str()); @@ -1832,14 +1832,14 @@ void TPDisk::ProcessChunkWriteQueue() { { TChunkWritePiece *piece = static_cast<TChunkWritePiece*>(req); if (ChunkWritePiece(piece->ChunkWrite.Get(), piece->PieceShift, piece->PieceSize)) { - Mon.IncrementQueueTime(piece->ChunkWrite->PriorityClass, - piece->ChunkWrite->LifeDurationMs(now)); + Mon.IncrementQueueTime(piece->ChunkWrite->PriorityClass, + piece->ChunkWrite->LifeDurationMs(now)); } delete piece; break; } default: - Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkWrites"); + Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkWrites"); } } JointChunkWrites.clear(); @@ -1851,16 +1851,16 @@ void TPDisk::ProcessChunkReadQueue() { } NHPTimer::STime now = HPNow(); - // Size (bytes) of elementary sectors block, it is useless to read/write less than that blockSize + // Size (bytes) of elementary sectors block, it is useless to read/write less than that blockSize ui64 bufferSize = 
BufferPool->GetBufferSize() / Format.SectorSize * Format.SectorSize; - - for (auto& req : JointChunkReads) { + + for (auto& req : JointChunkReads) { switch (req->GetType()) { case ERequestType::RequestChunkReadPiece: { - TChunkReadPiece *piece = static_cast<TChunkReadPiece*>(req.Get()); - Y_VERIFY(!piece->SelfPointer); + TChunkReadPiece *piece = static_cast<TChunkReadPiece*>(req.Get()); + Y_VERIFY(!piece->SelfPointer); TIntrusivePtr<TChunkRead> &read = piece->ChunkRead; TReqId reqId = read->ReqId; ui32 chunkIdx = read->ChunkIdx; @@ -1876,17 +1876,17 @@ void TPDisk::ProcessChunkReadQueue() { ui32 size = 0; while (!isComplete && size < piece->PieceSizeLimit) { ui64 currentLimit = Min(bufferSize, piece->PieceSizeLimit - size); - ui64 reallyReadDiskBytes; - EChunkReadPieceResult result = ChunkReadPiece(read, piece->PieceCurrentSector + size / Format.SectorSize, - currentLimit, &reallyReadDiskBytes); + ui64 reallyReadDiskBytes; + EChunkReadPieceResult result = ChunkReadPiece(read, piece->PieceCurrentSector + size / Format.SectorSize, + currentLimit, &reallyReadDiskBytes); isComplete = (result != ReadPieceResultInProgress); - // Read pieces is sliced previously and it is expected that ChunkReadPiece will read exactly - // currentLimit bytes - Y_VERIFY_S(reallyReadDiskBytes == currentLimit, reallyReadDiskBytes << " != " << currentLimit); + // Read pieces is sliced previously and it is expected that ChunkReadPiece will read exactly + // currentLimit bytes + Y_VERIFY_S(reallyReadDiskBytes == currentLimit, reallyReadDiskBytes << " != " << currentLimit); size += currentLimit; } } - piece->OnSuccessfulDestroy(ActorSystem); + piece->OnSuccessfulDestroy(ActorSystem); if (isComplete) { // // WARNING: Don't access "read" after this point. 
@@ -1901,27 +1901,27 @@ void TPDisk::ProcessChunkReadQueue() { break; } default: - Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkReads"); + Y_FAIL_S("Unexpected request type# " << ui64(req->GetType()) << " in JointChunkReads"); } } JointChunkReads.clear(); } -void TPDisk::TrimAllUntrimmedChunks() { - TGuard<TMutex> g(StateMutex); - if (!DriveModel.IsTrimSupported()) { - return; - } - +void TPDisk::TrimAllUntrimmedChunks() { + TGuard<TMutex> g(StateMutex); + if (!DriveModel.IsTrimSupported()) { + return; + } + while (ui32 idx = Keeper.PopUntrimmedFreeChunk()) { - BlockDevice->TrimSync(Format.ChunkSize, idx * Format.ChunkSize); - Y_VERIFY_S(ChunkState[idx].OwnerId == OwnerUnallocated || ChunkState[idx].OwnerId == OwnerUnallocatedTrimmed, - "PDiskId# " << PDiskId << " Unexpected ownerId# " << ui32(ChunkState[idx].OwnerId)); - ChunkState[idx].OwnerId = OwnerUnallocatedTrimmed; + BlockDevice->TrimSync(Format.ChunkSize, idx * Format.ChunkSize); + Y_VERIFY_S(ChunkState[idx].OwnerId == OwnerUnallocated || ChunkState[idx].OwnerId == OwnerUnallocatedTrimmed, + "PDiskId# " << PDiskId << " Unexpected ownerId# " << ui32(ChunkState[idx].OwnerId)); + ChunkState[idx].OwnerId = OwnerUnallocatedTrimmed; Keeper.PushTrimmedFreeChunk(idx); - } -} - + } +} + void TPDisk::ProcessChunkTrimQueue() { Y_VERIFY(JointChunkTrims.size() <= 1); for (auto it = JointChunkTrims.begin(); it != JointChunkTrims.end(); ++it) { @@ -1933,46 +1933,46 @@ void TPDisk::ProcessChunkTrimQueue() { // If deadline occurs, than we want to maximize throughput, so trim entire chunk trimSize = Format.ChunkSize - trim->Offset; } - auto completion = MakeHolder<TChunkTrimCompletion>(this, trim->CreationTime, trimSize, trim->ReqId); + auto completion = MakeHolder<TChunkTrimCompletion>(this, trim->CreationTime, trimSize, trim->ReqId); completion->CostNs = DriveModel.TrimTimeForSizeNs(trimSize); - BlockDevice->TrimAsync(trimSize, offset, completion.Release(), trim->ReqId); + 
BlockDevice->TrimAsync(trimSize, offset, completion.Release(), trim->ReqId); delete trim; } JointChunkTrims.clear(); } -void TPDisk::ClearQuarantineChunks() { - if (QuarantineChunks.empty() && QuarantineOwners.empty()) { - return; - } - - TGuard<TMutex> guard(StateMutex); - { - const auto it = std::partition(QuarantineChunks.begin(), QuarantineChunks.end(), [&] (TChunkIdx i) { - return ChunkState[i].HasAnyOperationsInProgress() - || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_ON_QUARANTINE; - }); - for (auto delIt = it; delIt != QuarantineChunks.end(); ++delIt) { - ForceDeleteChunk(*delIt); - } - QuarantineChunks.erase(it, QuarantineChunks.end()); - *Mon.QuarantineChunks = QuarantineChunks.size(); - } - - { - const auto it = std::partition(QuarantineOwners.begin(), QuarantineOwners.end(), [&] (TOwner i) { - return Keeper.GetOwnerUsed(i); - }); - for (auto delIt = it; delIt != QuarantineOwners.end(); ++delIt) { - Keeper.RemoveOwner(*delIt); - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " removed owner# " << *delIt << " from chunks Keeper through QuarantineOwners"); - } - QuarantineOwners.erase(it, QuarantineOwners.end()); - *Mon.QuarantineOwners = QuarantineOwners.size(); - } -} - +void TPDisk::ClearQuarantineChunks() { + if (QuarantineChunks.empty() && QuarantineOwners.empty()) { + return; + } + + TGuard<TMutex> guard(StateMutex); + { + const auto it = std::partition(QuarantineChunks.begin(), QuarantineChunks.end(), [&] (TChunkIdx i) { + return ChunkState[i].HasAnyOperationsInProgress() + || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_ON_QUARANTINE; + }); + for (auto delIt = it; delIt != QuarantineChunks.end(); ++delIt) { + ForceDeleteChunk(*delIt); + } + QuarantineChunks.erase(it, QuarantineChunks.end()); + *Mon.QuarantineChunks = QuarantineChunks.size(); + } + + { + const auto it = std::partition(QuarantineOwners.begin(), QuarantineOwners.end(), [&] (TOwner i) { + return Keeper.GetOwnerUsed(i); + }); 
+ for (auto delIt = it; delIt != QuarantineOwners.end(); ++delIt) { + Keeper.RemoveOwner(*delIt); + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " removed owner# " << *delIt << " from chunks Keeper through QuarantineOwners"); + } + QuarantineOwners.erase(it, QuarantineOwners.end()); + *Mon.QuarantineOwners = QuarantineOwners.size(); + } +} + // Should be called to initiate TRIM (on chunk delete or prev trim done) void TPDisk::TryTrimChunk(bool prevDone, ui64 trimmedSize) { TGuard<TMutex> g(StateMutex); @@ -1990,22 +1990,22 @@ void TPDisk::TryTrimChunk(bool prevDone, ui64 trimmedSize) { if (!ChunkBeingTrimmed) { // There was no previous TRIMs ChunkBeingTrimmed = Keeper.PopUntrimmedFreeChunk(); if (ChunkBeingTrimmed) { - Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated - || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId - << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); + Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated + || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId + << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); } TrimOffset = 0; } else if (TrimOffset >= Format.ChunkSize) { // Previous chunk entirely trimmed - Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated - || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId - << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); - ChunkState[ChunkBeingTrimmed].OwnerId = OwnerUnallocatedTrimmed; + Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated + || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId + << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); + ChunkState[ChunkBeingTrimmed].OwnerId = OwnerUnallocatedTrimmed; 
Keeper.PushTrimmedFreeChunk(ChunkBeingTrimmed); ChunkBeingTrimmed = Keeper.PopUntrimmedFreeChunk(); if (ChunkBeingTrimmed) { - Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated - || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId - << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); + Y_VERIFY_S(ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocated + || ChunkState[ChunkBeingTrimmed].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId + << " Unexpected ownerId# " << ui32(ChunkState[ChunkBeingTrimmed].OwnerId)); } TrimOffset = 0; } @@ -2013,66 +2013,66 @@ void TPDisk::TryTrimChunk(bool prevDone, ui64 trimmedSize) { if (ChunkBeingTrimmed) { // Initiate trim of next part of chunk const ui64 trimStep = (Keeper.GetTrimmedFreeChunkCount() > 100 ? 2 << 20 : 32 << 20); ui64 trimSize = Min<ui64>(Format.ChunkSize - TrimOffset, trimStep); - TChunkTrim* trim = ReqCreator.CreateChunkTrim(ChunkBeingTrimmed, TrimOffset, trimSize); + TChunkTrim* trim = ReqCreator.CreateChunkTrim(ChunkBeingTrimmed, TrimOffset, trimSize); InputRequest(trim); TrimInFly = true; } } void TPDisk::ProcessFastOperationsQueue() { - for (auto& req : FastOperationsQueue) { - switch (req->GetType()) { - case ERequestType::RequestYardInit: { + for (auto& req : FastOperationsQueue) { + switch (req->GetType()) { + case ERequestType::RequestYardInit: { std::unique_ptr<TYardInit> init{static_cast<TYardInit*>(req.release())}; - if (YardInitStart(*init)) { - PendingYardInits.emplace(std::move(init)); - } + if (YardInitStart(*init)) { + PendingYardInits.emplace(std::move(init)); + } break; - } + } case ERequestType::RequestCheckSpace: - CheckSpace(static_cast<TCheckSpace&>(*req)); + CheckSpace(static_cast<TCheckSpace&>(*req)); break; case ERequestType::RequestHarakiri: - Harakiri(static_cast<THarakiri&>(*req)); + Harakiri(static_cast<THarakiri&>(*req)); break; - case ERequestType::RequestYardSlay: - 
Slay(static_cast<TSlay&>(*req)); + case ERequestType::RequestYardSlay: + Slay(static_cast<TSlay&>(*req)); break; case ERequestType::RequestChunkReserve: - ChunkReserve(static_cast<TChunkReserve&>(*req)); + ChunkReserve(static_cast<TChunkReserve&>(*req)); + break; + case ERequestType::RequestChunksLock: + ChunksLock(static_cast<TChunksLock&>(*req)); + break; + case ERequestType::RequestChunksUnlock: + ChunksUnlock(static_cast<TChunksUnlock&>(*req)); break; - case ERequestType::RequestChunksLock: - ChunksLock(static_cast<TChunksLock&>(*req)); - break; - case ERequestType::RequestChunksUnlock: - ChunksUnlock(static_cast<TChunksUnlock&>(*req)); - break; case ERequestType::RequestAskForCutLog: - SendCutLog(static_cast<TAskForCutLog&>(*req)); + SendCutLog(static_cast<TAskForCutLog&>(*req)); break; case ERequestType::RequestConfigureScheduler: - SchedulerConfigure(static_cast<TConfigureScheduler&>(*req)); + SchedulerConfigure(static_cast<TConfigureScheduler&>(*req)); + break; + case ERequestType::RequestWhiteboartReport: + WhiteboardReport(static_cast<TWhiteboardReport&>(*req)); + break; + case ERequestType::RequestHttpInfo: + HttpInfo(static_cast<THttpInfo&>(*req)); + break; + case ERequestType::RequestUndelivered: + EventUndelivered(static_cast<TUndelivered&>(*req)); break; - case ERequestType::RequestWhiteboartReport: - WhiteboardReport(static_cast<TWhiteboardReport&>(*req)); - break; - case ERequestType::RequestHttpInfo: - HttpInfo(static_cast<THttpInfo&>(*req)); - break; - case ERequestType::RequestUndelivered: - EventUndelivered(static_cast<TUndelivered&>(*req)); - break; case ERequestType::RequestCommitLogChunks: - CommitLogChunks(static_cast<TCommitLogChunks&>(*req)); + CommitLogChunks(static_cast<TCommitLogChunks&>(*req)); break; case ERequestType::RequestLogCommitDone: - OnLogCommitDone(static_cast<TLogCommitDone&>(*req)); + OnLogCommitDone(static_cast<TLogCommitDone&>(*req)); break; case ERequestType::RequestTryTrimChunk: - TryTrimChunk(true, 
static_cast<TTryTrimChunk&>(*req).TrimSize); + TryTrimChunk(true, static_cast<TTryTrimChunk&>(*req).TrimSize); break; case ERequestType::RequestReleaseChunks: - MarkChunksAsReleased(static_cast<TReleaseChunks&>(*req)); + MarkChunksAsReleased(static_cast<TReleaseChunks&>(*req)); break; default: Y_FAIL(); @@ -2087,14 +2087,14 @@ void TPDisk::ProcessFastOperationsQueue() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void TPDisk::OnDriveStartup() { - if (Cfg->UseSpdkNvmeDriver) { - return; - } + if (Cfg->UseSpdkNvmeDriver) { + return; + } TStringStream str; str << " OnDriveStartup "; - - DriveData = BlockDevice->GetDriveData(); - + + DriveData = BlockDevice->GetDriveData(); + bool isChanged = false; switch (Cfg->WriteCacheSwitch) { case NKikimrBlobStorage::TPDiskConfig::Enable: @@ -2130,7 +2130,7 @@ void TPDisk::OnDriveStartup() { if (isChanged) { if (Cfg->GetDriveDataSwitch == NKikimrBlobStorage::TPDiskConfig::Enable || Cfg->GetDriveDataSwitch == NKikimrBlobStorage::TPDiskConfig::ForceEnable) { - DriveData = BlockDevice->GetDriveData(); + DriveData = BlockDevice->GetDriveData(); } } @@ -2148,63 +2148,63 @@ void TPDisk::OnDriveStartup() { // Internal interface //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool TPDisk::Initialize(TActorSystem *actorSystem, const TActorId &pDiskActor) { -#define REGISTER_LOCAL_CONTROL(control) \ - actorSystem->AppData<TAppData>()->Icb->RegisterLocalControl(control, \ - TStringBuilder() << "PDisk_" << PDiskId << "_" << #control) - - PDiskActor = pDiskActor; - if (!IsStarted) { - if (actorSystem && actorSystem->AppData<TAppData>() && actorSystem->AppData<TAppData>()->Icb) { - REGISTER_LOCAL_CONTROL(SlowdownAddLatencyNs); - REGISTER_LOCAL_CONTROL(EnableForsetiBinLog); - REGISTER_LOCAL_CONTROL(ForsetiMinLogCostNsControl); - REGISTER_LOCAL_CONTROL(ForsetiMilliBatchSize); - 
REGISTER_LOCAL_CONTROL(ForsetiMaxLogBatchNs); - REGISTER_LOCAL_CONTROL(ForsetiOpPieceSizeSsd); - REGISTER_LOCAL_CONTROL(ForsetiOpPieceSizeRot); - REGISTER_LOCAL_CONTROL(Cfg->UseT1ha0HashInFooter); - } - Y_VERIFY(BlockDevice); - BlockDevice->Initialize(actorSystem, PDiskActor); - ActorSystem = actorSystem; - ReqCreator.SetActorSystem(actorSystem); - IsStarted = true; - - BufferPool = THolder<TBufferPool>(CreateBufferPool(Cfg->BufferPoolBufferSizeBytes, Cfg->BufferPoolBufferCount, - UseHugePages, {Mon.DeviceBufferPoolFailedAllocations, ActorSystem, PDiskId})); - - if (ActorSystem) { - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, - "PDiskId# %" PRIu32 " Initialize Cfg# %s DriveModel# %s", - (ui32)PDiskId, Cfg->ToString().data(), DriveModel.ToString().data()); - } - } - - if (PDiskThread.Running()) { +bool TPDisk::Initialize(TActorSystem *actorSystem, const TActorId &pDiskActor) { +#define REGISTER_LOCAL_CONTROL(control) \ + actorSystem->AppData<TAppData>()->Icb->RegisterLocalControl(control, \ + TStringBuilder() << "PDisk_" << PDiskId << "_" << #control) + + PDiskActor = pDiskActor; + if (!IsStarted) { + if (actorSystem && actorSystem->AppData<TAppData>() && actorSystem->AppData<TAppData>()->Icb) { + REGISTER_LOCAL_CONTROL(SlowdownAddLatencyNs); + REGISTER_LOCAL_CONTROL(EnableForsetiBinLog); + REGISTER_LOCAL_CONTROL(ForsetiMinLogCostNsControl); + REGISTER_LOCAL_CONTROL(ForsetiMilliBatchSize); + REGISTER_LOCAL_CONTROL(ForsetiMaxLogBatchNs); + REGISTER_LOCAL_CONTROL(ForsetiOpPieceSizeSsd); + REGISTER_LOCAL_CONTROL(ForsetiOpPieceSizeRot); + REGISTER_LOCAL_CONTROL(Cfg->UseT1ha0HashInFooter); + } + Y_VERIFY(BlockDevice); + BlockDevice->Initialize(actorSystem, PDiskActor); + ActorSystem = actorSystem; + ReqCreator.SetActorSystem(actorSystem); + IsStarted = true; + + BufferPool = THolder<TBufferPool>(CreateBufferPool(Cfg->BufferPoolBufferSizeBytes, Cfg->BufferPoolBufferCount, + UseHugePages, {Mon.DeviceBufferPoolFailedAllocations, ActorSystem, PDiskId})); + + if 
(ActorSystem) { + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, + "PDiskId# %" PRIu32 " Initialize Cfg# %s DriveModel# %s", + (ui32)PDiskId, Cfg->ToString().data(), DriveModel.ToString().data()); + } + } + + if (PDiskThread.Running()) { return true; } - PDiskThread.Start(); - + PDiskThread.Start(); + if (!BlockDevice->IsGood()) { *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::OpenFileError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - TStringStream errStr; - errStr << "Can't open file " << Cfg->GetDevicePath().Quote() << ": "; - if (!Cfg->GetDevicePath() && Cfg->ExpectedSerial) { - errStr << "no device with such serial was found"; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorNoDeviceWithSuchSerial; - } else if (BlockDevice->GetLastErrno() == ENOENT) { - errStr << "no such file."; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenNonexistentFile; - } else if (BlockDevice->GetLastErrno() == EACCES) { - errStr << "not enough rights."; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenFileWithoutPermissions; - } else { - errStr << "unknown reason, errno# " << BlockDevice->GetLastErrno() << "."; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenFileUnknown; - } - ErrorStr = errStr.Str(); + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + TStringStream errStr; + errStr << "Can't open file " << Cfg->GetDevicePath().Quote() << ": "; + if (!Cfg->GetDevicePath() && Cfg->ExpectedSerial) { + errStr << "no device with such serial was found"; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorNoDeviceWithSuchSerial; + } else if (BlockDevice->GetLastErrno() == ENOENT) { + errStr << "no such file."; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenNonexistentFile; + } else if (BlockDevice->GetLastErrno() == EACCES) { + errStr << "not enough rights."; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenFileWithoutPermissions; + } else { + errStr << "unknown reason, errno# " << BlockDevice->GetLastErrno() << "."; + 
*Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorOpenFileUnknown; + } + ErrorStr = errStr.Str(); if (ActorSystem) { LOG_CRIT_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " BlockDevice initialization error! " << errStr.Str() @@ -2215,29 +2215,29 @@ bool TPDisk::Initialize(TActorSystem *actorSystem, const TActorId &pDiskActor) { OnDriveStartup(); - if (!Cfg->CheckSerial(DriveData.SerialNumber)) { - BlockDevice->Stop(); - - TStringStream str; - str << "serial number mismatch, expected# " << Cfg->ExpectedSerial.Quote() - << " but got# " << DriveData.SerialNumber.Quote(); - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " " << str.Str()); - } - ErrorStr = str.Str(); - + if (!Cfg->CheckSerial(DriveData.SerialNumber)) { + BlockDevice->Stop(); + + TStringStream str; + str << "serial number mismatch, expected# " << Cfg->ExpectedSerial.Quote() + << " but got# " << DriveData.SerialNumber.Quote(); + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " " << str.Str()); + } + ErrorStr = str.Str(); + *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::OpenFileError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDeviceSerialMismatch; - - *Mon.SerialNumberMismatched = 1; - return false; - } else { - *Mon.SerialNumberMismatched = 0; - } - - return true; -#undef REGISTER_LOCAL_CONTROL + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorDeviceSerialMismatch; + + *Mon.SerialNumberMismatched = 1; + return false; + } else { + *Mon.SerialNumberMismatched = 0; + } + + return true; +#undef REGISTER_LOCAL_CONTROL } //////////////////////////////////// Forseti scheduler Gates /////////////////////////////////////////////////////////// @@ -2271,269 +2271,269 @@ bool TPDisk::Initialize(TActorSystem *actorSystem, const TActorId &pDiskActor) { // FastOperation 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -NKikimrProto::EReplyStatus TPDisk::ValidateRequest(TLogWrite *logWrite, TStringStream& outErr) { - if (logWrite->Signature.HasCommitRecord() && logWrite->CommitRecord.FirstLsnToKeep > logWrite->Lsn) { - outErr << " invalid commit record, CommitRecord.FirstLsnToKeep# " << logWrite->CommitRecord.FirstLsnToKeep - << " > Lsn# " << logWrite->Lsn << " Marker# BPD73"; - return NKikimrProto::ERROR; - } - if (logWrite->Result && logWrite->Result->Status != NKikimrProto::OK) { - return NKikimrProto::ERROR; - } - - return NKikimrProto::OK; -} - -void TPDisk::PrepareLogError(TLogWrite *logWrite, TStringStream& err, NKikimrProto::EReplyStatus status) { - Y_VERIFY_DEBUG(status != NKikimrProto::OK); - if (logWrite->Result && logWrite->Result->Status != NKikimrProto::OK) { - return; - } - - err << " error in TLogWrite for owner# " << logWrite->Owner << " ownerRound# " << logWrite->OwnerRound - << " lsn# " << logWrite->Lsn; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); - +NKikimrProto::EReplyStatus TPDisk::ValidateRequest(TLogWrite *logWrite, TStringStream& outErr) { + if (logWrite->Signature.HasCommitRecord() && logWrite->CommitRecord.FirstLsnToKeep > logWrite->Lsn) { + outErr << " invalid commit record, CommitRecord.FirstLsnToKeep# " << logWrite->CommitRecord.FirstLsnToKeep + << " > Lsn# " << logWrite->Lsn << " Marker# BPD73"; + return NKikimrProto::ERROR; + } + if (logWrite->Result && logWrite->Result->Status != NKikimrProto::OK) { + return NKikimrProto::ERROR; + } + + return NKikimrProto::OK; +} + +void TPDisk::PrepareLogError(TLogWrite *logWrite, TStringStream& err, NKikimrProto::EReplyStatus status) { + Y_VERIFY_DEBUG(status != NKikimrProto::OK); + if (logWrite->Result && logWrite->Result->Status != NKikimrProto::OK) { + return; + } + + err << " error in TLogWrite for owner# " << logWrite->Owner << " ownerRound# " << 
logWrite->OwnerRound + << " lsn# " << logWrite->Lsn; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + logWrite->Result.Reset(new NPDisk::TEvLogResult(status, GetStatusFlags(logWrite->Owner, logWrite->OwnerGroupType), err.Str())); - logWrite->Result->Results.push_back(NPDisk::TEvLogResult::TRecord(logWrite->Lsn, logWrite->Cookie)); - WILSON_TRACE(*ActorSystem, &logWrite->TraceId, EnqueueLogWrite); -} - -NKikimrProto::EReplyStatus TPDisk::CheckOwnerAndRound(TRequestBase* req, TStringStream& err) { - const auto& ownerData = OwnerData[req->Owner]; - - if (!IsOwnerUser(req->Owner)) { - err << " ownerId# " << req->Owner << " < Begin# " << (ui32)OwnerBeginUser - << " or >= End# " << (ui32)OwnerEndUser << " Marker# BPD72"; - return NKikimrProto::ERROR; - } else if (ownerData.VDiskId == TVDiskID::InvalidId) { - err << " request from unregistered ownerId# " << req->Owner; - return NKikimrProto::INVALID_OWNER; - } else if (ownerData.OwnerRound != req->OwnerRound) { - err << " ownerId# " << req->Owner - << " invalid OwnerRound, got# " << req->OwnerRound - << " expected# " << ownerData.OwnerRound; - return NKikimrProto::INVALID_ROUND; - } - - return NKikimrProto::OK; -} - -// Checks request validity, responds to and deletes non-valid requests -// Fills some important fields in request + logWrite->Result->Results.push_back(NPDisk::TEvLogResult::TRecord(logWrite->Lsn, logWrite->Cookie)); + WILSON_TRACE(*ActorSystem, &logWrite->TraceId, EnqueueLogWrite); +} + +NKikimrProto::EReplyStatus TPDisk::CheckOwnerAndRound(TRequestBase* req, TStringStream& err) { + const auto& ownerData = OwnerData[req->Owner]; + + if (!IsOwnerUser(req->Owner)) { + err << " ownerId# " << req->Owner << " < Begin# " << (ui32)OwnerBeginUser + << " or >= End# " << (ui32)OwnerEndUser << " Marker# BPD72"; + return NKikimrProto::ERROR; + } else if (ownerData.VDiskId == TVDiskID::InvalidId) { + err << " request from unregistered ownerId# " << req->Owner; + return NKikimrProto::INVALID_OWNER; + } 
else if (ownerData.OwnerRound != req->OwnerRound) { + err << " ownerId# " << req->Owner + << " invalid OwnerRound, got# " << req->OwnerRound + << " expected# " << ownerData.OwnerRound; + return NKikimrProto::INVALID_ROUND; + } + + return NKikimrProto::OK; +} + +// Checks request validity, responds to and deletes non-valid requests +// Fills some important fields in request // Must be called under StateMutex -// Returns is request valid and should be processed further -bool TPDisk::PreprocessRequest(TRequestBase *request) { - TStringStream err; - err << "PDiskId# " << PDiskId << " "; - - // Advisory check, further code may ignore results - NKikimrProto::EReplyStatus errStatus = CheckOwnerAndRound(request, err); - - LOG_TRACE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PreprocessRequest " << TypeName(*request) - << " from owner# " << request->Owner << " round# " << request->OwnerRound - << " errStatus# " << errStatus); - +// Returns is request valid and should be processed further +bool TPDisk::PreprocessRequest(TRequestBase *request) { + TStringStream err; + err << "PDiskId# " << PDiskId << " "; + + // Advisory check, further code may ignore results + NKikimrProto::EReplyStatus errStatus = CheckOwnerAndRound(request, err); + + LOG_TRACE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PreprocessRequest " << TypeName(*request) + << " from owner# " << request->Owner << " round# " << request->OwnerRound + << " errStatus# " << errStatus); + switch (request->GetType()) { case ERequestType::RequestLogRead: { TLogRead &evLog = *static_cast<TLogRead*>(request); TOwnerData &ownerData = OwnerData[evLog.Owner]; - if (ownerData.HasReadTheWholeLog) { - err << "Can't read log for ownerId# " << evLog.Owner - << " ownerRound# " << evLog.OwnerRound << ", owner has already read the log!"; - errStatus = NKikimrProto::ERROR; // -> NOT OK + if (ownerData.HasReadTheWholeLog) { + err << "Can't read log for ownerId# " << evLog.Owner + << " ownerRound# " << evLog.OwnerRound << ", owner has 
already read the log!"; + errStatus = NKikimrProto::ERROR; // -> NOT OK + } + + if (errStatus != NKikimrProto::OK) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + auto result = MakeHolder<NPDisk::TEvReadLogResult>(errStatus, evLog.Position, evLog.Position, true, + GetStatusFlags(evLog.Owner, evLog.OwnerGroupType), err.Str(), evLog.Owner); + ActorSystem->Send(evLog.Sender, result.Release()); + Mon.LogRead.CountResponse(); + delete request; + return false; } - - if (errStatus != NKikimrProto::OK) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); - auto result = MakeHolder<NPDisk::TEvReadLogResult>(errStatus, evLog.Position, evLog.Position, true, - GetStatusFlags(evLog.Owner, evLog.OwnerGroupType), err.Str(), evLog.Owner); - ActorSystem->Send(evLog.Sender, result.Release()); - Mon.LogRead.CountResponse(); - delete request; - return false; - } - evLog.SetOwnerGroupType(ownerData.IsStaticGroupOwner()); - request->JobKind = NSchLab::JobKindRead; - break; - } - case ERequestType::RequestLogReadContinue: - request->JobKind = NSchLab::JobKindRead; - break; - case ERequestType::RequestLogSectorRestore: - break; - case ERequestType::RequestLogReadResultProcess: - break; + evLog.SetOwnerGroupType(ownerData.IsStaticGroupOwner()); + request->JobKind = NSchLab::JobKindRead; + break; + } + case ERequestType::RequestLogReadContinue: + request->JobKind = NSchLab::JobKindRead; + break; + case ERequestType::RequestLogSectorRestore: + break; + case ERequestType::RequestLogReadResultProcess: + break; case ERequestType::RequestChunkRead: { Mon.QueueRequests->Dec(); - TIntrusivePtr<TChunkRead> read = std::move(static_cast<TChunkRead*>(request)->SelfPointer); - *Mon.QueueBytes -= read->Size; - - if (errStatus != NKikimrProto::OK) { - err << "ReqId# " << read->ReqId.Id; - SendChunkReadError(read, err, errStatus); - return false; - } - if (read->ChunkIdx >= ChunkState.size()) { - err << "chunkIdx is too large (total# " << ChunkState.size() << ")"; - 
SendChunkReadError(read, err, NKikimrProto::ERROR); - return false; - } - TChunkState &state = ChunkState[read->ChunkIdx]; - if (!IsOwnerUser(state.OwnerId)) { - err << "chunk owned by the system"; - SendChunkReadError(read, err, NKikimrProto::ERROR); - return false; - } - if (state.OwnerId != read->Owner) { - err << "chunk's real ownerId# " << state.OwnerId; - SendChunkReadError(read, err, NKikimrProto::ERROR); - return false; - } - if (state.CommitState != TChunkState::DATA_RESERVED && state.CommitState != TChunkState::DATA_COMMITTED) { - err << "chunk has unexpected CommitState# " << state.CommitState; - SendChunkReadError(read, err, NKikimrProto::ERROR); - return false; - } - ui64 offset = 0; - if (!ParseSectorOffset(Format, ActorSystem, PDiskId, read->Offset, read->Size, - read->FirstSector, read->LastSector, offset)) { - err << "invalid size# " << read->Size << " and offset# " << read->Offset; - SendChunkReadError(read, err, NKikimrProto::ERROR); - return false; - } - TOwnerData &ownerData = OwnerData[request->Owner]; - read->SetOwnerGroupType(ownerData.IsStaticGroupOwner()); - ownerData.ReadThroughput.Increment(read->Size, ActorSystem->Timestamp()); - request->JobKind = NSchLab::JobKindRead; - - Y_VERIFY(read->FinalCompletion == nullptr); - - - ++state.OperationsInProgress; - ++ownerData.InFlight->ChunkReads; - auto onDestroy = [&, inFlight = ownerData.InFlight]() { - --state.OperationsInProgress; - --inFlight->ChunkReads; - }; - read->FinalCompletion = new TCompletionChunkRead(this, read, std::move(onDestroy), state.Nonce); - - static_cast<TChunkRead*>(request)->SelfPointer = std::move(read); - - return true; + TIntrusivePtr<TChunkRead> read = std::move(static_cast<TChunkRead*>(request)->SelfPointer); + *Mon.QueueBytes -= read->Size; + + if (errStatus != NKikimrProto::OK) { + err << "ReqId# " << read->ReqId.Id; + SendChunkReadError(read, err, errStatus); + return false; + } + if (read->ChunkIdx >= ChunkState.size()) { + err << "chunkIdx is too large (total# 
" << ChunkState.size() << ")"; + SendChunkReadError(read, err, NKikimrProto::ERROR); + return false; + } + TChunkState &state = ChunkState[read->ChunkIdx]; + if (!IsOwnerUser(state.OwnerId)) { + err << "chunk owned by the system"; + SendChunkReadError(read, err, NKikimrProto::ERROR); + return false; + } + if (state.OwnerId != read->Owner) { + err << "chunk's real ownerId# " << state.OwnerId; + SendChunkReadError(read, err, NKikimrProto::ERROR); + return false; + } + if (state.CommitState != TChunkState::DATA_RESERVED && state.CommitState != TChunkState::DATA_COMMITTED) { + err << "chunk has unexpected CommitState# " << state.CommitState; + SendChunkReadError(read, err, NKikimrProto::ERROR); + return false; + } + ui64 offset = 0; + if (!ParseSectorOffset(Format, ActorSystem, PDiskId, read->Offset, read->Size, + read->FirstSector, read->LastSector, offset)) { + err << "invalid size# " << read->Size << " and offset# " << read->Offset; + SendChunkReadError(read, err, NKikimrProto::ERROR); + return false; + } + TOwnerData &ownerData = OwnerData[request->Owner]; + read->SetOwnerGroupType(ownerData.IsStaticGroupOwner()); + ownerData.ReadThroughput.Increment(read->Size, ActorSystem->Timestamp()); + request->JobKind = NSchLab::JobKindRead; + + Y_VERIFY(read->FinalCompletion == nullptr); + + + ++state.OperationsInProgress; + ++ownerData.InFlight->ChunkReads; + auto onDestroy = [&, inFlight = ownerData.InFlight]() { + --state.OperationsInProgress; + --inFlight->ChunkReads; + }; + read->FinalCompletion = new TCompletionChunkRead(this, read, std::move(onDestroy), state.Nonce); + + static_cast<TChunkRead*>(request)->SelfPointer = std::move(read); + + return true; } case ERequestType::RequestChunkWrite: { - + TChunkWrite &ev = *static_cast<TChunkWrite*>(request); TOwnerData &ownerData = OwnerData[ev.Owner]; Mon.QueueRequests->Dec(); - const ui32 size = ev.PartsPtr ? 
ev.PartsPtr->ByteSize() : 0; - *Mon.QueueBytes -= size; - if (errStatus != NKikimrProto::OK) { - SendChunkWriteError(ev, err.Str(), errStatus); - delete request; - return false; + const ui32 size = ev.PartsPtr ? ev.PartsPtr->ByteSize() : 0; + *Mon.QueueBytes -= size; + if (errStatus != NKikimrProto::OK) { + SendChunkWriteError(ev, err.Str(), errStatus); + delete request; + return false; + } + if (!ev.PartsPtr) { + err << Sprintf("Can't write chunkIdx# %" PRIu32 " with null PartsPtr for ownerId# %" + PRIu32, (ui32)ev.ChunkIdx, (ui32)ev.Owner); + SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); + delete request; + return false; + } + if (ev.ChunkIdx > ChunkState.size()) { + err << Sprintf("Can't write chunk: chunkIdx# %" PRIu32 + " is too large (total# %" PRIu32 ") ownerId# %" PRIu32, + (ui32)ev.ChunkIdx, (ui32)ChunkState.size(), (ui32)ev.Owner); + SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); + delete request; + return false; + } + if (ev.ChunkIdx == 0) { + TString allocError; + TVector<TChunkIdx> chunks = AllocateChunkForOwner(request, 1, allocError); + if (chunks.empty()) { + err << allocError; + SendChunkWriteError(ev, err.Str(), NKikimrProto::OUT_OF_SPACE); + delete request; + return false; + } else { + Y_VERIFY_DEBUG(chunks.size() == 1); + ev.ChunkIdx = chunks.front(); + } + } + TChunkState &state = ChunkState[ev.ChunkIdx]; + if (state.OwnerId == OwnerSystem) { + err << "Can't write chunkIdx# " << ev.ChunkIdx + << " destination chunk is owned by the system! 
ownerId# " << ev.Owner; + SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); + delete request; + return false; } - if (!ev.PartsPtr) { - err << Sprintf("Can't write chunkIdx# %" PRIu32 " with null PartsPtr for ownerId# %" - PRIu32, (ui32)ev.ChunkIdx, (ui32)ev.Owner); - SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); - delete request; - return false; - } - if (ev.ChunkIdx > ChunkState.size()) { - err << Sprintf("Can't write chunk: chunkIdx# %" PRIu32 - " is too large (total# %" PRIu32 ") ownerId# %" PRIu32, - (ui32)ev.ChunkIdx, (ui32)ChunkState.size(), (ui32)ev.Owner); - SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); - delete request; - return false; - } - if (ev.ChunkIdx == 0) { - TString allocError; - TVector<TChunkIdx> chunks = AllocateChunkForOwner(request, 1, allocError); - if (chunks.empty()) { - err << allocError; - SendChunkWriteError(ev, err.Str(), NKikimrProto::OUT_OF_SPACE); - delete request; - return false; - } else { - Y_VERIFY_DEBUG(chunks.size() == 1); - ev.ChunkIdx = chunks.front(); - } - } - TChunkState &state = ChunkState[ev.ChunkIdx]; - if (state.OwnerId == OwnerSystem) { - err << "Can't write chunkIdx# " << ev.ChunkIdx - << " destination chunk is owned by the system! 
ownerId# " << ev.Owner; - SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); - delete request; - return false; - } - if (state.CommitState != TChunkState::DATA_RESERVED - && state.CommitState != TChunkState::DATA_COMMITTED) { - err << "Can't write chunkIdx# " << ev.ChunkIdx - << " destination chunk has CommitState# " << state.CommitState - << " ownerId# " << ev.Owner; - SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); - delete request; - return false; - } - - ev.SetOwnerGroupType(ownerData.IsStaticGroupOwner()); - ownerData.WriteThroughput.Increment(ev.TotalSize, ActorSystem->Timestamp()); - request->JobKind = NSchLab::JobKindWrite; - - - THolder<TEvChunkWriteResult> result( - new TEvChunkWriteResult(NKikimrProto::OK, ev.ChunkIdx, ev.Cookie, - GetStatusFlags(ev.Owner, ev.OwnerGroupType), TString())); - - ++state.OperationsInProgress; - ++ownerData.InFlight->ChunkWrites; - auto onDestroy = [&, inFlight = ownerData.InFlight]() { - --state.OperationsInProgress; - --inFlight->ChunkWrites; - }; - ev.Completion = MakeHolder<TCompletionChunkWrite>(ev.Sender, result.Release(), &Mon, PDiskId, - ev.CreationTime, ev.TotalSize, ev.PriorityClass, std::move(onDestroy), ev.ReqId); - - return true; + if (state.CommitState != TChunkState::DATA_RESERVED + && state.CommitState != TChunkState::DATA_COMMITTED) { + err << "Can't write chunkIdx# " << ev.ChunkIdx + << " destination chunk has CommitState# " << state.CommitState + << " ownerId# " << ev.Owner; + SendChunkWriteError(ev, err.Str(), NKikimrProto::ERROR); + delete request; + return false; + } + + ev.SetOwnerGroupType(ownerData.IsStaticGroupOwner()); + ownerData.WriteThroughput.Increment(ev.TotalSize, ActorSystem->Timestamp()); + request->JobKind = NSchLab::JobKindWrite; + + + THolder<TEvChunkWriteResult> result( + new TEvChunkWriteResult(NKikimrProto::OK, ev.ChunkIdx, ev.Cookie, + GetStatusFlags(ev.Owner, ev.OwnerGroupType), TString())); + + ++state.OperationsInProgress; + ++ownerData.InFlight->ChunkWrites; + auto 
onDestroy = [&, inFlight = ownerData.InFlight]() { + --state.OperationsInProgress; + --inFlight->ChunkWrites; + }; + ev.Completion = MakeHolder<TCompletionChunkWrite>(ev.Sender, result.Release(), &Mon, PDiskId, + ev.CreationTime, ev.TotalSize, ev.PriorityClass, std::move(onDestroy), ev.ReqId); + + return true; } case ERequestType::RequestChunkTrim: - request->JobKind = NSchLab::JobKindWrite; + request->JobKind = NSchLab::JobKindWrite; break; case ERequestType::RequestLogWrite: { - auto *log = static_cast<TLogWrite*>(request); + auto *log = static_cast<TLogWrite*>(request); Mon.QueueRequests->Dec(); - *Mon.QueueBytes -= log->Data.size(); - log->InputTime = HPNow(); - request->JobKind = NSchLab::JobKindWrite; - - TOwnerData &ownerData = OwnerData[log->Owner]; - ++ownerData.InFlight->LogWrites; - log->SetOnDestroy([inFlight = ownerData.InFlight]() { - --inFlight->LogWrites; - }); - - if (errStatus == NKikimrProto::OK) { - errStatus = ValidateRequest(log, err); - } - if (errStatus != NKikimrProto::OK) { - log->SetOwnerGroupType(true); - PrepareLogError(log, err, errStatus); - break; // Reply even for invalid TLogWrite should be sent from completion thread, so allow request to pass further + *Mon.QueueBytes -= log->Data.size(); + log->InputTime = HPNow(); + request->JobKind = NSchLab::JobKindWrite; + + TOwnerData &ownerData = OwnerData[log->Owner]; + ++ownerData.InFlight->LogWrites; + log->SetOnDestroy([inFlight = ownerData.InFlight]() { + --inFlight->LogWrites; + }); + + if (errStatus == NKikimrProto::OK) { + errStatus = ValidateRequest(log, err); + } + if (errStatus != NKikimrProto::OK) { + log->SetOwnerGroupType(true); + PrepareLogError(log, err, errStatus); + break; // Reply even for invalid TLogWrite should be sent from completion thread, so allow request to pass further } - - log->SetOwnerGroupType(ownerData.IsStaticGroupOwner()); - ownerData.SetLastSeenLsn(log->Lsn); - ownerData.WriteThroughput.Increment(log->Data.size(), ActorSystem->Timestamp()); - 
WILSON_TRACE(*ActorSystem, &log->TraceId, EnqueueLogWrite); + + log->SetOwnerGroupType(ownerData.IsStaticGroupOwner()); + ownerData.SetLastSeenLsn(log->Lsn); + ownerData.WriteThroughput.Increment(log->Data.size(), ActorSystem->Timestamp()); + WILSON_TRACE(*ActorSystem, &log->TraceId, EnqueueLogWrite); break; } case ERequestType::RequestYardInit: @@ -2541,68 +2541,68 @@ bool TPDisk::PreprocessRequest(TRequestBase *request) { case ERequestType::RequestCheckSpace: { TCheckSpace &ev = *static_cast<TCheckSpace*>(request); - if (errStatus != NKikimrProto::OK) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); - THolder<NPDisk::TEvCheckSpaceResult> result(new NPDisk::TEvCheckSpaceResult(errStatus, - GetStatusFlags(ev.Owner, ev.OwnerGroupType), 0, 0, 0, err.Str())); - ActorSystem->Send(ev.Sender, result.Release()); - Mon.CheckSpace.CountResponse(); - delete request; - return false; + if (errStatus != NKikimrProto::OK) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + THolder<NPDisk::TEvCheckSpaceResult> result(new NPDisk::TEvCheckSpaceResult(errStatus, + GetStatusFlags(ev.Owner, ev.OwnerGroupType), 0, 0, 0, err.Str())); + ActorSystem->Send(ev.Sender, result.Release()); + Mon.CheckSpace.CountResponse(); + delete request; + return false; } - ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); - break; + ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); + break; } case ERequestType::RequestHarakiri: { THarakiri &ev = *static_cast<THarakiri*>(request); - if (errStatus != NKikimrProto::OK) { - THolder<NPDisk::TEvHarakiriResult> result(new NPDisk::TEvHarakiriResult(errStatus, - GetStatusFlags(ev.Owner, ev.OwnerGroupType), err.Str())); - ActorSystem->Send(ev.Sender, result.Release()); - Mon.Harakiri.CountResponse(); - delete request; - return false; + if (errStatus != NKikimrProto::OK) { + THolder<NPDisk::TEvHarakiriResult> result(new NPDisk::TEvHarakiriResult(errStatus, + GetStatusFlags(ev.Owner, 
ev.OwnerGroupType), err.Str())); + ActorSystem->Send(ev.Sender, result.Release()); + Mon.Harakiri.CountResponse(); + delete request; + return false; } - ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); - break; + ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); + break; } - case ERequestType::RequestYardSlay: + case ERequestType::RequestYardSlay: { TSlay &ev = *static_cast<TSlay*>(request); if (ev.VDiskId == TVDiskID::InvalidId) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); THolder<NPDisk::TEvSlayResult> result(new NPDisk::TEvSlayResult(NKikimrProto::ERROR, GetStatusFlags(ev.Owner, ev.OwnerGroupType), - ev.VDiskId, ev.SlayOwnerRound, ev.PDiskId, ev.VSlotId, err.Str())); + ev.VDiskId, ev.SlayOwnerRound, ev.PDiskId, ev.VSlotId, err.Str())); ActorSystem->Send(ev.Sender, result.Release()); Mon.YardSlay.CountResponse(); delete request; - return false; + return false; } break; } case ERequestType::RequestChunkReserve: { TChunkReserve &ev = *static_cast<TChunkReserve*>(request); - - if (errStatus != NKikimrProto::OK) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); - THolder<NPDisk::TEvChunkReserveResult> result(new NPDisk::TEvChunkReserveResult(errStatus, - GetStatusFlags(ev.Owner, ev.OwnerGroupType), err.Str())); - ActorSystem->Send(ev.Sender, result.Release()); - Mon.ChunkReserve.CountResponse(); - delete request; - return false; + + if (errStatus != NKikimrProto::OK) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + THolder<NPDisk::TEvChunkReserveResult> result(new NPDisk::TEvChunkReserveResult(errStatus, + GetStatusFlags(ev.Owner, ev.OwnerGroupType), err.Str())); + ActorSystem->Send(ev.Sender, result.Release()); + Mon.ChunkReserve.CountResponse(); + delete request; + return false; } - - ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); - break; - } - case ERequestType::RequestChunksLock: - 
break; - case ERequestType::RequestChunksUnlock: - break; + + ev.SetOwnerGroupType(OwnerData[ev.Owner].IsStaticGroupOwner()); + break; + } + case ERequestType::RequestChunksLock: + break; + case ERequestType::RequestChunksUnlock: + break; case ERequestType::RequestYardControl: { TYardControl &ev = *static_cast<TYardControl*>(request); @@ -2611,25 +2611,25 @@ bool TPDisk::PreprocessRequest(TRequestBase *request) { Mon.YardControl.CountResponse(); IsQueuePaused = true; delete request; - return false; // OK + return false; // OK } - err << " Can't process yard control Action " << (ui32)ev.Action; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); - ActorSystem->Send(ev.Sender, new NPDisk::TEvYardControlResult(NKikimrProto::ERROR, ev.Cookie, err.Str())); + err << " Can't process yard control Action " << (ui32)ev.Action; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, err.Str()); + ActorSystem->Send(ev.Sender, new NPDisk::TEvYardControlResult(NKikimrProto::ERROR, ev.Cookie, err.Str())); Mon.YardControl.CountResponse(); delete request; - return false; + return false; } case ERequestType::RequestAskForCutLog: break; case ERequestType::RequestConfigureScheduler: break; - case ERequestType::RequestWhiteboartReport: - break; - case ERequestType::RequestHttpInfo: - break; - case ERequestType::RequestUndelivered: - break; + case ERequestType::RequestWhiteboartReport: + break; + case ERequestType::RequestHttpInfo: + break; + case ERequestType::RequestUndelivered: + break; case ERequestType::RequestCommitLogChunks: break; case ERequestType::RequestLogCommitDone: @@ -2638,18 +2638,18 @@ bool TPDisk::PreprocessRequest(TRequestBase *request) { break; case ERequestType::RequestReleaseChunks: break; - case ERequestType::RequestStopDevice: - BlockDevice->Stop(); - delete request; - return false; + case ERequestType::RequestStopDevice: + BlockDevice->Stop(); + delete request; + return false; default: Y_FAIL(); break; } - return true; -} + return true; +} -void 
TPDisk::PushRequestToForseti(TRequestBase *request) { +void TPDisk::PushRequestToForseti(TRequestBase *request) { if (request->GateId != GateFastOperation) { bool isAdded = false; @@ -2658,10 +2658,10 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) { LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " ReqId# " << request->ReqId << " Cost# " << request->Cost - << " JobKind# " << (ui64)request->JobKind - << " ownerId# " << request->Owner + << " JobKind# " << (ui64)request->JobKind + << " ownerId# " << request->Owner << " GateId# " << (ui64)request->GateId - << " PushRequestToForseti Can't push to Forseti! Trying system log gate." + << " PushRequestToForseti Can't push to Forseti! Trying system log gate." << " Marker# BPD44"); Mon.ForsetiCbsNotFound->Inc(); ui8 originalGateId = request->GateId; @@ -2672,11 +2672,11 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) { str << "PDiskId# " << (ui32)PDiskId << " ReqId# " << request->ReqId << " Cost# " << request->Cost - << " JobKind# " << (ui64)request->JobKind - << " ownerId# " << request->Owner + << " JobKind# " << (ui64)request->JobKind + << " ownerId# " << request->Owner << " GateId# " << (ui64)request->GateId << " originalGateId# " << (ui64)originalGateId - << " PushRequestToForseti Can't push to Forseti! Request may get lost." + << " PushRequestToForseti Can't push to Forseti! Request may get lost." << " Marker# BPD45"; Y_FAIL_S(str.Str()); } @@ -2696,7 +2696,7 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) { ForsetiScheduler.OnJobCostChange(cbs, job, ForsetiTimeNs, prevCost); LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 - " PushRequestToForseti AddToBatch in Forseti.", + " PushRequestToForseti AddToBatch in Forseti.", (ui32)PDiskId, (ui64)request->ReqId.Id); isAdded = true; @@ -2713,60 +2713,60 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) { // Schedule small job. 
TChunkWritePiece *piece = new TChunkWritePiece(whole, idx * smallJobSize, smallJobSize); piece->EstimateCost(DriveModel); - AddJobToForseti(cbs, piece, request->JobKind); + AddJobToForseti(cbs, piece, request->JobKind); } // Schedule large job (there always is one) TChunkWritePiece *piece = new TChunkWritePiece(whole, smallJobCount * smallJobSize, largeJobSize); piece->EstimateCost(DriveModel); - AddJobToForseti(cbs, piece, request->JobKind); - LWTRACK(PDiskAddWritePieceToScheduler, request->Orbit, PDiskId, request->ReqId.Id, - HPSecondsFloat(HPNow() - request->CreationTime), request->Owner, request->IsFast, - request->PriorityClass, whole->TotalSize); + AddJobToForseti(cbs, piece, request->JobKind); + LWTRACK(PDiskAddWritePieceToScheduler, request->Orbit, PDiskId, request->ReqId.Id, + HPSecondsFloat(HPNow() - request->CreationTime), request->Owner, request->IsFast, + request->PriorityClass, whole->TotalSize); } else if (request->GetType() == ERequestType::RequestChunkRead) { TIntrusivePtr<TChunkRead> read = std::move(static_cast<TChunkRead*>(request)->SelfPointer); - ui32 totalSectors = read->LastSector - read->FirstSector + 1; - - ui32 smallJobSize = (ForsetiOpPieceSizeCached + Format.SectorSize - 1) / Format.SectorSize; - ui32 smallJobCount = totalSectors / smallJobSize; - if (smallJobCount) { - smallJobCount--; + ui32 totalSectors = read->LastSector - read->FirstSector + 1; + + ui32 smallJobSize = (ForsetiOpPieceSizeCached + Format.SectorSize - 1) / Format.SectorSize; + ui32 smallJobCount = totalSectors / smallJobSize; + if (smallJobCount) { + smallJobCount--; } - ui32 largeJobSize = totalSectors - smallJobSize * smallJobCount; - - for (ui32 idx = 0; idx < smallJobCount; ++idx) { - // Schedule small job. 
- auto piece = new TChunkReadPiece(read, idx * smallJobSize, - smallJobSize * Format.SectorSize, false); - LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, idx, idx * smallJobSize, - smallJobSize * Format.SectorSize); + ui32 largeJobSize = totalSectors - smallJobSize * smallJobCount; + + for (ui32 idx = 0; idx < smallJobCount; ++idx) { + // Schedule small job. + auto piece = new TChunkReadPiece(read, idx * smallJobSize, + smallJobSize * Format.SectorSize, false); + LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, idx, idx * smallJobSize, + smallJobSize * Format.SectorSize); piece->EstimateCost(DriveModel); piece->SelfPointer = piece; - AddJobToForseti(cbs, piece, request->JobKind); + AddJobToForseti(cbs, piece, request->JobKind); } - // Schedule large job (there always is one) - auto piece = new TChunkReadPiece(read, smallJobCount * smallJobSize, - largeJobSize * Format.SectorSize, true); - LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, smallJobCount, - smallJobCount * smallJobSize, largeJobSize * Format.SectorSize); - piece->EstimateCost(DriveModel); - piece->SelfPointer = piece; - AddJobToForseti(cbs, piece, request->JobKind); + // Schedule large job (there always is one) + auto piece = new TChunkReadPiece(read, smallJobCount * smallJobSize, + largeJobSize * Format.SectorSize, true); + LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, smallJobCount, + smallJobCount * smallJobSize, largeJobSize * Format.SectorSize); + piece->EstimateCost(DriveModel); + piece->SelfPointer = piece; + AddJobToForseti(cbs, piece, request->JobKind); } else { - AddJobToForseti(cbs, request, request->JobKind); + AddJobToForseti(cbs, request, request->JobKind); } } } else { FastOperationsQueue.push_back(std::unique_ptr<TRequestBase>(request)); LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 - " PushRequestToForseti Push to FastOperationsQueue.size# %" PRIu64, + " PushRequestToForseti 
Push to FastOperationsQueue.size# %" PRIu64, (ui32)PDiskId, (ui64)request->ReqId.Id, (ui64)FastOperationsQueue.size()); } } // Always produces a large job and sometimes produces some small jobs and a large job. void TPDisk::SplitChunkJobSize(ui32 totalSize, ui32 *outSmallJobSize, ui32 *outLargeJobSize, ui32 *outSmallJobCount) { - const ui64 sectorPayloadSize = Format.SectorPayloadSize(); - *outSmallJobSize = (ForsetiOpPieceSizeCached + sectorPayloadSize - 1) / sectorPayloadSize * sectorPayloadSize; + const ui64 sectorPayloadSize = Format.SectorPayloadSize(); + *outSmallJobSize = (ForsetiOpPieceSizeCached + sectorPayloadSize - 1) / sectorPayloadSize * sectorPayloadSize; *outSmallJobCount = totalSize / *outSmallJobSize; if (*outSmallJobCount) { (*outSmallJobCount)--; @@ -2775,8 +2775,8 @@ void TPDisk::SplitChunkJobSize(ui32 totalSize, ui32 *outSmallJobSize, ui32 *outL } void TPDisk::AddJobToForseti(NSchLab::TCbs *cbs, TRequestBase *request, NSchLab::EJobKind jobKind) { - LWTRACK(PDiskAddToScheduler, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), - request->Owner, request->IsFast, request->PriorityClass); + LWTRACK(PDiskAddToScheduler, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), + request->Owner, request->IsFast, request->PriorityClass); TIntrusivePtr<NSchLab::TJob> job = ForsetiScheduler.CreateJob(); job->Payload = request; job->Cost = request->Cost; @@ -2784,7 +2784,7 @@ void TPDisk::AddJobToForseti(NSchLab::TCbs *cbs, TRequestBase *request, NSchLab: ForsetiTimeNs++; ForsetiScheduler.AddJob(cbs, job, request->Owner, request->GateId, ForsetiTimeNs); LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 - " AddJobToForseti", (ui32)PDiskId, (ui64)request->ReqId.Id); + " AddJobToForseti", (ui32)PDiskId, (ui64)request->ReqId.Id); } void TPDisk::RouteRequest(TRequestBase *request) { @@ -2792,27 +2792,27 @@ void TPDisk::RouteRequest(TRequestBase *request) { " 
RouteRequest", (ui32)PDiskId, (ui64)request->ReqId.Id, (ui64)request->GetType()); - LWTRACK(PDiskRouteRequest, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), - request->Owner, request->IsFast, request->PriorityClass); - + LWTRACK(PDiskRouteRequest, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), + request->Owner, request->IsFast, request->PriorityClass); + switch (request->GetType()) { case ERequestType::RequestLogRead: - [[fallthrough]]; - case ERequestType::RequestLogReadContinue: - [[fallthrough]]; - case ERequestType::RequestLogReadResultProcess: - [[fallthrough]]; - case ERequestType::RequestLogSectorRestore: - JointLogReads.push_back(request); - break; + [[fallthrough]]; + case ERequestType::RequestLogReadContinue: + [[fallthrough]]; + case ERequestType::RequestLogReadResultProcess: + [[fallthrough]]; + case ERequestType::RequestLogSectorRestore: + JointLogReads.push_back(request); + break; case ERequestType::RequestChunkReadPiece: - { - TChunkReadPiece *piece = static_cast<TChunkReadPiece*>(request); - JointChunkReads.emplace_back(piece->SelfPointer.Get()); - piece->SelfPointer.Reset(); + { + TChunkReadPiece *piece = static_cast<TChunkReadPiece*>(request); + JointChunkReads.emplace_back(piece->SelfPointer.Get()); + piece->SelfPointer.Reset(); // FIXME(cthulhu): Unreserve() for TChunkReadPiece is called while processing to avoid requeueing issues break; - } + } case ERequestType::RequestChunkWritePiece: JointChunkWrites.push_back(request); break; @@ -2829,7 +2829,7 @@ void TPDisk::RouteRequest(TRequestBase *request) { TLogWrite *batch = log->PopFromBatch(); JointLogWrites.push_back(log); - if (log->Signature.HasCommitRecord()) { + if (log->Signature.HasCommitRecord()) { JointCommits.push_back(log); } WILSON_TRACE(*ActorSystem, &log->TraceId, RouteLogWrite); @@ -2838,7 +2838,7 @@ void TPDisk::RouteRequest(TRequestBase *request) { break; } default: - Y_FAIL_S("RouteRequest, unexpected 
request type# " << ui64(request->GetType())); + Y_FAIL_S("RouteRequest, unexpected request type# " << ui64(request->GetType())); break; } } @@ -2850,9 +2850,9 @@ void TPDisk::ProcessPausedQueue() { PausedQueue.pop_front(); if (ev->Action == NPDisk::TEvYardControl::ActionPause) { - if (PreprocessRequest(ev)) { - Y_FAIL(); - } + if (PreprocessRequest(ev)) { + Y_FAIL(); + } break; } if (ev->Action == NPDisk::TEvYardControl::ActionResume) { @@ -2874,38 +2874,38 @@ void TPDisk::ProcessPausedQueue() { } else { TRequestBase *ev = PausedQueue.front(); PausedQueue.pop_front(); - if (PreprocessRequest(ev)) { - PushRequestToForseti(ev); - } - } - } -} - -void TPDisk::ProcessYardInitSet() { - for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { - TOwnerData &data = OwnerData[owner]; - if (data.LogReader && data.LogReader->GetIsReplied()) { - data.LogReader = nullptr; - } - } - - - // Process pending queue - for (auto it = PendingYardInits.begin(); it != PendingYardInits.end();) { - if (!OwnerData[(*it)->Owner].HaveRequestsInFlight()) { - YardInitFinish(**it); - it = PendingYardInits.erase(it); - } else { - ++it; - } - } - *Mon.PendingYardInits = PendingYardInits.size(); -} - + if (PreprocessRequest(ev)) { + PushRequestToForseti(ev); + } + } + } +} + +void TPDisk::ProcessYardInitSet() { + for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { + TOwnerData &data = OwnerData[owner]; + if (data.LogReader && data.LogReader->GetIsReplied()) { + data.LogReader = nullptr; + } + } + + + // Process pending queue + for (auto it = PendingYardInits.begin(); it != PendingYardInits.end();) { + if (!OwnerData[(*it)->Owner].HaveRequestsInFlight()) { + YardInitFinish(**it); + it = PendingYardInits.erase(it); + } else { + ++it; + } + } + *Mon.PendingYardInits = PendingYardInits.size(); +} + void TPDisk::EnqueueAll() { TGuard<TMutex> guard(StateMutex); - while (InputQueue.GetWaitingSize() > 0) { - TRequestBase* request = InputQueue.Pop(); + while (InputQueue.GetWaitingSize() > 0) { + 
TRequestBase* request = InputQueue.Pop(); AtomicSub(InputQueueCost, request->Cost); if (IsQueuePaused) { if (IsQueueStep) { @@ -2944,16 +2944,16 @@ void TPDisk::EnqueueAll() { PausedQueue.push_back(request); } } else { - if (PreprocessRequest(request)) { - PushRequestToForseti(request); - } + if (PreprocessRequest(request)) { + PushRequestToForseti(request); + } } } } void TPDisk::Update() { - Mon.UpdateDurationTracker.UpdateStarted(); - + Mon.UpdateDurationTracker.UpdateStarted(); + ui32 userSectorSize = 0; // Make input queue empty @@ -2970,7 +2970,7 @@ void TPDisk::Update() { // Make token injection to correct drive model underestimations and avoid disk underutilization - Mon.UpdateDurationTracker.SchedulingStart(); + Mon.UpdateDurationTracker.SchedulingStart(); // Schedule using Forseti Scheduler // Prepare @@ -3012,17 +3012,17 @@ void TPDisk::Update() { TRequestBase* req = static_cast<TRequestBase*>(job->Payload); // Slowdown requests (ACHTUNG!) - if (const ui64 addLatencyNs = SlowdownAddLatencyNs) { + if (const ui64 addLatencyNs = SlowdownAddLatencyNs) { while (true) { i64 now = HPNow(); if (!req || req->CreationTime <= 0 || req->CreationTime >= now) { break; } ui64 duration = HPNanoSeconds(now - req->CreationTime); - if (addLatencyNs <= duration) { + if (addLatencyNs <= duration) { break; } - ui64 toWaitNs = addLatencyNs - duration; + ui64 toWaitNs = addLatencyNs - duration; const ui64 OneSecondNs = 1000000000ll; if (toWaitNs < OneSecondNs) { NanoSleep(toWaitNs); @@ -3107,10 +3107,10 @@ void TPDisk::Update() { LogSeekCostLoop.Push(logSeekCostNs); } - Mon.UpdateDurationTracker.ProcessingStart(); - - ClearQuarantineChunks(); - + Mon.UpdateDurationTracker.ProcessingStart(); + + ClearQuarantineChunks(); + if (tact == ETact::TactLc) { ProcessLogWriteQueueAndCommits(); } @@ -3125,66 +3125,66 @@ void TPDisk::Update() { LastTact = tact; - ProcessYardInitSet(); - - - Mon.UpdateDurationTracker.WaitingStart(isNothingToDo); + ProcessYardInitSet(); + + + 
Mon.UpdateDurationTracker.WaitingStart(isNothingToDo); // Wait for something to do if (isNothingToDo && InputQueue.GetWaitingSize() == 0 && ForsetiScheduler.IsEmpty()) { - // use deadline to be able to wakeup in situation of pdisk destruction - InputQueue.ProducedWait(TDuration::MilliSeconds(10)); - } - - Mon.UpdateDurationTracker.UpdateEnded(); - *Mon.PDiskThreadCPU = ThreadCPUTime(); -} - -void TPDisk::UpdateMinLogCostNs() { - ui64 cost = ForsetiMinLogCostNsControl; - if (cost != ForsetiMinLogCostNs) { - ForsetiMinLogCostNs = cost; - for (ui32 ownerId = 0; ownerId < OwnerCount; ++ownerId) { - NSchLab::TCbs *cbs = ForsetiScheduler.GetCbs(ownerId, GateLog); - if (cbs) { - cbs->MaxBudget = ForsetiMinLogCostNs; - } - } - } -} - -void TPDisk::AddCbs(ui32 ownerId, EGate gate, const char *gateName, ui64 minBudget) { - if (!ForsetiScheduler.GetCbs(ownerId, gate)) { - NSchLab::TCbs cbs; - cbs.CbsName = Sprintf("Owner_%" PRIu32 "_%s", (ui32)ownerId, gateName); - cbs.MaxBudget = minBudget; - ForsetiTimeNs++; - ForsetiScheduler.AddCbs(ownerId, gate, cbs, ForsetiTimeNs); - } -} - -void TPDisk::AddCbsSet(ui32 ownerId) { - AddCbs(ownerId, GateLog, "Log", ForsetiMinLogCostNs); - AddCbs(ownerId, GateFresh, "Fresh", 0ull); - AddCbs(ownerId, GateComp, "Comp", 0ull); - AddCbs(ownerId, GateFastRead, "FastRead", 0ull); - AddCbs(ownerId, GateOtherRead, "OtherRead", 0ull); - AddCbs(ownerId, GateLoad, "Load", 0ull); - AddCbs(ownerId, GateHuge, "Huge", 0ull); - AddCbs(ownerId, GateSyncLog, "SyncLog", 0ull); - AddCbs(ownerId, GateLow, "LowRead", 0ull); - - TConfigureScheduler conf(ownerId, 0); - SchedulerConfigure(conf); -} - - + // use deadline to be able to wakeup in situation of pdisk destruction + InputQueue.ProducedWait(TDuration::MilliSeconds(10)); + } + + Mon.UpdateDurationTracker.UpdateEnded(); + *Mon.PDiskThreadCPU = ThreadCPUTime(); +} + +void TPDisk::UpdateMinLogCostNs() { + ui64 cost = ForsetiMinLogCostNsControl; + if (cost != ForsetiMinLogCostNs) { + ForsetiMinLogCostNs = 
cost; + for (ui32 ownerId = 0; ownerId < OwnerCount; ++ownerId) { + NSchLab::TCbs *cbs = ForsetiScheduler.GetCbs(ownerId, GateLog); + if (cbs) { + cbs->MaxBudget = ForsetiMinLogCostNs; + } + } + } +} + +void TPDisk::AddCbs(ui32 ownerId, EGate gate, const char *gateName, ui64 minBudget) { + if (!ForsetiScheduler.GetCbs(ownerId, gate)) { + NSchLab::TCbs cbs; + cbs.CbsName = Sprintf("Owner_%" PRIu32 "_%s", (ui32)ownerId, gateName); + cbs.MaxBudget = minBudget; + ForsetiTimeNs++; + ForsetiScheduler.AddCbs(ownerId, gate, cbs, ForsetiTimeNs); + } +} + +void TPDisk::AddCbsSet(ui32 ownerId) { + AddCbs(ownerId, GateLog, "Log", ForsetiMinLogCostNs); + AddCbs(ownerId, GateFresh, "Fresh", 0ull); + AddCbs(ownerId, GateComp, "Comp", 0ull); + AddCbs(ownerId, GateFastRead, "FastRead", 0ull); + AddCbs(ownerId, GateOtherRead, "OtherRead", 0ull); + AddCbs(ownerId, GateLoad, "Load", 0ull); + AddCbs(ownerId, GateHuge, "Huge", 0ull); + AddCbs(ownerId, GateSyncLog, "SyncLog", 0ull); + AddCbs(ownerId, GateLow, "LowRead", 0ull); + + TConfigureScheduler conf(ownerId, 0); + SchedulerConfigure(conf); +} + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // External interface //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::Wakeup() { - InputQueue.WakeUp(); -} +void TPDisk::Wakeup() { + InputQueue.WakeUp(); +} // Pushes request to the InputQueue; almost thread-safe void TPDisk::InputRequest(TRequestBase* request) { @@ -3195,7 +3195,7 @@ void TPDisk::InputRequest(TRequestBase* request) { TGuard<TMutex> g(StateMutex); timeout = HPCyclesNs(50000000ull + 14000000000ull * Keeper.GetTrimmedFreeChunkCount() / Format.DiskSizeChunks()); } - LWPROBE(PDiskInputRequestTimeout, PDiskId, request->ReqId.Id, HPMilliSecondsFloat(timeout)); + LWPROBE(PDiskInputRequestTimeout, PDiskId, request->ReqId.Id, HPMilliSecondsFloat(timeout)); request->Deadline = HPNow() 
+ timeout; } ui64 qla = InputQueue.GetWaitingSize(); @@ -3205,14 +3205,14 @@ void TPDisk::InputRequest(TRequestBase* request) { LWTRACK(PDiskInputRequest, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), double(request->Cost) / 1000000.0, qla, double(qca) / 1000000.0, HPSecondsFloat(request->Deadline), - request->Owner, request->IsFast, request->PriorityClass, InputQueue.GetWaitingSize()); + request->Owner, request->IsFast, request->PriorityClass, InputQueue.GetWaitingSize()); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 - " InputRequest InputQueue.Push priortiyClass# %" PRIu64 " creationTime# %f", - (ui32)PDiskId, (ui64)request->ReqId.Id, (ui64)request->PriorityClass, - HPSecondsFloat(request->CreationTime)); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " ReqId# %" PRIu64 + " InputRequest InputQueue.Push priortiyClass# %" PRIu64 " creationTime# %f", + (ui32)PDiskId, (ui64)request->ReqId.Id, (ui64)request->PriorityClass, + HPSecondsFloat(request->CreationTime)); - InputQueue.Push(request); + InputQueue.Push(request); } } // NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h index 497544973f..a9b910cb0c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.h @@ -1,22 +1,22 @@ #pragma once #include "defs.h" - -#include "blobstorage_pdisk_blockdevice.h" + +#include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_chunk_tracker.h" +#include "blobstorage_pdisk_chunk_tracker.h" #include "blobstorage_pdisk_crypto.h" #include "blobstorage_pdisk_data.h" #include "blobstorage_pdisk_delayed_cost_loop.h" #include "blobstorage_pdisk_drivemodel.h" #include "blobstorage_pdisk_free_chunks.h" #include "blobstorage_pdisk_gate.h" -#include "blobstorage_pdisk_keeper.h" -#include 
"blobstorage_pdisk_req_creator.h" +#include "blobstorage_pdisk_keeper.h" +#include "blobstorage_pdisk_req_creator.h" #include "blobstorage_pdisk_requestimpl.h" #include "blobstorage_pdisk_state.h" #include "blobstorage_pdisk_tact.h" #include "blobstorage_pdisk_thread.h" -#include "blobstorage_pdisk_util_countedqueuemanyone.h" +#include "blobstorage_pdisk_util_countedqueuemanyone.h" #include "blobstorage_pdisk_writer.h" #include <ydb/core/node_whiteboard/node_whiteboard.h> @@ -49,12 +49,12 @@ public: // Monitoring TPDiskMon Mon; - + // Static state TDriveModel DriveModel; - TReqCreator ReqCreator; - + TReqCreator ReqCreator; + // Real-Time Scheduler ui64 ReorderingMs; @@ -64,12 +64,12 @@ public: ui64 ForsetiPrevTimeNs = 0; NSchLab::TScheduler ForsetiScheduler; - // Request queue - TCountedQueueManyOne<TRequestBase, 4096> InputQueue; - TAtomic InputQueueCost = 0; + // Request queue + TCountedQueueManyOne<TRequestBase, 4096> InputQueue; + TAtomic InputQueueCost = 0; - TVector<TRequestBase*> JointLogReads; - TVector<TIntrusivePtr<TRequestBase>> JointChunkReads; + TVector<TRequestBase*> JointLogReads; + TVector<TIntrusivePtr<TRequestBase>> JointChunkReads; TVector<TRequestBase*> JointChunkWrites; TVector<TLogWrite*> JointLogWrites; TVector<TLogWrite*> JointCommits; @@ -111,59 +111,59 @@ public: TPDiskCategory PDiskCategory; TNonceJumpLogPageHeader2 LastNonceJumpLogPageHeader2; - THolder<TBufferPool> BufferPool; - + THolder<TBufferPool> BufferPool; + // In-memory dynamic state TMutex StateMutex; // The state is modified mainly by the PDisk thread, but can be accessed by other threads. 
const TOwnerRound NextOwnerRound; // Next unique-id to use for owner creation - TOwner LastOwnerId = OwnerBeginUser; + TOwner LastOwnerId = OwnerBeginUser; TVector<TOwnerData> OwnerData; // Per-owner information - TMap<TVDiskID, TOwner> VDiskOwners; // For fast VDisk -> OwnerID mapping + TMap<TVDiskID, TOwner> VDiskOwners; // For fast VDisk -> OwnerID mapping TVector<TChunkState> ChunkState; // Per-chunk information TKeeper Keeper; // Chunk data manager bool TrimInFly = false; // TChunkTrim request is present somewhere in pdisk TAtomic ChunkBeingTrimmed = 0; TAtomic TrimOffset = 0; - TList<TLogChunkInfo> LogChunks; // Log chunk list + log-specific information - bool IsLogChunksReleaseInflight = false; + TList<TLogChunkInfo> LogChunks; // Log chunk list + log-specific information + bool IsLogChunksReleaseInflight = false; ui64 InsaneLogChunks = 0; // Set when pdisk sees insanely large log, to give vdisks a chance to cut it - ui32 FirstLogChunkToParseCommits = 0; + ui32 FirstLogChunkToParseCommits = 0; + + // Chunks that is owned by killed owner, but has operations InFlight + TVector<TChunkIdx> QuarantineChunks; + TVector<TOwner> QuarantineOwners; - // Chunks that is owned by killed owner, but has operations InFlight - TVector<TChunkIdx> QuarantineChunks; - TVector<TOwner> QuarantineOwners; - TSysLogRecord SysLogRecord; // Current sys log record state, part 1 of 2 TSysLogFirstNoncesToKeep SysLogFirstNoncesToKeep; // Current sys log record state, part 2 of 2 ui64 SysLogLsn = 0; TNonceSet LoggedNonces; // Latest on-disk Nonce set ui64 CostLimitNs; - TControlWrapper UseT1ha0HashInFooter; + TControlWrapper UseT1ha0HashInFooter; TDriveData DriveData; TAtomic EstimatedLogChunkIdx = 0; // For cost estimation only TDriveData DriveData; - TString ErrorStr; - + TString ErrorStr; + // Incapsulated components TPDiskThread PDiskThread; - THolder<IBlockDevice> BlockDevice; - THolder<TLogWriter> CommonLogger; - THolder<TSysLogWriter> SysLogger; + THolder<IBlockDevice> BlockDevice; 
+ THolder<TLogWriter> CommonLogger; + THolder<TSysLogWriter> SysLogger; // Initialization data ui64 InitialSysLogWritePosition = 0; - EInitPhase InitPhase = EInitPhase::Uninitialized; + EInitPhase InitPhase = EInitPhase::Uninitialized; TBuffer *InitialTailBuffer = nullptr; - TLogPosition InitialLogPosition{0, 0}; + TLogPosition InitialLogPosition{0, 0}; volatile ui64 InitialPreviousNonce = 0; volatile ui64 InitialNonceJumpSize = 0; TAtomic IsStarted = false; TMutex StopMutex; TIntrusivePtr<TPDiskConfig> Cfg; - TInstant CreationTime; + TInstant CreationTime; ui64 ExpectedSlotCount = 0; // Number of slots to use for space limit calculation. @@ -172,30 +172,30 @@ public: // stats TAtomic NonRealTimeMs = 0; TAtomic SlowDeviceMs = 0; - - const bool UseHugePages; + + const bool UseHugePages; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Initialization - TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters); + TPDisk(const TIntrusivePtr<TPDiskConfig> cfg, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters); TString DynamicStateToString(bool isMultiline); bool ReadChunk0Format(ui8* formatSectors, const TKey& mainKey); // Called by actor bool IsFormatMagicValid(ui8 *magicData, ui32 magicDataSize); // Called by actor - bool CheckGuid(TString *outReason); // Called by actor + bool CheckGuid(TString *outReason); // Called by actor bool CheckFormatComplete(); // Called by actor void ReadSysLog(const TActorId &pDiskActor); // Called by actor - void ProcessChunk0(const TEvReadLogResult &readLogResult); - void PrintChunksDebugInfo(); - TString ProcessReadSysLogResult(ui64 &outWritePosition, ui64 &outLsn, const TEvReadLogResult &readLogResult); + void ProcessChunk0(const TEvReadLogResult &readLogResult); + void PrintChunksDebugInfo(); + TString ProcessReadSysLogResult(ui64 &outWritePosition, ui64 &outLsn, const TEvReadLogResult &readLogResult); 
void ReadAndParseMainLog(const TActorId &pDiskActor); // Called by the log reader on success with the current chunkOwnerMap. void ProcessChunkOwnerMap(TMap<ui32, TChunkState> &chunkOwnerMap); void InitLogChunksInfo(); - void PrintLogChunksInfo(const TString& msg); + void PrintLogChunksInfo(const TString& msg); void InitFreeChunks(); - void InitSysLogger(); + void InitSysLogger(); bool InitCommonLogger(); bool LogNonceJump(ui64 previousNonce); - void GetStartingPoints(TOwner owner, TMap<TLogSignature, TLogRecord> &outStartingPoints); + void GetStartingPoints(TOwner owner, TMap<TLogSignature, TLogRecord> &outStartingPoints); TString StartupOwnerInfo(); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Destruction @@ -209,17 +209,17 @@ public: ui32 SystemChunkSize(const TDiskFormat& format, ui32 userAccessibleChunkSizeBytes, ui32 sectorSizeBytes) const; ui64 UsableSectorsPerLogChunk() const; void CheckLogCanary(ui8* sector, ui32 chunkIdx = 0, ui64 sectorIdx = 0) const; - TLogPosition LogPosition(ui32 chunkIdx, ui64 sectorIdx, ui64 offsetInSector) const; + TLogPosition LogPosition(ui32 chunkIdx, ui64 sectorIdx, ui64 offsetInSector) const; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Common operations - bool ReleaseUnusedLogChunks(TCompletionEventSender *completion); - void MarkChunksAsReleased(TReleaseChunks& req); + bool ReleaseUnusedLogChunks(TCompletionEventSender *completion); + void MarkChunksAsReleased(TReleaseChunks& req); void OnNonceChange(ui32 idx, TReqId reqId, NWilson::TTraceId *traceId); ui32 GetTotalChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) const; ui32 GetFreeChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) const; ui32 GetUsedChunks(ui32 ownerId, const EOwnerGroupType ownerGroupType) const; - TStatusFlags GetStatusFlags(TOwner ownerId, const EOwnerGroupType ownerGroupType) const; - 
TStatusFlags NotEnoughDiskSpaceStatusFlags(ui32 ownerId, const EOwnerGroupType ownerGroupType) const; + TStatusFlags GetStatusFlags(TOwner ownerId, const EOwnerGroupType ownerGroupType) const; + TStatusFlags NotEnoughDiskSpaceStatusFlags(ui32 ownerId, const EOwnerGroupType ownerGroupType) const; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Generic log writing @@ -239,20 +239,20 @@ public: void OnLogCommitDone(TLogCommitDone &req); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk commit log writing - NKikimrProto::EReplyStatus BeforeLoggingCommitRecord(const TLogWrite &evLog, TStringStream& outErrorReason); - bool ValidateCommitChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason); + NKikimrProto::EReplyStatus BeforeLoggingCommitRecord(const TLogWrite &evLog, TStringStream& outErrorReason); + bool ValidateCommitChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason); void CommitChunk(ui32 chunkIdx); - bool ValidateDeleteChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason); - void DeleteChunk(ui32 chunkIdx, TOwner owner); + bool ValidateDeleteChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason); + void DeleteChunk(ui32 chunkIdx, TOwner owner); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Log reading - void ProcessReadLogRecord(TLogRecordHeader &header, TString &data, TOwner owner, ui64 nonce, - TEvReadLogResult* result, TMap<ui32, TChunkState> *outChunkOwnerMap, bool isInitial, - bool parseCommitMessage); + void ProcessReadLogRecord(TLogRecordHeader &header, TString &data, TOwner owner, ui64 nonce, + TEvReadLogResult* result, TMap<ui32, TChunkState> *outChunkOwnerMap, bool isInitial, + bool parseCommitMessage); 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk writing bool ChunkWritePiece(TChunkWrite *evChunkWrite, ui32 pieceShift, ui32 pieceSize); - void SendChunkWriteError(TChunkWrite &evChunkWrite, const TString &errorReason, NKikimrProto::EReplyStatus status); + void SendChunkWriteError(TChunkWrite &evChunkWrite, const TString &errorReason, NKikimrProto::EReplyStatus status); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk reading enum EChunkReadPieceResult { @@ -261,44 +261,44 @@ public: ReadPieceResultError = 2 }; - void SendChunkReadError(const TIntrusivePtr<TChunkRead>& read, TStringStream& errorReason, - NKikimrProto::EReplyStatus status); - EChunkReadPieceResult ChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, ui64 pieceSizeLimit, - ui64 *reallyReadBytes); + void SendChunkReadError(const TIntrusivePtr<TChunkRead>& read, TStringStream& errorReason, + NKikimrProto::EReplyStatus status); + EChunkReadPieceResult ChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, ui64 pieceSizeLimit, + ui64 *reallyReadBytes); void SplitChunkJobSize(ui32 totalSize, ui32 *outSmallJobSize, ui32 *outLargeJObSize, ui32 *outSmallJobCount); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void ChunksLockByRange(TFreeChunks &freeChunks, ui32 begin, ui32 end, TVector<ui32> &lockedChunks); void ChunksLockByNumber(ui32 begin, ui32 number, TVector<ui32> &lockedChunks); - void ChunksLock(TChunksLock &evChunksLock); - void ChunksUnlock(TChunksUnlock &evChunksUnlock); + void ChunksLock(TChunksLock &evChunksLock); + void ChunksUnlock(TChunksUnlock &evChunksUnlock); // Chunk reservation - TVector<TChunkIdx> AllocateChunkForOwner(const TRequestBase *req, const ui32 count, TString &errorReason); + TVector<TChunkIdx> 
AllocateChunkForOwner(const TRequestBase *req, const ui32 count, TString &errorReason); void ChunkReserve(TChunkReserve &evChunkReserve); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Whiteboard and HTTP reports creation + // Whiteboard and HTTP reports creation void WhiteboardReport(TWhiteboardReport &whiteboardReport); // Called by actor void RenderState(IOutputStream &str, THttpInfo &httpInfo); - void OutputHtmlOwners(TStringStream &str); - void OutputHtmlLogChunksDetails(TStringStream &str); - void OutputHtmlChunksLockUnlockInfo(TStringStream &str); + void OutputHtmlOwners(TStringStream &str); + void OutputHtmlLogChunksDetails(TStringStream &str); + void OutputHtmlChunksLockUnlockInfo(TStringStream &str); void HttpInfo(THttpInfo &httpInfo); // Called by actor - void EventUndelivered(TUndelivered &req); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + void EventUndelivered(TUndelivered &req); + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // PDisk formatting void WriteApplyFormatRecord(TDiskFormat format, const TKey &mainKey); - void WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, const ui64 &diskGuid, + void WriteDiskFormat(ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, const ui64 &diskGuid, const TKey &chunkKey, const TKey &logKey, const TKey &sysLogKey, const TKey &mainKey, - TString textMessage, const bool isErasureEncodeUserLog, const bool trimEntireDevice); + TString textMessage, const bool isErasureEncodeUserLog, const bool trimEntireDevice); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Owner initialization void ReplyErrorYardInitResult(TYardInit &evYardInit, const TString &str); - TOwner 
FindNextOwnerId(); - bool YardInitStart(TYardInit &evYardInit); - void YardInitFinish(TYardInit &evYardInit); - bool YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner); + TOwner FindNextOwnerId(); + bool YardInitStart(TYardInit &evYardInit); + void YardInitFinish(TYardInit &evYardInit); + bool YardInitForKnownVDisk(TYardInit &evYardInit, TOwner owner); // Scheduler weight configuration void ConfigureCbs(ui32 ownerId, EGate gate, ui64 weight); - void SchedulerConfigure(const TConfigureScheduler &conf); + void SchedulerConfigure(const TConfigureScheduler &conf); void SendCutLog(TAskForCutLog &reqest); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Free space check @@ -309,7 +309,7 @@ public: // Owner destruction void Slay(TSlay &evSlay); // Common implementation details - void ForceDeleteChunk(TChunkIdx chunkIdx); + void ForceDeleteChunk(TChunkIdx chunkIdx); void KillOwner(TOwner owner, TOwnerRound killOwnerRound, TCompletionEventSender *completionAction); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Update process @@ -317,10 +317,10 @@ public: void ProcessChunkWriteQueue(); void ProcessChunkReadQueue(); void ProcessLogReadQueue(); - void ProcessYardInitSet(); - void TrimAllUntrimmedChunks(); + void ProcessYardInitSet(); + void TrimAllUntrimmedChunks(); void ProcessChunkTrimQueue(); - void ClearQuarantineChunks(); + void ClearQuarantineChunks(); // Should be called to initiate TRIM (on chunk delete or prev trim done) void TryTrimChunk(bool prevDone, ui64 trimmedSize); void ProcessFastOperationsQueue(); @@ -331,22 +331,22 @@ public: // Internal interface // Schedules EvReadLogResult event for the system log - void ResetInit(); - bool Initialize(TActorSystem *actorSystem, const TActorId &pDiskActorId); // Called by actor - void InitiateReadSysLog(const TActorId &pDiskActor); // Called by actor - void 
ProcessReadLogResult(const TEvReadLogResult &evReadLogResult, const TActorId &pDiskActor); - - NKikimrProto::EReplyStatus ValidateRequest(TLogWrite *logWrite, TStringStream& outErrorReason); - void PrepareLogError(TLogWrite *logWrite, TStringStream& errorReason, NKikimrProto::EReplyStatus status); - template<typename T> - bool PreprocessRequestImpl(T *req); // const; - NKikimrProto::EReplyStatus CheckOwnerAndRound(TRequestBase* req, TStringStream& err); - bool PreprocessRequest(TRequestBase *request); - void PushRequestToForseti(TRequestBase *request); + void ResetInit(); + bool Initialize(TActorSystem *actorSystem, const TActorId &pDiskActorId); // Called by actor + void InitiateReadSysLog(const TActorId &pDiskActor); // Called by actor + void ProcessReadLogResult(const TEvReadLogResult &evReadLogResult, const TActorId &pDiskActor); + + NKikimrProto::EReplyStatus ValidateRequest(TLogWrite *logWrite, TStringStream& outErrorReason); + void PrepareLogError(TLogWrite *logWrite, TStringStream& errorReason, NKikimrProto::EReplyStatus status); + template<typename T> + bool PreprocessRequestImpl(T *req); // const; + NKikimrProto::EReplyStatus CheckOwnerAndRound(TRequestBase* req, TStringStream& err); + bool PreprocessRequest(TRequestBase *request); + void PushRequestToForseti(TRequestBase *request); void AddJobToForseti(NSchLab::TCbs *cbs, TRequestBase *request, NSchLab::EJobKind jobKind); void RouteRequest(TRequestBase *request); void ProcessPausedQueue(); - void ProcessPendingActivities(); + void ProcessPendingActivities(); void EnqueueAll(); void Update() override; void Wakeup() override; @@ -361,12 +361,12 @@ private: void UpdateMinLogCostNs(); }; -void ParsePayloadFromSectorOffset(const TDiskFormat& format, ui64 firstSector, ui64 lastSector, ui64 currentSector, - ui64 *outPayloadBytes, ui64 *outPayloadOffset); - -bool ParseSectorOffset(const TDiskFormat& format, TActorSystem *actorSystem, ui32 pDiskId, ui64 offset, ui64 size, - ui64 &outSectorIdx, ui64 
&outLastSectorIdx, ui64 &outSectorOffset); - +void ParsePayloadFromSectorOffset(const TDiskFormat& format, ui64 firstSector, ui64 lastSector, ui64 currentSector, + ui64 *outPayloadBytes, ui64 *outPayloadOffset); + +bool ParseSectorOffset(const TDiskFormat& format, TActorSystem *actorSystem, ui32 pDiskId, ui64 offset, ui64 size, + ui64 &outSectorIdx, ui64 &outLastSectorIdx, ui64 &outSectorOffset); + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp index 4781602639..5565a0c829 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_http.cpp @@ -1,459 +1,459 @@ -#include "blobstorage_pdisk_impl.h" - +#include "blobstorage_pdisk_impl.h" + #include <ydb/core/blobstorage/base/html.h> - -#include <library/cpp/monlib/service/pages/templates.h> - -namespace NKikimr::NPDisk { - -void TPDisk::RenderState(IOutputStream &str, THttpInfo &httpInfo) { -#define GREEN_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Green, text).Output(str) -#define RED_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Red, text).Output(str) -#define YELLOW_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Yellow, text).Output(str) - HTML(str) { - H4() {str << "Current state";} - TABLE_CLASS ("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "Component";} - TABLEH() {str << "State";} - TABLEH() {str << "Brief state";} - TABLEH() {str << "Detailed state";} - } - } - TABLEBODY() { - TABLER() { - TABLED() {str << "PDisk";} - switch(Mon.PDiskBriefState->Val()) { - case TPDiskMon::TPDisk::OK: - TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} - break; - case TPDiskMon::TPDisk::Booting: - TABLED() {YELLOW_TEXT(str, 
TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {YELLOW_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} - break; - case TPDiskMon::TPDisk::Error: - TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} - TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} - break; - } - TABLED() {str << TPDiskMon::TPDisk::DetailedStateToStr(Mon.PDiskDetailedState->Val());} - } - TABLER() { - TABLED() {str << "Device";} - TABLED() {str << httpInfo.DeviceFlagStr;} - } - TABLER() { - TABLED() {str << "Realtime";} - TABLED() {str << httpInfo.RealtimeFlagStr;} - } - TABLER() { - TABLED() {str << "Worst VDisk";} - TABLED() { - TOwnerData::EVDiskStatus worstStatus = TOwnerData::VDISK_STATUS_LOGGED; - for (const TOwnerData& data : OwnerData) { - if (data.VDiskId != TVDiskID::InvalidId && data.Status < worstStatus) { - worstStatus = data.Status; - } - } - if (worstStatus < TOwnerData::VDISK_STATUS_LOGGED) { - YELLOW_TEXT(str, TOwnerData::RenderStatus(worstStatus)); - } else { - GREEN_TEXT(str, TOwnerData::RenderStatus(worstStatus)); - } - } - } - TABLER() { - TABLED() {str << "SerialNumber match";} - TABLED() { - if (!Cfg->ExpectedSerial) { - YELLOW_TEXT(str, "Not set"); - } else if (Cfg->ExpectedSerial != DriveData.SerialNumber) { - RED_TEXT(str, "Error"); - } else { - GREEN_TEXT(str, "Ok"); - } - } - } - } - } - H4() {str << "State description"; } - if (Cfg->SectorMap) { - PARA() {str << "Note - this is SectorMap device<br>"; } - } - if (!Cfg->EnableSectorEncryption) { - PARA() {str << "Note - PDisk sector enctyption is disabled<br>"; } - } - PARA() {str << httpInfo.ErrorStr; } - H4() {str << "Uptime"; } - PARA() { - TDuration uptime = TInstant::Now() - CreationTime; - if (uptime.Days() > 0) { - str << Sprintf("%2lu days ", uptime.Days()); - } - str << Sprintf("%02lu:%02lu:%02lu", uptime.Hours() % 24, uptime.Minutes() % 60, uptime.Seconds() % 60); - } - // Restart button - H4() 
{str << "Restart"; } - DIV() { - str << R"___( - <script> - function reloadPage() { - window.location.replace(window.location.href); - } - - function sendReloadRequest() { - $.ajax({ - url: "", - data: "restartPDisk=", - method: "POST", - success: reloadPage - }); - } - - function sendStopRequest() { - $.ajax({ - url: "", - data: "stopPDisk=", - method: "POST", - success: reloadPage - }); - } - </script> - )___"; - str << "<button onClick='sendReloadRequest()' name='restartPDisk' class='btn btn-default' "; - if (Cfg->SectorMap || Mon.PDiskBriefState->Val() == TPDiskMon::TPDisk::Error) { - str << "style='background:Tomato; margin:5px' "; - } else { - str << "disabled "; - str << "style='background:LightGray; margin:5px' "; - } - str << ">"; - str << "Restart"; - str << "</button>"; - - if (Cfg->SectorMap) { - str << "<button onClick='sendStopRequest()' name='stopPDisk' class='btn btn-default' "; - str << "style='background:Tomato; margin:5px'>"; - str << "Stop"; - str << "</button>"; - } - } - if (Cfg->SectorMap) { - H4() {str << "SectorMap"; } - PRE() {str << Cfg->SectorMap->ToString();} - } - H4() {str << "Config"; } - PRE() {str << Cfg->ToString(true);} - if (Mon.PDiskBriefState->Val() != TPDiskMon::TPDisk::Booting) { - H4() {str << "Drive Data"; } - PRE() {str << DriveData.ToString(true);} - H4() {str << "Fair Scheduler"; } - PRE() {str << httpInfo.FairSchedulerStr;} - H4() {str << "Format info"; } - PRE() {str << Format.ToString(true);} - H4() {str << "Drive model"; } - PRE() {str << DriveModel.ToString(true);} - H4() {str << "Sys log record"; } - PRE() {str << SysLogRecord.ToString(true);} - H4() {str << "Logged NONCEs"; } - PRE() {str << LoggedNonces.ToString(true);} - H4() {str << "Dynamic state"; } - PRE() {str << DynamicStateToString(true);} - H4() {str << "Last Nonce Jump Log Page Header"; } - PRE() {str << LastNonceJumpLogPageHeader2.ToString(true);} - H4() {str << "VDisk statuses"; } - PRE() { - for (const TOwnerData& data : OwnerData) { - if 
(data.VDiskId != TVDiskID::InvalidId) { - str << "VDiskId# " << data.VDiskId.ToStringWOGeneration() - << " Status# " << data.GetStringStatus() << Endl; - } - } - } - } - } -#undef GREEN_TEXT -#undef RED_TEXT -#undef YELLOW_TEXT -} - -void TPDisk::OutputHtmlOwners(TStringStream &str) { - ui64 chunksOwned[256]; - memset(chunksOwned, 0, sizeof(chunksOwned)); - - - size_t size = ChunkState.size(); - for (size_t idx = 0; idx < size; idx++) { - chunksOwned[ChunkState[idx].OwnerId]++; - } - - HTML(str) { - TABLE_CLASS ("table table-condensed") { - TABLEHEAD() { - TABLER() { - TABLEH() { str << "OwnerId";} - TABLEH() { str << "VDiskId"; } - TABLEH() { str << "ChunksOwned"; } - TABLEH() { str << "CutLogId"; } + +#include <library/cpp/monlib/service/pages/templates.h> + +namespace NKikimr::NPDisk { + +void TPDisk::RenderState(IOutputStream &str, THttpInfo &httpInfo) { +#define GREEN_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Green, text).Output(str) +#define RED_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Red, text).Output(str) +#define YELLOW_TEXT(str, text) THtmlLightSignalRenderer(NKikimrWhiteboard::EFlag::Yellow, text).Output(str) + HTML(str) { + H4() {str << "Current state";} + TABLE_CLASS ("table") { + TABLEHEAD() { + TABLER() { + TABLEH() {str << "Component";} + TABLEH() {str << "State";} + TABLEH() {str << "Brief state";} + TABLEH() {str << "Detailed state";} + } + } + TABLEBODY() { + TABLER() { + TABLED() {str << "PDisk";} + switch(Mon.PDiskBriefState->Val()) { + case TPDiskMon::TPDisk::OK: + TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} + TABLED() {GREEN_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + break; + case TPDiskMon::TPDisk::Booting: + TABLED() {YELLOW_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} + TABLED() {YELLOW_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + break; + case 
TPDiskMon::TPDisk::Error: + TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::StateToStr(Mon.PDiskState->Val()));} + TABLED() {RED_TEXT(str, TPDiskMon::TPDisk::BriefStateToStr(Mon.PDiskBriefState->Val()));} + break; + } + TABLED() {str << TPDiskMon::TPDisk::DetailedStateToStr(Mon.PDiskDetailedState->Val());} + } + TABLER() { + TABLED() {str << "Device";} + TABLED() {str << httpInfo.DeviceFlagStr;} + } + TABLER() { + TABLED() {str << "Realtime";} + TABLED() {str << httpInfo.RealtimeFlagStr;} + } + TABLER() { + TABLED() {str << "Worst VDisk";} + TABLED() { + TOwnerData::EVDiskStatus worstStatus = TOwnerData::VDISK_STATUS_LOGGED; + for (const TOwnerData& data : OwnerData) { + if (data.VDiskId != TVDiskID::InvalidId && data.Status < worstStatus) { + worstStatus = data.Status; + } + } + if (worstStatus < TOwnerData::VDISK_STATUS_LOGGED) { + YELLOW_TEXT(str, TOwnerData::RenderStatus(worstStatus)); + } else { + GREEN_TEXT(str, TOwnerData::RenderStatus(worstStatus)); + } + } + } + TABLER() { + TABLED() {str << "SerialNumber match";} + TABLED() { + if (!Cfg->ExpectedSerial) { + YELLOW_TEXT(str, "Not set"); + } else if (Cfg->ExpectedSerial != DriveData.SerialNumber) { + RED_TEXT(str, "Error"); + } else { + GREEN_TEXT(str, "Ok"); + } + } + } + } + } + H4() {str << "State description"; } + if (Cfg->SectorMap) { + PARA() {str << "Note - this is SectorMap device<br>"; } + } + if (!Cfg->EnableSectorEncryption) { + PARA() {str << "Note - PDisk sector enctyption is disabled<br>"; } + } + PARA() {str << httpInfo.ErrorStr; } + H4() {str << "Uptime"; } + PARA() { + TDuration uptime = TInstant::Now() - CreationTime; + if (uptime.Days() > 0) { + str << Sprintf("%2lu days ", uptime.Days()); + } + str << Sprintf("%02lu:%02lu:%02lu", uptime.Hours() % 24, uptime.Minutes() % 60, uptime.Seconds() % 60); + } + // Restart button + H4() {str << "Restart"; } + DIV() { + str << R"___( + <script> + function reloadPage() { + window.location.replace(window.location.href); + } + + function 
sendReloadRequest() { + $.ajax({ + url: "", + data: "restartPDisk=", + method: "POST", + success: reloadPage + }); + } + + function sendStopRequest() { + $.ajax({ + url: "", + data: "stopPDisk=", + method: "POST", + success: reloadPage + }); + } + </script> + )___"; + str << "<button onClick='sendReloadRequest()' name='restartPDisk' class='btn btn-default' "; + if (Cfg->SectorMap || Mon.PDiskBriefState->Val() == TPDiskMon::TPDisk::Error) { + str << "style='background:Tomato; margin:5px' "; + } else { + str << "disabled "; + str << "style='background:LightGray; margin:5px' "; + } + str << ">"; + str << "Restart"; + str << "</button>"; + + if (Cfg->SectorMap) { + str << "<button onClick='sendStopRequest()' name='stopPDisk' class='btn btn-default' "; + str << "style='background:Tomato; margin:5px'>"; + str << "Stop"; + str << "</button>"; + } + } + if (Cfg->SectorMap) { + H4() {str << "SectorMap"; } + PRE() {str << Cfg->SectorMap->ToString();} + } + H4() {str << "Config"; } + PRE() {str << Cfg->ToString(true);} + if (Mon.PDiskBriefState->Val() != TPDiskMon::TPDisk::Booting) { + H4() {str << "Drive Data"; } + PRE() {str << DriveData.ToString(true);} + H4() {str << "Fair Scheduler"; } + PRE() {str << httpInfo.FairSchedulerStr;} + H4() {str << "Format info"; } + PRE() {str << Format.ToString(true);} + H4() {str << "Drive model"; } + PRE() {str << DriveModel.ToString(true);} + H4() {str << "Sys log record"; } + PRE() {str << SysLogRecord.ToString(true);} + H4() {str << "Logged NONCEs"; } + PRE() {str << LoggedNonces.ToString(true);} + H4() {str << "Dynamic state"; } + PRE() {str << DynamicStateToString(true);} + H4() {str << "Last Nonce Jump Log Page Header"; } + PRE() {str << LastNonceJumpLogPageHeader2.ToString(true);} + H4() {str << "VDisk statuses"; } + PRE() { + for (const TOwnerData& data : OwnerData) { + if (data.VDiskId != TVDiskID::InvalidId) { + str << "VDiskId# " << data.VDiskId.ToStringWOGeneration() + << " Status# " << data.GetStringStatus() << Endl; + } + } 
+ } + } + } +#undef GREEN_TEXT +#undef RED_TEXT +#undef YELLOW_TEXT +} + +void TPDisk::OutputHtmlOwners(TStringStream &str) { + ui64 chunksOwned[256]; + memset(chunksOwned, 0, sizeof(chunksOwned)); + + + size_t size = ChunkState.size(); + for (size_t idx = 0; idx < size; idx++) { + chunksOwned[ChunkState[idx].OwnerId]++; + } + + HTML(str) { + TABLE_CLASS ("table table-condensed") { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "OwnerId";} + TABLEH() { str << "VDiskId"; } + TABLEH() { str << "ChunksOwned"; } + TABLEH() { str << "CutLogId"; } TABLEH() { str << "WhiteboardProxyId"; } - TABLEH() { str << "CurLsnToKeep"; } - TABLEH() { str << "FirstNonceToKeep"; } - TABLEH() { str << "AskedToCutLogAt"; } - TABLEH() { str << "CutLogAt"; } - } - } - TABLEBODY() { - for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { - const TOwnerData &data = OwnerData[owner]; - if (data.VDiskId != TVDiskID::InvalidId) { - TABLER() { - TABLED() { str << (ui32) owner;} - TABLED() { str << data.VDiskId.ToStringWOGeneration(); } - TABLED() { str << chunksOwned[owner]; } - TABLED() { str << data.CutLogId.ToString(); } + TABLEH() { str << "CurLsnToKeep"; } + TABLEH() { str << "FirstNonceToKeep"; } + TABLEH() { str << "AskedToCutLogAt"; } + TABLEH() { str << "CutLogAt"; } + } + } + TABLEBODY() { + for (ui32 owner = 0; owner < OwnerData.size(); ++owner) { + const TOwnerData &data = OwnerData[owner]; + if (data.VDiskId != TVDiskID::InvalidId) { + TABLER() { + TABLED() { str << (ui32) owner;} + TABLED() { str << data.VDiskId.ToStringWOGeneration(); } + TABLED() { str << chunksOwned[owner]; } + TABLED() { str << data.CutLogId.ToString(); } TABLED() { str << data.WhiteboardProxyId; } - TABLED() { str << data.CurrentFirstLsnToKeep; } - TABLED() { str << SysLogFirstNoncesToKeep.FirstNonceToKeep[owner]; } - TABLED() { str << data.AskedToCutLogAt; } - TABLED() { - if (data.CutLogAt < data.AskedToCutLogAt) { - str << "<font color=\"red\">"; - str << data.CutLogAt; - str << "</font>"; - } else { 
- str << data.CutLogAt; - } - } - } - } - } - } - } - } -} - -void TPDisk::OutputHtmlLogChunksDetails(TStringStream &str) { - HTML(str) { - TABLE_CLASS ("table table-condensed") { - TVector<ui32> activeOwners; - for (auto& [vdiskId, owner] : VDiskOwners) { - activeOwners.push_back(owner); - } - Sort(activeOwners); - TABLEHEAD() { - TABLER() { - TABLEH() {str << "#";} - TABLEH() {str << "ChunkId";} - TABLEH() {str << "IsCommited";} - TABLEH() {str << "Nonces";} - TABLEH() {str << "Users";} - for (ui32 owner : activeOwners) { - const TVDiskID &id = OwnerData[owner].VDiskId; - if (id == TVDiskID::InvalidId) { - TABLEH() {str << "o" << owner << "v--"; } - } else { - TABLEH() {str << "o" << owner << "v" << id.ToStringWOGeneration(); } - } - } - } - } - TABLEBODY() { - ui32 idx = 0; - for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { - ++idx; - TABLER() { - TABLED() { str << idx;} - TABLED() { str << it->ChunkIdx; } - TABLED() { str << (ChunkState[it->ChunkIdx].CommitState == TChunkState::DATA_COMMITTED); } - TABLED() { str << "[" << it->FirstNonce << ", " << it->LastNonce << "]"; } - TABLED() { str << it->CurrentUserCount; } - for (ui32 owner : activeOwners) { - if (owner < it->OwnerLsnRange.size()) { - const TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[owner]; - if (range.IsPresent) { - TABLED() { - str << "[" << range.FirstLsn << ", " << range.LastLsn << "]"; - } - } else { - TABLED() {str << "-";} - } - } else { - TABLED() {str << ".";} - } - } - if (it->CurrentUserCount > activeOwners.size()) { - // There are some owners of chunks that are not present in VDiskOwners - for (ui64 chunkOwner = 0; chunkOwner < it->OwnerLsnRange.size(); ++chunkOwner) { - const TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[chunkOwner]; - if (range.IsPresent && - !std::count(activeOwners.begin(), activeOwners.end(), chunkOwner)) { - TABLED() { - str << "ERROR! 
ownerId# " << chunkOwner - << " OwnerLsnRange.size()# " << it->OwnerLsnRange.size() - << " [" << range.FirstLsn << ", " << range.LastLsn << "]"; - } - } - } - } - } - } - } - } - } -} - -void TPDisk::OutputHtmlChunksLockUnlockInfo(TStringStream &str) { - HTML(str) { - str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px' \ - data-target='#lockByRangeCollapse'> Lock by range </button>"; - str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px' \ - data-target='#lockByCountCollapse'> Lock by count </button>"; - - str << "<div id='lockByRangeCollapse' class='collapse'>"; - str << "<form class='form_horizontal' method='post'>"; - LABEL_CLASS_FOR("control-label", "begin") { str << "Begin "; } - str << "<input id='inputBegin' name='chunksLockBegin' type='text' value='" << 1 << "'/>"; - LABEL_CLASS_FOR("control-label", "end") { str << " End "; } - str << "<input id='inputEnd' name='chunksLockEnd' type='text' value='" << - ChunkState.size() << "'/>"; - str << "<button type='submit' name='chunksLockByRange' class='btn btn-default'\ - style='background:red; margin:5px'>Lock by range</button>"; - str << "</form>"; - str << "</div>"; - - str << "<div id='lockByCountCollapse' class='collapse'>"; - str << "<form class='form_horizontal' method='post'>"; - LABEL_CLASS_FOR("control-label", "begin") { str << "Begin "; } - str << "<input id='inputBegin' name='chunksLockBegin' type='text' value='" << 1 << "'/>"; - LABEL_CLASS_FOR("control-label", "count") { str << " Count "; } - str << "<input id='inputCount' name='chunksLockCount' type='text' value='" << - ChunkState.size() << "'/>"; - str << "<button type='submit' name='chunksLockByCount' class='btn btn-default' \ - style='background:red; margin:5px'>Lock by count</button>"; - str << "</form>"; - str << "</div>"; - - str << "<form method='post'>"; - str << "<button type='submit' name='chunksUnlock' class='btn btn-default' \ - style='background:green; 
margin:5px'>Unlock All</button>"; - str << "</form>"; - COLLAPSED_BUTTON_CONTENT("chunksStateTable", "Chunks State") { - TABLE_CLASS ("") { - const size_t columns = 50; - TABLEHEAD() { - TABLER() { - TABLEH() {str << "Chunk";} - for (size_t n = 0; n < columns; ++n) { - TABLEH() {str << Sprintf("%02d", (int)n);} - } - } - } - TABLEBODY() { - size_t size = ChunkState.size(); - for (size_t i = 0; i < size; i += columns) { - TABLER() { - TABLED() {str << (ui32)i;} - for (size_t n = 0; n < columns; ++n) { - size_t idx = i + n; - if (idx < size) { - const TChunkState &chunk = ChunkState[idx]; - TABLED() { - if (chunk.OwnerId == (TOwner) OwnerSystem) { - str << "L"; - } else if (chunk.OwnerId == OwnerUnallocated) { - str << "."; - } else if (chunk.OwnerId == OwnerUnallocatedTrimmed) { - str << ","; - } else if (chunk.OwnerId == OwnerLocked) { - str << "X"; - } else { - str << (ui32)chunk.OwnerId; - if (chunk.CommitState != TChunkState::DATA_COMMITTED) { - str << "-"; - } - } - } - } else { - TABLED() {} - } - } - } - } - } - } - } - } -} - -void TPDisk::HttpInfo(THttpInfo &httpInfo) { - TEvHttpInfoResult *reportResult = new TEvHttpInfoResult(httpInfo.EndCustomer); - if (httpInfo.DoGetSchedule) { - TStringStream out; - out << "HTTP/1.1 200 Ok\r\n" - << "Content-Type: text/html\r\n" - << "Access-Control-Allow-Origin: *\r\n" - << "Connection: Close\r\n\r\n"; - TGuard<TMutex> guard(StateMutex); - ForsetiScheduler.OutputLog(out); - reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(out.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom); - ActorSystem->Send(httpInfo.Sender, reportResult); - } else { - TStringStream str = httpInfo.OutputString; - TGuard<TMutex> guard(StateMutex); - HTML(str) { - DIV_CLASS("row") { - DIV_CLASS("col-md-7") { RenderState(str, httpInfo); } - DIV_CLASS("col-md-5") { - str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px'\ - data-target='#countersHtml'> Solomon counters </button>"; - str << "<div 
id='countersHtml' class='collapse'>"; - Mon.Counters->OutputHtml(str); - str << "</div>"; - } - } - - DIV_CLASS("panel panel-info") { - DIV_CLASS("panel-heading") { - str << "Chunks Keeper"; - } - DIV_CLASS("panel-body") { - Keeper.PrintHTML(str); - } - } - - DIV_CLASS("panel panel-info") { - DIV_CLASS("panel-heading") { - str << "Owners"; - } - DIV_CLASS("panel-body") { - OutputHtmlOwners(str); - } - } // Owners - - DIV_CLASS("panel panel-info") { - DIV_CLASS("panel-heading") { - str << "Log Chunk Details"; - } - DIV_CLASS("panel-body") { - OutputHtmlLogChunksDetails(str); - } - } // Log Chunk Details - - DIV_CLASS("panel panel-info") { - DIV_CLASS("panel-heading") { - str << "Chunks"; - } - DIV_CLASS("panel-body") { - OutputHtmlChunksLockUnlockInfo(str); - } - } // Chunks - - } - reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(str.Str()); - ActorSystem->Send(httpInfo.Sender, reportResult); - } -} - -} // NKikimr::NPDisk + TABLED() { str << data.CurrentFirstLsnToKeep; } + TABLED() { str << SysLogFirstNoncesToKeep.FirstNonceToKeep[owner]; } + TABLED() { str << data.AskedToCutLogAt; } + TABLED() { + if (data.CutLogAt < data.AskedToCutLogAt) { + str << "<font color=\"red\">"; + str << data.CutLogAt; + str << "</font>"; + } else { + str << data.CutLogAt; + } + } + } + } + } + } + } + } +} + +void TPDisk::OutputHtmlLogChunksDetails(TStringStream &str) { + HTML(str) { + TABLE_CLASS ("table table-condensed") { + TVector<ui32> activeOwners; + for (auto& [vdiskId, owner] : VDiskOwners) { + activeOwners.push_back(owner); + } + Sort(activeOwners); + TABLEHEAD() { + TABLER() { + TABLEH() {str << "#";} + TABLEH() {str << "ChunkId";} + TABLEH() {str << "IsCommited";} + TABLEH() {str << "Nonces";} + TABLEH() {str << "Users";} + for (ui32 owner : activeOwners) { + const TVDiskID &id = OwnerData[owner].VDiskId; + if (id == TVDiskID::InvalidId) { + TABLEH() {str << "o" << owner << "v--"; } + } else { + TABLEH() {str << "o" << owner << "v" << id.ToStringWOGeneration(); } + } + 
} + } + } + TABLEBODY() { + ui32 idx = 0; + for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { + ++idx; + TABLER() { + TABLED() { str << idx;} + TABLED() { str << it->ChunkIdx; } + TABLED() { str << (ChunkState[it->ChunkIdx].CommitState == TChunkState::DATA_COMMITTED); } + TABLED() { str << "[" << it->FirstNonce << ", " << it->LastNonce << "]"; } + TABLED() { str << it->CurrentUserCount; } + for (ui32 owner : activeOwners) { + if (owner < it->OwnerLsnRange.size()) { + const TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[owner]; + if (range.IsPresent) { + TABLED() { + str << "[" << range.FirstLsn << ", " << range.LastLsn << "]"; + } + } else { + TABLED() {str << "-";} + } + } else { + TABLED() {str << ".";} + } + } + if (it->CurrentUserCount > activeOwners.size()) { + // There are some owners of chunks that are not present in VDiskOwners + for (ui64 chunkOwner = 0; chunkOwner < it->OwnerLsnRange.size(); ++chunkOwner) { + const TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[chunkOwner]; + if (range.IsPresent && + !std::count(activeOwners.begin(), activeOwners.end(), chunkOwner)) { + TABLED() { + str << "ERROR! 
ownerId# " << chunkOwner + << " OwnerLsnRange.size()# " << it->OwnerLsnRange.size() + << " [" << range.FirstLsn << ", " << range.LastLsn << "]"; + } + } + } + } + } + } + } + } + } +} + +void TPDisk::OutputHtmlChunksLockUnlockInfo(TStringStream &str) { + HTML(str) { + str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px' \ + data-target='#lockByRangeCollapse'> Lock by range </button>"; + str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px' \ + data-target='#lockByCountCollapse'> Lock by count </button>"; + + str << "<div id='lockByRangeCollapse' class='collapse'>"; + str << "<form class='form_horizontal' method='post'>"; + LABEL_CLASS_FOR("control-label", "begin") { str << "Begin "; } + str << "<input id='inputBegin' name='chunksLockBegin' type='text' value='" << 1 << "'/>"; + LABEL_CLASS_FOR("control-label", "end") { str << " End "; } + str << "<input id='inputEnd' name='chunksLockEnd' type='text' value='" << + ChunkState.size() << "'/>"; + str << "<button type='submit' name='chunksLockByRange' class='btn btn-default'\ + style='background:red; margin:5px'>Lock by range</button>"; + str << "</form>"; + str << "</div>"; + + str << "<div id='lockByCountCollapse' class='collapse'>"; + str << "<form class='form_horizontal' method='post'>"; + LABEL_CLASS_FOR("control-label", "begin") { str << "Begin "; } + str << "<input id='inputBegin' name='chunksLockBegin' type='text' value='" << 1 << "'/>"; + LABEL_CLASS_FOR("control-label", "count") { str << " Count "; } + str << "<input id='inputCount' name='chunksLockCount' type='text' value='" << + ChunkState.size() << "'/>"; + str << "<button type='submit' name='chunksLockByCount' class='btn btn-default' \ + style='background:red; margin:5px'>Lock by count</button>"; + str << "</form>"; + str << "</div>"; + + str << "<form method='post'>"; + str << "<button type='submit' name='chunksUnlock' class='btn btn-default' \ + style='background:green; 
margin:5px'>Unlock All</button>"; + str << "</form>"; + COLLAPSED_BUTTON_CONTENT("chunksStateTable", "Chunks State") { + TABLE_CLASS ("") { + const size_t columns = 50; + TABLEHEAD() { + TABLER() { + TABLEH() {str << "Chunk";} + for (size_t n = 0; n < columns; ++n) { + TABLEH() {str << Sprintf("%02d", (int)n);} + } + } + } + TABLEBODY() { + size_t size = ChunkState.size(); + for (size_t i = 0; i < size; i += columns) { + TABLER() { + TABLED() {str << (ui32)i;} + for (size_t n = 0; n < columns; ++n) { + size_t idx = i + n; + if (idx < size) { + const TChunkState &chunk = ChunkState[idx]; + TABLED() { + if (chunk.OwnerId == (TOwner) OwnerSystem) { + str << "L"; + } else if (chunk.OwnerId == OwnerUnallocated) { + str << "."; + } else if (chunk.OwnerId == OwnerUnallocatedTrimmed) { + str << ","; + } else if (chunk.OwnerId == OwnerLocked) { + str << "X"; + } else { + str << (ui32)chunk.OwnerId; + if (chunk.CommitState != TChunkState::DATA_COMMITTED) { + str << "-"; + } + } + } + } else { + TABLED() {} + } + } + } + } + } + } + } + } +} + +void TPDisk::HttpInfo(THttpInfo &httpInfo) { + TEvHttpInfoResult *reportResult = new TEvHttpInfoResult(httpInfo.EndCustomer); + if (httpInfo.DoGetSchedule) { + TStringStream out; + out << "HTTP/1.1 200 Ok\r\n" + << "Content-Type: text/html\r\n" + << "Access-Control-Allow-Origin: *\r\n" + << "Connection: Close\r\n\r\n"; + TGuard<TMutex> guard(StateMutex); + ForsetiScheduler.OutputLog(out); + reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(out.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom); + ActorSystem->Send(httpInfo.Sender, reportResult); + } else { + TStringStream str = httpInfo.OutputString; + TGuard<TMutex> guard(StateMutex); + HTML(str) { + DIV_CLASS("row") { + DIV_CLASS("col-md-7") { RenderState(str, httpInfo); } + DIV_CLASS("col-md-5") { + str << "<button type='button' class='btn btn-default' data-toggle='collapse' style='margin:5px'\ + data-target='#countersHtml'> Solomon counters </button>"; + str << "<div 
id='countersHtml' class='collapse'>"; + Mon.Counters->OutputHtml(str); + str << "</div>"; + } + } + + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Chunks Keeper"; + } + DIV_CLASS("panel-body") { + Keeper.PrintHTML(str); + } + } + + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Owners"; + } + DIV_CLASS("panel-body") { + OutputHtmlOwners(str); + } + } // Owners + + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Log Chunk Details"; + } + DIV_CLASS("panel-body") { + OutputHtmlLogChunksDetails(str); + } + } // Log Chunk Details + + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Chunks"; + } + DIV_CLASS("panel-body") { + OutputHtmlChunksLockUnlockInfo(str); + } + } // Chunks + + } + reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(str.Str()); + ActorSystem->Send(httpInfo.Sender, reportResult); + } +} + +} // NKikimr::NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp index 31aa101c92..c9e09cc3a4 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp @@ -1,1307 +1,1307 @@ -#include "blobstorage_pdisk_impl.h" - -#include "blobstorage_pdisk_completion_impl.h" -#include "blobstorage_pdisk_logreader.h" -#include "blobstorage_pdisk_syslogreader.h" - -#include <util/random/entropy.h> -#include <util/random/mersenne64.h> - -namespace NKikimr::NPDisk { - -void TPDisk::InitSysLogger() { - ui64 writeSectorIdx = (ui64) ((InitialSysLogWritePosition + Format.SectorSize - 1) / Format.SectorSize); - ui64 beginSectorIdx = (ui64)((FormatSectorSize * ReplicationFactor + Format.SectorSize - 1) / - Format.SectorSize); - ui64 endSectorIdx = beginSectorIdx + Format.SysLogSectorCount * ReplicationFactor; - SysLogger.Reset(new TSysLogWriter(Mon, *BlockDevice.Get(), Format, - SysLogRecord.Nonces.Value[NonceSysLog], 
Format.SysLogKey, BufferPool.Get(), - beginSectorIdx, endSectorIdx, Format.MagicSysLogChunk, 0, nullptr, writeSectorIdx, nullptr, ActorSystem, PDiskId, - &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption)); -} - -bool TPDisk::InitCommonLogger() { - TGuard<TMutex> guard(StateMutex); - const ui32 chunkIdx = InitialLogPosition.ChunkIdx; - ui64 sectorIdx = (InitialLogPosition.OffsetInChunk + Format.SectorSize - 1) / Format.SectorSize; - - TLogChunkInfo *info = &*std::find_if(LogChunks.begin(), LogChunks.end(), [=](const TLogChunkInfo& i) { - return i.ChunkIdx == chunkIdx; - }); - - if (sectorIdx >= UsableSectorsPerLogChunk() && InitialTailBuffer) { - InitialTailBuffer->Release(ActorSystem); - InitialTailBuffer = nullptr; - } - CommonLogger.Reset(new TLogWriter(Mon, *BlockDevice.Get(), Format, - SysLogRecord.Nonces.Value[NonceLog], Format.LogKey, BufferPool.Get(), 0, UsableSectorsPerLogChunk(), - Format.MagicLogChunk, chunkIdx, info, std::min(sectorIdx, UsableSectorsPerLogChunk()), - InitialTailBuffer, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption)); - InitialTailBuffer = nullptr; - if (sectorIdx >= UsableSectorsPerLogChunk()) { - if (!AllocateLogChunks(1, 0, OwnerSystem, 0, EOwnerGroupType::Static, true)) { - return false; - } - CommonLogger->SwitchToNewChunk(TReqId(TReqId::InitCommonLoggerSwitchToNewChunk, 0), nullptr); - - // Log chunk can be collected as soon as noone needs it - ChunkState[chunkIdx].CommitState = TChunkState::DATA_COMMITTED; - } - bool isOk = LogNonceJump(InitialPreviousNonce); - return isOk; -} - -void TPDisk::InitLogChunksInfo() { - TGuard<TMutex> guard(StateMutex); - for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { - for (ui32 owner = 0; owner < it->OwnerLsnRange.size(); ++owner) { - if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { - bool keep = true; - if (SysLogFirstNoncesToKeep.FirstNonceToKeep[owner] > it->LastNonce) { - keep = false; - } else if 
(it->OwnerLsnRange.size() > owner) { - if (OwnerData[owner].CurrentFirstLsnToKeep > it->OwnerLsnRange[owner].LastLsn) { - keep = false; - } - } - if (!keep && it->OwnerLsnRange.size() > owner && it->OwnerLsnRange[owner].IsPresent) { - TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[owner]; - range.IsPresent = false; - Y_VERIFY(it->CurrentUserCount > 0); - it->CurrentUserCount--; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " InitLogChunksInfo chunkIdx# " << it->ChunkIdx - << " Lsn range [" << range.FirstLsn << ", " << range.LastLsn << "]" - << " present nonces[" << it->FirstNonce << ", " << it->LastNonce << "]" - << " dereferenced by ownerId# " << ui32(owner) - << " CurrentFirstLsnToKeep# " << OwnerData[owner].CurrentFirstLsnToKeep - << " CurrentUserCount# " << it->CurrentUserCount); - } - } - } - } - - for (auto info : LogChunks) { - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " InitLogChunksInfo state. LogChunk# " << info); - } - - PrintLogChunksInfo("startup"); -} - -void TPDisk::PrintLogChunksInfo(const TString& msg) { - auto debugPrint = [&] () { - TStringStream str; - str << "PDiskId# " << PDiskId << " PrintLogChunksInfo " << msg; - str << " ["; - for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { - str << "{"; - str << "chunkIdx# " << it->ChunkIdx; - str << " users# " << it->CurrentUserCount; - str << " endOfSplice# " << it->IsEndOfSplice; - - for (ui32 owner = 0; owner < it->OwnerLsnRange.size(); ++owner) { - auto &range = it->OwnerLsnRange[owner]; - if (range.IsPresent) { - str << " {"; - str << "owner# " << owner; - str << " lsn# " << range.FirstLsn << "-" << range.LastLsn; - str << " firstLsnToKeep# " << OwnerData[owner].CurrentFirstLsnToKeep; - str << "},"; - } - } - str << "},"; - } - str << "]"; - return str.Str(); - }; - - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK_TEST, debugPrint()); -} - -bool TPDisk::LogNonceJump(ui64 previousNonce) { - bool 
isWhole = CommonLogger->SectorBytesFree >= sizeof(TNonceJumpLogPageHeader2); - Y_VERIFY(isWhole); - - Y_VERIFY(CommonLogger->NextChunks.size() == 0); - if (!PreallocateLogChunks(CommonLogger->SectorBytesFree, OwnerSystem, 0, EOwnerGroupType::Static, true)) { - return false; - } - TVector<ui32> logChunksToCommit; - if (CommonLogger->NextChunks.size()) { - logChunksToCommit.push_back(CommonLogger->ChunkIdx); - } - - TNonceJumpLogPageHeader2 nonceJump(LogPageNonceJump2, previousNonce, LastNonceJumpLogPageHeader2, LogChunks); - - if (ActorSystem) { - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " ChunkIdx# " << CommonLogger->ChunkIdx - << " previousNonce# " << previousNonce - << " Nonce# " << CommonLogger->Nonce - << " LogNonceJump NonceJump# " << nonceJump.ToString(false)); - } - - CommonLogger->RecordBytesLeft = sizeof(TNonceJumpLogPageHeader2); - CommonLogger->Write(&nonceJump, sizeof(TNonceJumpLogPageHeader2), TReqId(TReqId::LogNonceJumpWriteHeader2, 0), {}); - CommonLogger->TerminateLog(TReqId(TReqId::LogNonceJumpTerminateLog, 0), {}); - OnNonceChange(NonceLog, TReqId(TReqId::NonceChangeForNonceJump, 0), {}); - auto write = MakeHolder<TCompletionLogWrite>(this, TVector<TLogWrite*>(), TVector<TLogWrite*>(), - std::move(logChunksToCommit)); +#include "blobstorage_pdisk_impl.h" + +#include "blobstorage_pdisk_completion_impl.h" +#include "blobstorage_pdisk_logreader.h" +#include "blobstorage_pdisk_syslogreader.h" + +#include <util/random/entropy.h> +#include <util/random/mersenne64.h> + +namespace NKikimr::NPDisk { + +void TPDisk::InitSysLogger() { + ui64 writeSectorIdx = (ui64) ((InitialSysLogWritePosition + Format.SectorSize - 1) / Format.SectorSize); + ui64 beginSectorIdx = (ui64)((FormatSectorSize * ReplicationFactor + Format.SectorSize - 1) / + Format.SectorSize); + ui64 endSectorIdx = beginSectorIdx + Format.SysLogSectorCount * ReplicationFactor; + SysLogger.Reset(new TSysLogWriter(Mon, *BlockDevice.Get(), Format, + 
SysLogRecord.Nonces.Value[NonceSysLog], Format.SysLogKey, BufferPool.Get(), + beginSectorIdx, endSectorIdx, Format.MagicSysLogChunk, 0, nullptr, writeSectorIdx, nullptr, ActorSystem, PDiskId, + &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption)); +} + +bool TPDisk::InitCommonLogger() { + TGuard<TMutex> guard(StateMutex); + const ui32 chunkIdx = InitialLogPosition.ChunkIdx; + ui64 sectorIdx = (InitialLogPosition.OffsetInChunk + Format.SectorSize - 1) / Format.SectorSize; + + TLogChunkInfo *info = &*std::find_if(LogChunks.begin(), LogChunks.end(), [=](const TLogChunkInfo& i) { + return i.ChunkIdx == chunkIdx; + }); + + if (sectorIdx >= UsableSectorsPerLogChunk() && InitialTailBuffer) { + InitialTailBuffer->Release(ActorSystem); + InitialTailBuffer = nullptr; + } + CommonLogger.Reset(new TLogWriter(Mon, *BlockDevice.Get(), Format, + SysLogRecord.Nonces.Value[NonceLog], Format.LogKey, BufferPool.Get(), 0, UsableSectorsPerLogChunk(), + Format.MagicLogChunk, chunkIdx, info, std::min(sectorIdx, UsableSectorsPerLogChunk()), + InitialTailBuffer, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption)); + InitialTailBuffer = nullptr; + if (sectorIdx >= UsableSectorsPerLogChunk()) { + if (!AllocateLogChunks(1, 0, OwnerSystem, 0, EOwnerGroupType::Static, true)) { + return false; + } + CommonLogger->SwitchToNewChunk(TReqId(TReqId::InitCommonLoggerSwitchToNewChunk, 0), nullptr); + + // Log chunk can be collected as soon as noone needs it + ChunkState[chunkIdx].CommitState = TChunkState::DATA_COMMITTED; + } + bool isOk = LogNonceJump(InitialPreviousNonce); + return isOk; +} + +void TPDisk::InitLogChunksInfo() { + TGuard<TMutex> guard(StateMutex); + for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { + for (ui32 owner = 0; owner < it->OwnerLsnRange.size(); ++owner) { + if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { + bool keep = true; + if (SysLogFirstNoncesToKeep.FirstNonceToKeep[owner] > it->LastNonce) { + 
keep = false; + } else if (it->OwnerLsnRange.size() > owner) { + if (OwnerData[owner].CurrentFirstLsnToKeep > it->OwnerLsnRange[owner].LastLsn) { + keep = false; + } + } + if (!keep && it->OwnerLsnRange.size() > owner && it->OwnerLsnRange[owner].IsPresent) { + TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[owner]; + range.IsPresent = false; + Y_VERIFY(it->CurrentUserCount > 0); + it->CurrentUserCount--; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " InitLogChunksInfo chunkIdx# " << it->ChunkIdx + << " Lsn range [" << range.FirstLsn << ", " << range.LastLsn << "]" + << " present nonces[" << it->FirstNonce << ", " << it->LastNonce << "]" + << " dereferenced by ownerId# " << ui32(owner) + << " CurrentFirstLsnToKeep# " << OwnerData[owner].CurrentFirstLsnToKeep + << " CurrentUserCount# " << it->CurrentUserCount); + } + } + } + } + + for (auto info : LogChunks) { + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " InitLogChunksInfo state. 
LogChunk# " << info); + } + + PrintLogChunksInfo("startup"); +} + +void TPDisk::PrintLogChunksInfo(const TString& msg) { + auto debugPrint = [&] () { + TStringStream str; + str << "PDiskId# " << PDiskId << " PrintLogChunksInfo " << msg; + str << " ["; + for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { + str << "{"; + str << "chunkIdx# " << it->ChunkIdx; + str << " users# " << it->CurrentUserCount; + str << " endOfSplice# " << it->IsEndOfSplice; + + for (ui32 owner = 0; owner < it->OwnerLsnRange.size(); ++owner) { + auto &range = it->OwnerLsnRange[owner]; + if (range.IsPresent) { + str << " {"; + str << "owner# " << owner; + str << " lsn# " << range.FirstLsn << "-" << range.LastLsn; + str << " firstLsnToKeep# " << OwnerData[owner].CurrentFirstLsnToKeep; + str << "},"; + } + } + str << "},"; + } + str << "]"; + return str.Str(); + }; + + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK_TEST, debugPrint()); +} + +bool TPDisk::LogNonceJump(ui64 previousNonce) { + bool isWhole = CommonLogger->SectorBytesFree >= sizeof(TNonceJumpLogPageHeader2); + Y_VERIFY(isWhole); + + Y_VERIFY(CommonLogger->NextChunks.size() == 0); + if (!PreallocateLogChunks(CommonLogger->SectorBytesFree, OwnerSystem, 0, EOwnerGroupType::Static, true)) { + return false; + } + TVector<ui32> logChunksToCommit; + if (CommonLogger->NextChunks.size()) { + logChunksToCommit.push_back(CommonLogger->ChunkIdx); + } + + TNonceJumpLogPageHeader2 nonceJump(LogPageNonceJump2, previousNonce, LastNonceJumpLogPageHeader2, LogChunks); + + if (ActorSystem) { + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " ChunkIdx# " << CommonLogger->ChunkIdx + << " previousNonce# " << previousNonce + << " Nonce# " << CommonLogger->Nonce + << " LogNonceJump NonceJump# " << nonceJump.ToString(false)); + } + + CommonLogger->RecordBytesLeft = sizeof(TNonceJumpLogPageHeader2); + CommonLogger->Write(&nonceJump, sizeof(TNonceJumpLogPageHeader2), TReqId(TReqId::LogNonceJumpWriteHeader2, 
0), {}); + CommonLogger->TerminateLog(TReqId(TReqId::LogNonceJumpTerminateLog, 0), {}); + OnNonceChange(NonceLog, TReqId(TReqId::NonceChangeForNonceJump, 0), {}); + auto write = MakeHolder<TCompletionLogWrite>(this, TVector<TLogWrite*>(), TVector<TLogWrite*>(), + std::move(logChunksToCommit)); CommonLogger->Flush(TReqId(TReqId::LogNonceJumpFlush, 0), {}, write.Release()); - - return true; -} - -void TPDisk::GetStartingPoints(NPDisk::TOwner owner, TMap<TLogSignature, NPDisk::TLogRecord> &outStartingPoints) { - TGuard<TMutex> guard(StateMutex); - if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { - outStartingPoints = OwnerData[owner].StartingPoints; - //OwnerData[owner].StartingPoints.clear(); - } else { - outStartingPoints.clear(); - } -} - -void TPDisk::ReadSysLog(const TActorId &pDiskActor) { - TIntrusivePtr<TSysLogReader> sysLogReader(new TSysLogReader(this, ActorSystem, pDiskActor, - TReqId(TReqId::ReadSysLog, 0))); - sysLogReader->Start(); - return; -} - -void TPDisk::ProcessChunk0(const NPDisk::TEvReadLogResult &readLogResult) { - TGuard<TMutex> guard(StateMutex); - ui64 writePosition = 0; - ui64 lastLsn = 0; - TString lastSysLogRecord = ProcessReadSysLogResult(writePosition, lastLsn, readLogResult); - if (lastSysLogRecord.size() == 0) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " lastSysLogRecord.Size()# 0 writePosition# " << writePosition - << " lastLsn# " << lastLsn - << " readLogResult# " << readLogResult.ToString() - << " Marker# BPD47"); - return; - } - ui64 remainingSize = lastSysLogRecord.size(); - if (remainingSize < sizeof(TSysLogRecord)) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " remainingSize# " << remainingSize - << " < sizeof(TSysLogRecord)# " << sizeof(TSysLogRecord) - << " writePosition# " << writePosition - << " lastLsn# " << lastLsn - << " readLogResult# " << readLogResult.ToString() - << " Marker# BPD48"); - return; - } - TSysLogRecord 
*sysLogRecord = (TSysLogRecord*)(lastSysLogRecord.data()); - - if (sysLogRecord->Version < PDISK_SYS_LOG_RECORD_INCOMPATIBLE_VERSION_1000) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << sysLogRecord->ToString().c_str() - << " Marker# BPD49"); - } else { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Incompatible SysLogRecord Version# " << sysLogRecord->Version - << " Marker# BPD50"); - return; - } - - SysLogLsn = lastLsn + 1; - - // Parse VDiskOwners - ui32 ownerCount = sizeof(sysLogRecord->OwnerVDisks) / sizeof(TVDiskID); - Y_VERIFY(ownerCount <= 256); - for (ui32 i = 0; i < ownerCount; ++i) { - TVDiskID &id = sysLogRecord->OwnerVDisks[i]; - id.GroupGeneration = -1; // Clear GroupGeneration in sys log record (for compatibility) - OwnerData[i].VDiskId = id; - OwnerData[i].Status = TOwnerData::VDISK_STATUS_HASNT_COME; - if (id != TVDiskID::InvalidId) { - VDiskOwners[id] = TOwner(i); - AtomicIncrement(TotalOwners); - } - } - SysLogRecord = *sysLogRecord; - SysLogRecord.Version = PDISK_SYS_LOG_RECORD_VERSION_6; - - LOG_NOTICE(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Read SysLogRecord# %s", - (ui32)PDiskId, SysLogRecord.ToString().data()); - - // Set initial chunk owners - // Use actual format info to set busy chunks mask - ui32 chunkCount = (ui32)(Format.DiskSize / (ui64)Format.ChunkSize); - Y_VERIFY_DEBUG(ChunkState.size() == 0); - ChunkState = TVector<TChunkState>(chunkCount); - for (ui32 i = 0; i < Format.SystemChunkCount; ++i) { - ChunkState[i].OwnerId = OwnerSystem; - } - - DriveModel.SetTotalChunksCount(Format.DiskSizeChunks()); - - // Parse chunk owners - TChunkInfo* chunkOwners = (TChunkInfo*)(sysLogRecord + 1); - - // Make sure it is not out of bounds - remainingSize -= sizeof(TSysLogRecord); - ui64 expectedSize = chunkCount * sizeof(TChunkInfo); - if (remainingSize < expectedSize) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " 
<< (ui32)PDiskId - << " remainingSize# " << remainingSize - << " < expectedSize# " << expectedSize - << " writePosition# " << writePosition - << " lastLsn# " << lastLsn - << " readLogResult# " << readLogResult.ToString() - << " Marker# BPD51"); - return; - } - - // Checks are passed, so initialize position - InitialSysLogWritePosition = writePosition; - - for (ui32 i = Format.SystemChunkCount; i < chunkCount; ++i) { - TOwner owner = chunkOwners[i].OwnerId; - ChunkState[i].OwnerId = owner; - if (IsOwnerAllocated(owner)) { - if (IsOwnerUser(owner)) { - ChunkState[i].CommitState = TChunkState::DATA_COMMITTED; - Mon.CommitedDataChunks->Inc(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, - (ui32)PDiskId, (i64)Mon.CommitedDataChunks->Val(), (ui32)i, - (ui32)owner); - } else { - ChunkState[i].CommitState = TChunkState::LOG_COMMITTED; - } - } else { - ChunkState[i].CommitState = TChunkState::FREE; - } - ChunkState[i].Nonce = chunkOwners[i].Nonce; - } - - // TODO: check for log/data chunk intersections while parsing common log, giving priority to syslog as chunks - if (IsOwnerUser(ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId) && - ChunkState[SysLogRecord.LogHeadChunkIdx].CommitState == TChunkState::DATA_COMMITTED) { - Mon.CommitedDataChunks->Dec(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " prev ownerId# %" PRIu32, - (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)SysLogRecord.LogHeadChunkIdx, - (ui32)ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId); - } - - // might come and go. But make sure each coming chunk goes away! 
- LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Line# " << __LINE__ - << " Forcing log head owner to system, chunkIdx# " << SysLogRecord.LogHeadChunkIdx - << " Previous ownerId# " << (ui32)ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId - << " Marker# BPD52"); - ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId = OwnerSystem; - ChunkState[SysLogRecord.LogHeadChunkIdx].CommitState = TChunkState::DATA_COMMITTED; - ChunkState[SysLogRecord.LogHeadChunkIdx].PreviousNonce = SysLogRecord.LogHeadChunkPreviousNonce; - LoggedNonces = SysLogRecord.Nonces; - - // Parse first nonce to keep - TSysLogFirstNoncesToKeep *firstNoncesToKeep = nullptr; - if (sysLogRecord->Version == PDISK_SYS_LOG_RECORD_VERSION_2) { - SysLogFirstNoncesToKeep.Clear(); - } else { - firstNoncesToKeep = (TSysLogFirstNoncesToKeep*)(chunkOwners + chunkCount); - // Make sure it is not out of bounds - ui64 noneSize = (ui64)((char*)firstNoncesToKeep - (char*)sysLogRecord); - if (lastSysLogRecord.size() == noneSize) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " SysLogRecord size=noneSize" - << " Marker# BPD53"); - SysLogFirstNoncesToKeep.Clear(); - } else { - ui64 minSize = noneSize + sizeof(TSysLogFirstNoncesToKeep); - Y_VERIFY_S(lastSysLogRecord.size() >= minSize, - "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); - memcpy(&SysLogFirstNoncesToKeep, firstNoncesToKeep, sizeof(TSysLogFirstNoncesToKeep)); - } - } - - TChunkTrimInfo *trimStateEnd = nullptr; - if (sysLogRecord->Version >= PDISK_SYS_LOG_RECORD_VERSION_4) { - Y_VERIFY(firstNoncesToKeep); - ui64 *trimInfoBytesPtr = (ui64*)(firstNoncesToKeep + 1); - ui64 minSize = (ui64)((char*)(trimInfoBytesPtr + 1) - (char*)sysLogRecord); - Y_VERIFY_S(lastSysLogRecord.size() >= minSize, - "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); - ui64 trimInfoBytes = ReadUnaligned<ui64>(trimInfoBytesPtr); - 
TChunkTrimInfo *trimState = (TChunkTrimInfo*)(trimInfoBytesPtr + 1); - trimStateEnd = trimState + trimInfoBytes / sizeof(TChunkTrimInfo); - minSize = (ui64)((char*)trimStateEnd - (char*)sysLogRecord); - Y_VERIFY_S(lastSysLogRecord.size() >= minSize, - "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); - Y_VERIFY_S(trimInfoBytes == 0 || trimInfoBytes == TChunkTrimInfo::SizeForChunkCount(chunkCount), - "SysLogRecord's ChunkTrimInfo has size# " << trimInfoBytes - << " different from expeceted #" << TChunkTrimInfo::SizeForChunkCount(chunkCount)); - for (ui32 i = 0; i < chunkCount; i++) { - if (trimState[i / 8].IsChunkTrimmed(i % 8) && ChunkState[i].OwnerId == OwnerUnallocated) { - ChunkState[i].OwnerId = OwnerUnallocatedTrimmed; - } - } - } - - // Fill with default value to parse log form the start on old versions - FirstLogChunkToParseCommits = SysLogRecord.LogHeadChunkIdx; - - if (sysLogRecord->Version >= PDISK_SYS_LOG_RECORD_VERSION_6) { - Y_VERIFY(trimStateEnd); - ui32 *firstChunk = reinterpret_cast<ui32*>(trimStateEnd); - ui64 minSize = (ui64)((char*)(firstChunk + 1) - (char*)sysLogRecord); - Y_VERIFY_S(lastSysLogRecord.size() >= minSize, - "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); - FirstLogChunkToParseCommits = ReadUnaligned<ui32>(firstChunk); - } - - PrintChunksDebugInfo(); -} - -void TPDisk::PrintChunksDebugInfo() { - auto print = [&] () { - std::map<TOwner, std::vector<ui32>> ownerToChunks; - - for (ui32 i = 0; i < ChunkState.size(); ++i) { - const auto& state = ChunkState[i]; - ownerToChunks[state.OwnerId].push_back(i); - } - - TStringStream str; - str << "PDiskId# " << PDiskId << " PrintChunksDebugInfo; "; - for (auto& [owner, chunks] : ownerToChunks) { - std::sort(chunks.begin(), chunks.end()); - str << " Owner# " << owner << " ["; - bool first = true; - for (auto idx : chunks) { - str << (std::exchange(first, false) ? 
"" : " ") << idx; - } - str << "];"; - } - return str.Str(); - }; - - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, print()); -} - -TString TPDisk::ProcessReadSysLogResult(ui64 &outWritePosition, ui64 &outLsn, - const NPDisk::TEvReadLogResult &readLogResult) { - ui64 sectorIdx = (readLogResult.NextPosition.OffsetInChunk + Format.SectorSize - 1) / Format.SectorSize; - ui64 firstSysLogSectorIdx = Format.FirstSysLogSectorIdx(); - ui64 sectorGroup = (sectorIdx - firstSysLogSectorIdx) / ReplicationFactor; - - outWritePosition = (firstSysLogSectorIdx + sectorGroup % Format.SysLogSectorCount * ReplicationFactor) - * Format.SectorSize; - Y_VERIFY(outWritePosition > 0); - - if (!readLogResult.Results.size()) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " ProcessReadSysLogResult Results.size()# 0" - << " Marker# BPD54"); - outLsn = 0; - TString data; - return data; - } - ui64 lastSysLogLsn = readLogResult.Results[0].Lsn; - TString data = readLogResult.Results[0].Data; - for (ui32 i = 1; i < readLogResult.Results.size(); ++i) { - if (lastSysLogLsn < readLogResult.Results[i].Lsn) { - lastSysLogLsn = readLogResult.Results[i].Lsn; - data = readLogResult.Results[i].Data; - } - } - outLsn = lastSysLogLsn; - return data; -} - -void TPDisk::ReadAndParseMainLog(const TActorId &pDiskActor) { - TVector<TChunkIdx> chunksToRead; - TIntrusivePtr<TLogReaderBase> logReader(new TLogReader(true, this, ActorSystem, pDiskActor, 0, TLogPosition{0, 0}, - EOwnerGroupType::Static, TLogPosition{0, 0}, (ui64)-1, SysLogRecord.LogHeadChunkPreviousNonce, 0, 0, - TReqId(TReqId::ReadAndParseMainLog, 0), std::move(chunksToRead), 0, 0, TVDiskID::InvalidId)); - TVector<ui64> badOffsets; - // Emits subrequests TCompletionLogReadPart which contains TIntrusivePtr to logReader - logReader->Exec(0, badOffsets, ActorSystem); -} - -void TPDisk::ProcessLogReadQueue() { - for (auto& req : JointLogReads) { - switch (req->GetType()) { - case 
ERequestType::RequestLogRead: - { - TLogRead &logRead = *static_cast<TLogRead*>(req); - auto& ownerData = OwnerData[logRead.Owner]; - ownerData.Status = TOwnerData::VDISK_STATUS_READING_LOG; - TLogPosition logStartPosition{0, 0}; - if (logRead.Owner < OwnerData.size() && ownerData.VDiskId != TVDiskID::InvalidId) { - logStartPosition = ownerData.LogStartPosition; - } - TVector<TChunkIdx> chunksToRead; - for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { - if (it->OwnerLsnRange.size() > logRead.Owner && it->OwnerLsnRange[logRead.Owner].IsPresent) { - chunksToRead.push_back(it->ChunkIdx); - } - } - ui64 firstLsnToKeep = 0; - ui64 firstNonceToKeep = 0; - if (ownerData.VDiskId != TVDiskID::InvalidId) { - firstLsnToKeep = ownerData.CurrentFirstLsnToKeep; - firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner]; - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " PrepareToRead ownerId# %" PRIu32 - " FirstLsnToKeep: %" PRIu64 " FirstNonceToKeep: %" PRIu64, - (ui32)PDiskId, (ui32)logRead.Owner, (ui64)firstLsnToKeep, (ui64)firstNonceToKeep); - } - ui32 endLogChunkIdx = CommonLogger->ChunkIdx; - ui64 endLogSectorIdx = CommonLogger->SectorIdx; - ownerData.LogReader = new TLogReader(false, - this, ActorSystem, logRead.Sender, logRead.Owner, logStartPosition, - logRead.OwnerGroupType,logRead.Position, - logRead.SizeLimit, 0, endLogChunkIdx, endLogSectorIdx, logRead.ReqId, - std::move(chunksToRead), firstLsnToKeep, firstNonceToKeep, - ownerData.VDiskId); - TVector<ui64> badOffsets; - ownerData.LogReader->Exec(0, badOffsets, ActorSystem); - break; - } - case ERequestType::RequestLogReadContinue: - { - TLogReadContinue *read = static_cast<TLogReadContinue*>(req); - read->CompletionAction->CostNs = DriveModel.TimeForSizeNs(read->Size, read->Offset / Format.ChunkSize, - TDriveModel::OP_TYPE_READ); - BlockDevice->PreadAsync(read->Data, read->Size, read->Offset, read->CompletionAction, - read->ReqId, &read->TraceId); // ??? 
TraceId - break; - } - case ERequestType::RequestLogSectorRestore: - { - TLogSectorRestore *restore = static_cast<TLogSectorRestore*>(req); - BlockDevice->PwriteAsync(restore->Data, restore->Size, restore->Offset, restore->CompletionAction, - restore->ReqId, {}); - break; - } - case ERequestType::RequestLogReadResultProcess: - { - TLogReadResultProcess *result = static_cast<TLogReadResultProcess*>(req); - ProcessReadLogResult(*result->ReadLogResult->Get(), result->Sender); - break; - } - default: - Y_FAIL(); - break; - } - delete req; - } - JointLogReads.clear(); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// SysLog writing -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::WriteSysLogRestorePoint(TCompletionAction *action, TReqId reqId, NWilson::TTraceId *traceId) { - TGuard<TMutex> guard(StateMutex); - LoggedNonces = SysLogRecord.Nonces; - ui32 chunkCount = (ui32)(Format.DiskSize / (ui64)Format.ChunkSize); - ui32 chunkOwnersSize = ui32(sizeof(TChunkInfo)) * chunkCount; - // Must be ui64 - ui64 chunkIsTrimmedSize = TChunkTrimInfo::SizeForChunkCount(chunkCount); - TVector<TChunkInfo> chunkOwners(chunkCount); - TVector<TChunkTrimInfo> chunkIsTrimmed(TChunkTrimInfo::RecordsForChunkCount(chunkCount), TChunkTrimInfo(0)); - for (ui32 i = 0; i < chunkCount; ++i) { - if (ChunkState.size() > i - && (ChunkState[i].CommitState == TChunkState::DATA_COMMITTED - || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS - || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_ON_QUARANTINE) - && IsOwnerUser(ChunkState[i].OwnerId)) { - chunkOwners[i].OwnerId = ChunkState[i].OwnerId; - chunkOwners[i].Nonce = ChunkState[i].Nonce; - } else { - if (ChunkState.size() > i && ChunkState[i].OwnerId == OwnerUnallocatedTrimmed) { - chunkIsTrimmed[i / 8].SetChunkTrimmed(i % 8); - } - // Write 
OwnerUnallocated for forward compatibility - chunkOwners[i].OwnerId = OwnerUnallocated; - chunkOwners[i].Nonce = 0; - } - } - - if (CommonLogger) { - std::optional<TChunkIdx> firstChunk; - for (auto rit = LogChunks.crbegin(); rit != LogChunks.crend(); ++rit) { - if (rit->IsEndOfSplice) { - firstChunk = rit->ChunkIdx; - break; - } - } - if (!firstChunk && !LogChunks.empty()) { - firstChunk = LogChunks.front().ChunkIdx; - } - FirstLogChunkToParseCommits = firstChunk.value_or(SysLogRecord.LogHeadChunkIdx); - } - - ui32 recordSize = sizeof(TSysLogRecord) + chunkOwnersSize + sizeof(TSysLogFirstNoncesToKeep) - + sizeof(ui64) + chunkIsTrimmedSize + sizeof(ui32); - ui64 beginSectorIdx = SysLogger->SectorIdx; - *Mon.BandwidthPSysLogPayload += recordSize; - *Mon.BandwidthPSysLogRecordHeader += sizeof(TFirstLogPageHeader); - - SysLogger->LogHeader(0, 0, SysLogLsn, recordSize, reqId, traceId); - SysLogger->LogDataPart(&SysLogRecord, sizeof(TSysLogRecord), reqId, traceId); - SysLogger->LogDataPart(&chunkOwners[0], chunkOwnersSize, reqId, traceId); - SysLogger->LogDataPart(&SysLogFirstNoncesToKeep, sizeof(TSysLogFirstNoncesToKeep), reqId, traceId); - SysLogger->LogDataPart(&chunkIsTrimmedSize, sizeof(chunkIsTrimmedSize), reqId, traceId); - SysLogger->LogDataPart(&chunkIsTrimmed[0], chunkIsTrimmedSize, reqId, traceId); - SysLogger->LogDataPart(&FirstLogChunkToParseCommits, sizeof(FirstLogChunkToParseCommits), reqId, traceId); - SysLogger->TerminateLog(reqId, traceId); + + return true; +} + +void TPDisk::GetStartingPoints(NPDisk::TOwner owner, TMap<TLogSignature, NPDisk::TLogRecord> &outStartingPoints) { + TGuard<TMutex> guard(StateMutex); + if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { + outStartingPoints = OwnerData[owner].StartingPoints; + //OwnerData[owner].StartingPoints.clear(); + } else { + outStartingPoints.clear(); + } +} + +void TPDisk::ReadSysLog(const TActorId &pDiskActor) { + TIntrusivePtr<TSysLogReader> sysLogReader(new TSysLogReader(this, ActorSystem, 
pDiskActor, + TReqId(TReqId::ReadSysLog, 0))); + sysLogReader->Start(); + return; +} + +void TPDisk::ProcessChunk0(const NPDisk::TEvReadLogResult &readLogResult) { + TGuard<TMutex> guard(StateMutex); + ui64 writePosition = 0; + ui64 lastLsn = 0; + TString lastSysLogRecord = ProcessReadSysLogResult(writePosition, lastLsn, readLogResult); + if (lastSysLogRecord.size() == 0) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " lastSysLogRecord.Size()# 0 writePosition# " << writePosition + << " lastLsn# " << lastLsn + << " readLogResult# " << readLogResult.ToString() + << " Marker# BPD47"); + return; + } + ui64 remainingSize = lastSysLogRecord.size(); + if (remainingSize < sizeof(TSysLogRecord)) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " remainingSize# " << remainingSize + << " < sizeof(TSysLogRecord)# " << sizeof(TSysLogRecord) + << " writePosition# " << writePosition + << " lastLsn# " << lastLsn + << " readLogResult# " << readLogResult.ToString() + << " Marker# BPD48"); + return; + } + TSysLogRecord *sysLogRecord = (TSysLogRecord*)(lastSysLogRecord.data()); + + if (sysLogRecord->Version < PDISK_SYS_LOG_RECORD_INCOMPATIBLE_VERSION_1000) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << sysLogRecord->ToString().c_str() + << " Marker# BPD49"); + } else { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Incompatible SysLogRecord Version# " << sysLogRecord->Version + << " Marker# BPD50"); + return; + } + + SysLogLsn = lastLsn + 1; + + // Parse VDiskOwners + ui32 ownerCount = sizeof(sysLogRecord->OwnerVDisks) / sizeof(TVDiskID); + Y_VERIFY(ownerCount <= 256); + for (ui32 i = 0; i < ownerCount; ++i) { + TVDiskID &id = sysLogRecord->OwnerVDisks[i]; + id.GroupGeneration = -1; // Clear GroupGeneration in sys log record (for compatibility) + OwnerData[i].VDiskId = id; + OwnerData[i].Status = 
TOwnerData::VDISK_STATUS_HASNT_COME; + if (id != TVDiskID::InvalidId) { + VDiskOwners[id] = TOwner(i); + AtomicIncrement(TotalOwners); + } + } + SysLogRecord = *sysLogRecord; + SysLogRecord.Version = PDISK_SYS_LOG_RECORD_VERSION_6; + + LOG_NOTICE(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " Read SysLogRecord# %s", + (ui32)PDiskId, SysLogRecord.ToString().data()); + + // Set initial chunk owners + // Use actual format info to set busy chunks mask + ui32 chunkCount = (ui32)(Format.DiskSize / (ui64)Format.ChunkSize); + Y_VERIFY_DEBUG(ChunkState.size() == 0); + ChunkState = TVector<TChunkState>(chunkCount); + for (ui32 i = 0; i < Format.SystemChunkCount; ++i) { + ChunkState[i].OwnerId = OwnerSystem; + } + + DriveModel.SetTotalChunksCount(Format.DiskSizeChunks()); + + // Parse chunk owners + TChunkInfo* chunkOwners = (TChunkInfo*)(sysLogRecord + 1); + + // Make sure it is not out of bounds + remainingSize -= sizeof(TSysLogRecord); + ui64 expectedSize = chunkCount * sizeof(TChunkInfo); + if (remainingSize < expectedSize) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " remainingSize# " << remainingSize + << " < expectedSize# " << expectedSize + << " writePosition# " << writePosition + << " lastLsn# " << lastLsn + << " readLogResult# " << readLogResult.ToString() + << " Marker# BPD51"); + return; + } + + // Checks are passed, so initialize position + InitialSysLogWritePosition = writePosition; + + for (ui32 i = Format.SystemChunkCount; i < chunkCount; ++i) { + TOwner owner = chunkOwners[i].OwnerId; + ChunkState[i].OwnerId = owner; + if (IsOwnerAllocated(owner)) { + if (IsOwnerUser(owner)) { + ChunkState[i].CommitState = TChunkState::DATA_COMMITTED; + Mon.CommitedDataChunks->Inc(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, + (ui32)PDiskId, (i64)Mon.CommitedDataChunks->Val(), (ui32)i, + (ui32)owner); + } 
else { + ChunkState[i].CommitState = TChunkState::LOG_COMMITTED; + } + } else { + ChunkState[i].CommitState = TChunkState::FREE; + } + ChunkState[i].Nonce = chunkOwners[i].Nonce; + } + + // TODO: check for log/data chunk intersections while parsing common log, giving priority to syslog as chunks + if (IsOwnerUser(ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId) && + ChunkState[SysLogRecord.LogHeadChunkIdx].CommitState == TChunkState::DATA_COMMITTED) { + Mon.CommitedDataChunks->Dec(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " prev ownerId# %" PRIu32, + (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)SysLogRecord.LogHeadChunkIdx, + (ui32)ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId); + } + + // might come and go. But make sure each coming chunk goes away! + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Line# " << __LINE__ + << " Forcing log head owner to system, chunkIdx# " << SysLogRecord.LogHeadChunkIdx + << " Previous ownerId# " << (ui32)ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId + << " Marker# BPD52"); + ChunkState[SysLogRecord.LogHeadChunkIdx].OwnerId = OwnerSystem; + ChunkState[SysLogRecord.LogHeadChunkIdx].CommitState = TChunkState::DATA_COMMITTED; + ChunkState[SysLogRecord.LogHeadChunkIdx].PreviousNonce = SysLogRecord.LogHeadChunkPreviousNonce; + LoggedNonces = SysLogRecord.Nonces; + + // Parse first nonce to keep + TSysLogFirstNoncesToKeep *firstNoncesToKeep = nullptr; + if (sysLogRecord->Version == PDISK_SYS_LOG_RECORD_VERSION_2) { + SysLogFirstNoncesToKeep.Clear(); + } else { + firstNoncesToKeep = (TSysLogFirstNoncesToKeep*)(chunkOwners + chunkCount); + // Make sure it is not out of bounds + ui64 noneSize = (ui64)((char*)firstNoncesToKeep - (char*)sysLogRecord); + if (lastSysLogRecord.size() == noneSize) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << 
(ui32)PDiskId + << " SysLogRecord size=noneSize" + << " Marker# BPD53"); + SysLogFirstNoncesToKeep.Clear(); + } else { + ui64 minSize = noneSize + sizeof(TSysLogFirstNoncesToKeep); + Y_VERIFY_S(lastSysLogRecord.size() >= minSize, + "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); + memcpy(&SysLogFirstNoncesToKeep, firstNoncesToKeep, sizeof(TSysLogFirstNoncesToKeep)); + } + } + + TChunkTrimInfo *trimStateEnd = nullptr; + if (sysLogRecord->Version >= PDISK_SYS_LOG_RECORD_VERSION_4) { + Y_VERIFY(firstNoncesToKeep); + ui64 *trimInfoBytesPtr = (ui64*)(firstNoncesToKeep + 1); + ui64 minSize = (ui64)((char*)(trimInfoBytesPtr + 1) - (char*)sysLogRecord); + Y_VERIFY_S(lastSysLogRecord.size() >= minSize, + "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); + ui64 trimInfoBytes = ReadUnaligned<ui64>(trimInfoBytesPtr); + TChunkTrimInfo *trimState = (TChunkTrimInfo*)(trimInfoBytesPtr + 1); + trimStateEnd = trimState + trimInfoBytes / sizeof(TChunkTrimInfo); + minSize = (ui64)((char*)trimStateEnd - (char*)sysLogRecord); + Y_VERIFY_S(lastSysLogRecord.size() >= minSize, + "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); + Y_VERIFY_S(trimInfoBytes == 0 || trimInfoBytes == TChunkTrimInfo::SizeForChunkCount(chunkCount), + "SysLogRecord's ChunkTrimInfo has size# " << trimInfoBytes + << " different from expeceted #" << TChunkTrimInfo::SizeForChunkCount(chunkCount)); + for (ui32 i = 0; i < chunkCount; i++) { + if (trimState[i / 8].IsChunkTrimmed(i % 8) && ChunkState[i].OwnerId == OwnerUnallocated) { + ChunkState[i].OwnerId = OwnerUnallocatedTrimmed; + } + } + } + + // Fill with default value to parse log form the start on old versions + FirstLogChunkToParseCommits = SysLogRecord.LogHeadChunkIdx; + + if (sysLogRecord->Version >= PDISK_SYS_LOG_RECORD_VERSION_6) { + Y_VERIFY(trimStateEnd); + ui32 *firstChunk = reinterpret_cast<ui32*>(trimStateEnd); + ui64 minSize = 
(ui64)((char*)(firstChunk + 1) - (char*)sysLogRecord); + Y_VERIFY_S(lastSysLogRecord.size() >= minSize, + "SysLogRecord is too small, minSize# " << minSize << " size# " << lastSysLogRecord.size()); + FirstLogChunkToParseCommits = ReadUnaligned<ui32>(firstChunk); + } + + PrintChunksDebugInfo(); +} + +void TPDisk::PrintChunksDebugInfo() { + auto print = [&] () { + std::map<TOwner, std::vector<ui32>> ownerToChunks; + + for (ui32 i = 0; i < ChunkState.size(); ++i) { + const auto& state = ChunkState[i]; + ownerToChunks[state.OwnerId].push_back(i); + } + + TStringStream str; + str << "PDiskId# " << PDiskId << " PrintChunksDebugInfo; "; + for (auto& [owner, chunks] : ownerToChunks) { + std::sort(chunks.begin(), chunks.end()); + str << " Owner# " << owner << " ["; + bool first = true; + for (auto idx : chunks) { + str << (std::exchange(first, false) ? "" : " ") << idx; + } + str << "];"; + } + return str.Str(); + }; + + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, print()); +} + +TString TPDisk::ProcessReadSysLogResult(ui64 &outWritePosition, ui64 &outLsn, + const NPDisk::TEvReadLogResult &readLogResult) { + ui64 sectorIdx = (readLogResult.NextPosition.OffsetInChunk + Format.SectorSize - 1) / Format.SectorSize; + ui64 firstSysLogSectorIdx = Format.FirstSysLogSectorIdx(); + ui64 sectorGroup = (sectorIdx - firstSysLogSectorIdx) / ReplicationFactor; + + outWritePosition = (firstSysLogSectorIdx + sectorGroup % Format.SysLogSectorCount * ReplicationFactor) + * Format.SectorSize; + Y_VERIFY(outWritePosition > 0); + + if (!readLogResult.Results.size()) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " ProcessReadSysLogResult Results.size()# 0" + << " Marker# BPD54"); + outLsn = 0; + TString data; + return data; + } + ui64 lastSysLogLsn = readLogResult.Results[0].Lsn; + TString data = readLogResult.Results[0].Data; + for (ui32 i = 1; i < readLogResult.Results.size(); ++i) { + if (lastSysLogLsn < readLogResult.Results[i].Lsn) { + 
lastSysLogLsn = readLogResult.Results[i].Lsn; + data = readLogResult.Results[i].Data; + } + } + outLsn = lastSysLogLsn; + return data; +} + +void TPDisk::ReadAndParseMainLog(const TActorId &pDiskActor) { + TVector<TChunkIdx> chunksToRead; + TIntrusivePtr<TLogReaderBase> logReader(new TLogReader(true, this, ActorSystem, pDiskActor, 0, TLogPosition{0, 0}, + EOwnerGroupType::Static, TLogPosition{0, 0}, (ui64)-1, SysLogRecord.LogHeadChunkPreviousNonce, 0, 0, + TReqId(TReqId::ReadAndParseMainLog, 0), std::move(chunksToRead), 0, 0, TVDiskID::InvalidId)); + TVector<ui64> badOffsets; + // Emits subrequests TCompletionLogReadPart which contains TIntrusivePtr to logReader + logReader->Exec(0, badOffsets, ActorSystem); +} + +void TPDisk::ProcessLogReadQueue() { + for (auto& req : JointLogReads) { + switch (req->GetType()) { + case ERequestType::RequestLogRead: + { + TLogRead &logRead = *static_cast<TLogRead*>(req); + auto& ownerData = OwnerData[logRead.Owner]; + ownerData.Status = TOwnerData::VDISK_STATUS_READING_LOG; + TLogPosition logStartPosition{0, 0}; + if (logRead.Owner < OwnerData.size() && ownerData.VDiskId != TVDiskID::InvalidId) { + logStartPosition = ownerData.LogStartPosition; + } + TVector<TChunkIdx> chunksToRead; + for (auto it = LogChunks.begin(); it != LogChunks.end(); ++it) { + if (it->OwnerLsnRange.size() > logRead.Owner && it->OwnerLsnRange[logRead.Owner].IsPresent) { + chunksToRead.push_back(it->ChunkIdx); + } + } + ui64 firstLsnToKeep = 0; + ui64 firstNonceToKeep = 0; + if (ownerData.VDiskId != TVDiskID::InvalidId) { + firstLsnToKeep = ownerData.CurrentFirstLsnToKeep; + firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner]; + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " PrepareToRead ownerId# %" PRIu32 + " FirstLsnToKeep: %" PRIu64 " FirstNonceToKeep: %" PRIu64, + (ui32)PDiskId, (ui32)logRead.Owner, (ui64)firstLsnToKeep, (ui64)firstNonceToKeep); + } + ui32 endLogChunkIdx = CommonLogger->ChunkIdx; + ui64 
endLogSectorIdx = CommonLogger->SectorIdx; + ownerData.LogReader = new TLogReader(false, + this, ActorSystem, logRead.Sender, logRead.Owner, logStartPosition, + logRead.OwnerGroupType,logRead.Position, + logRead.SizeLimit, 0, endLogChunkIdx, endLogSectorIdx, logRead.ReqId, + std::move(chunksToRead), firstLsnToKeep, firstNonceToKeep, + ownerData.VDiskId); + TVector<ui64> badOffsets; + ownerData.LogReader->Exec(0, badOffsets, ActorSystem); + break; + } + case ERequestType::RequestLogReadContinue: + { + TLogReadContinue *read = static_cast<TLogReadContinue*>(req); + read->CompletionAction->CostNs = DriveModel.TimeForSizeNs(read->Size, read->Offset / Format.ChunkSize, + TDriveModel::OP_TYPE_READ); + BlockDevice->PreadAsync(read->Data, read->Size, read->Offset, read->CompletionAction, + read->ReqId, &read->TraceId); // ??? TraceId + break; + } + case ERequestType::RequestLogSectorRestore: + { + TLogSectorRestore *restore = static_cast<TLogSectorRestore*>(req); + BlockDevice->PwriteAsync(restore->Data, restore->Size, restore->Offset, restore->CompletionAction, + restore->ReqId, {}); + break; + } + case ERequestType::RequestLogReadResultProcess: + { + TLogReadResultProcess *result = static_cast<TLogReadResultProcess*>(req); + ProcessReadLogResult(*result->ReadLogResult->Get(), result->Sender); + break; + } + default: + Y_FAIL(); + break; + } + delete req; + } + JointLogReads.clear(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// SysLog writing +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void TPDisk::WriteSysLogRestorePoint(TCompletionAction *action, TReqId reqId, NWilson::TTraceId *traceId) { + TGuard<TMutex> guard(StateMutex); + LoggedNonces = SysLogRecord.Nonces; + ui32 chunkCount = (ui32)(Format.DiskSize / (ui64)Format.ChunkSize); + ui32 chunkOwnersSize = ui32(sizeof(TChunkInfo)) * chunkCount; + // Must be ui64 + 
ui64 chunkIsTrimmedSize = TChunkTrimInfo::SizeForChunkCount(chunkCount); + TVector<TChunkInfo> chunkOwners(chunkCount); + TVector<TChunkTrimInfo> chunkIsTrimmed(TChunkTrimInfo::RecordsForChunkCount(chunkCount), TChunkTrimInfo(0)); + for (ui32 i = 0; i < chunkCount; ++i) { + if (ChunkState.size() > i + && (ChunkState[i].CommitState == TChunkState::DATA_COMMITTED + || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS + || ChunkState[i].CommitState == TChunkState::DATA_COMMITTED_ON_QUARANTINE) + && IsOwnerUser(ChunkState[i].OwnerId)) { + chunkOwners[i].OwnerId = ChunkState[i].OwnerId; + chunkOwners[i].Nonce = ChunkState[i].Nonce; + } else { + if (ChunkState.size() > i && ChunkState[i].OwnerId == OwnerUnallocatedTrimmed) { + chunkIsTrimmed[i / 8].SetChunkTrimmed(i % 8); + } + // Write OwnerUnallocated for forward compatibility + chunkOwners[i].OwnerId = OwnerUnallocated; + chunkOwners[i].Nonce = 0; + } + } + + if (CommonLogger) { + std::optional<TChunkIdx> firstChunk; + for (auto rit = LogChunks.crbegin(); rit != LogChunks.crend(); ++rit) { + if (rit->IsEndOfSplice) { + firstChunk = rit->ChunkIdx; + break; + } + } + if (!firstChunk && !LogChunks.empty()) { + firstChunk = LogChunks.front().ChunkIdx; + } + FirstLogChunkToParseCommits = firstChunk.value_or(SysLogRecord.LogHeadChunkIdx); + } + + ui32 recordSize = sizeof(TSysLogRecord) + chunkOwnersSize + sizeof(TSysLogFirstNoncesToKeep) + + sizeof(ui64) + chunkIsTrimmedSize + sizeof(ui32); + ui64 beginSectorIdx = SysLogger->SectorIdx; + *Mon.BandwidthPSysLogPayload += recordSize; + *Mon.BandwidthPSysLogRecordHeader += sizeof(TFirstLogPageHeader); + + SysLogger->LogHeader(0, 0, SysLogLsn, recordSize, reqId, traceId); + SysLogger->LogDataPart(&SysLogRecord, sizeof(TSysLogRecord), reqId, traceId); + SysLogger->LogDataPart(&chunkOwners[0], chunkOwnersSize, reqId, traceId); + SysLogger->LogDataPart(&SysLogFirstNoncesToKeep, sizeof(TSysLogFirstNoncesToKeep), reqId, traceId); + 
SysLogger->LogDataPart(&chunkIsTrimmedSize, sizeof(chunkIsTrimmedSize), reqId, traceId); + SysLogger->LogDataPart(&chunkIsTrimmed[0], chunkIsTrimmedSize, reqId, traceId); + SysLogger->LogDataPart(&FirstLogChunkToParseCommits, sizeof(FirstLogChunkToParseCommits), reqId, traceId); + SysLogger->TerminateLog(reqId, traceId); SysLogger->Flush(reqId, traceId, action); - - ui64 endSectorIdx = SysLogger->SectorIdx; - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK_SYSLOG, "PDiskId# " << PDiskId - << " WriteSysLogRestorePoint FirstLogChunkToParseCommits# " << FirstLogChunkToParseCommits - << " CommonLogger# " << (void*)CommonLogger.Get() - << " LogChunks.size()# " << LogChunks.size() - << " LogChunks.front().ChunkIdx# " << (LogChunks.empty() ? -1 : (i64)LogChunks.front().ChunkIdx) - << " beginSectorIdx# " << beginSectorIdx - << " endSectorIdx# " << endSectorIdx - << " Marker# BPD69"); - } - ++SysLogLsn; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Common log writing -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void TPDisk::ProcessLogWriteQueueAndCommits() { - if (JointLogWrites.empty()) - return; - - NHPTimer::STime now = HPNow(); - for (TLogWrite *logCommit : JointCommits) { - Mon.LogQueueTime.Increment(logCommit->LifeDurationMs(now)); - - TStringStream errorReason; - NKikimrProto::EReplyStatus status = ValidateRequest(logCommit, errorReason); - if (status == NKikimrProto::OK) { - status = BeforeLoggingCommitRecord(*logCommit, errorReason); - } - if (status != NKikimrProto::OK) { - PrepareLogError(logCommit, errorReason, status); - } - } - NWilson::TTraceId *traceId = nullptr; - size_t logOperationSizeBytes = 0; - TVector<ui32> logChunksToCommit; - for (TLogWrite *logWrite : JointLogWrites) { - Y_VERIFY_DEBUG(logWrite); - logOperationSizeBytes += logWrite->Data.size(); - TStringStream 
errorReason; - NKikimrProto::EReplyStatus status = ValidateRequest(logWrite, errorReason); - if (status == NKikimrProto::OK) { - LogWrite(*logWrite, logChunksToCommit); - logWrite->ScheduleTime = HPNow(); - if (logWrite->TraceId) { - traceId = &logWrite->TraceId; - } - } else { - PrepareLogError(logWrite, errorReason, status); - } - } - for (TLogWrite *logWrite : JointLogWrites) { - LWTRACK(PDiskLogWriteFlush, logWrite->Orbit, PDiskId, logWrite->ReqId.Id, HPSecondsFloat(logWrite->CreationTime), - double(logWrite->Cost) / 1000000.0, HPSecondsFloat(logWrite->Deadline), - logWrite->Owner, logWrite->IsFast, logWrite->PriorityClass); - } - TReqId reqId = JointLogWrites.back()->ReqId; - auto write = MakeHolder<TCompletionLogWrite>( - this, std::move(JointLogWrites), std::move(JointCommits), std::move(logChunksToCommit)); - LogFlush(write.Get(), write->GetCommitedLogChunksPtr(), reqId, traceId); - Y_UNUSED(write.Release()); - - JointCommits.clear(); - JointLogWrites.clear(); - - // Check if we can TRIM some chunks that were deleted - TryTrimChunk(false, 0); - - Mon.LogOperationSizeBytes.Increment(logOperationSizeBytes); -} - -bool TPDisk::PreallocateLogChunks(ui64 headedRecordSize, TOwner owner, ui64 lsn, EOwnerGroupType ownerGroupType, - bool isAllowedForSpaceRed) { - ui32 additionalChunksNeeded = 0; - ui32 additionalChunksContainingPayload = 0; - if (CommonLogger->SectorBytesFree < headedRecordSize + sizeof(TFirstLogPageHeader)) { - ui64 additionalDataSize = headedRecordSize + sizeof(TFirstLogPageHeader) - - CommonLogger->SectorBytesFree; - ui64 logPayloadPerSector = Format.SectorPayloadSize() - sizeof(TLogPageHeader); - ui64 additionalPayloadSectors = (additionalDataSize + logPayloadPerSector - 1) / logPayloadPerSector; - ui64 usableSectorsPerLogChunk = UsableSectorsPerLogChunk(); + + ui64 endSectorIdx = SysLogger->SectorIdx; + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK_SYSLOG, "PDiskId# " << PDiskId + << " WriteSysLogRestorePoint 
FirstLogChunkToParseCommits# " << FirstLogChunkToParseCommits + << " CommonLogger# " << (void*)CommonLogger.Get() + << " LogChunks.size()# " << LogChunks.size() + << " LogChunks.front().ChunkIdx# " << (LogChunks.empty() ? -1 : (i64)LogChunks.front().ChunkIdx) + << " beginSectorIdx# " << beginSectorIdx + << " endSectorIdx# " << endSectorIdx + << " Marker# BPD69"); + } + ++SysLogLsn; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Common log writing +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void TPDisk::ProcessLogWriteQueueAndCommits() { + if (JointLogWrites.empty()) + return; + + NHPTimer::STime now = HPNow(); + for (TLogWrite *logCommit : JointCommits) { + Mon.LogQueueTime.Increment(logCommit->LifeDurationMs(now)); + + TStringStream errorReason; + NKikimrProto::EReplyStatus status = ValidateRequest(logCommit, errorReason); + if (status == NKikimrProto::OK) { + status = BeforeLoggingCommitRecord(*logCommit, errorReason); + } + if (status != NKikimrProto::OK) { + PrepareLogError(logCommit, errorReason, status); + } + } + NWilson::TTraceId *traceId = nullptr; + size_t logOperationSizeBytes = 0; + TVector<ui32> logChunksToCommit; + for (TLogWrite *logWrite : JointLogWrites) { + Y_VERIFY_DEBUG(logWrite); + logOperationSizeBytes += logWrite->Data.size(); + TStringStream errorReason; + NKikimrProto::EReplyStatus status = ValidateRequest(logWrite, errorReason); + if (status == NKikimrProto::OK) { + LogWrite(*logWrite, logChunksToCommit); + logWrite->ScheduleTime = HPNow(); + if (logWrite->TraceId) { + traceId = &logWrite->TraceId; + } + } else { + PrepareLogError(logWrite, errorReason, status); + } + } + for (TLogWrite *logWrite : JointLogWrites) { + LWTRACK(PDiskLogWriteFlush, logWrite->Orbit, PDiskId, logWrite->ReqId.Id, HPSecondsFloat(logWrite->CreationTime), + double(logWrite->Cost) / 1000000.0, 
HPSecondsFloat(logWrite->Deadline), + logWrite->Owner, logWrite->IsFast, logWrite->PriorityClass); + } + TReqId reqId = JointLogWrites.back()->ReqId; + auto write = MakeHolder<TCompletionLogWrite>( + this, std::move(JointLogWrites), std::move(JointCommits), std::move(logChunksToCommit)); + LogFlush(write.Get(), write->GetCommitedLogChunksPtr(), reqId, traceId); + Y_UNUSED(write.Release()); + + JointCommits.clear(); + JointLogWrites.clear(); + + // Check if we can TRIM some chunks that were deleted + TryTrimChunk(false, 0); + + Mon.LogOperationSizeBytes.Increment(logOperationSizeBytes); +} + +bool TPDisk::PreallocateLogChunks(ui64 headedRecordSize, TOwner owner, ui64 lsn, EOwnerGroupType ownerGroupType, + bool isAllowedForSpaceRed) { + ui32 additionalChunksNeeded = 0; + ui32 additionalChunksContainingPayload = 0; + if (CommonLogger->SectorBytesFree < headedRecordSize + sizeof(TFirstLogPageHeader)) { + ui64 additionalDataSize = headedRecordSize + sizeof(TFirstLogPageHeader) - + CommonLogger->SectorBytesFree; + ui64 logPayloadPerSector = Format.SectorPayloadSize() - sizeof(TLogPageHeader); + ui64 additionalPayloadSectors = (additionalDataSize + logPayloadPerSector - 1) / logPayloadPerSector; + ui64 usableSectorsPerLogChunk = UsableSectorsPerLogChunk(); ui64 sectorsUnusedPayload = usableSectorsPerLogChunk - CommonLogger->SectorIdx - 1; - if (sectorsUnusedPayload <= additionalPayloadSectors) { - ui64 extrachunkSectors = additionalPayloadSectors - sectorsUnusedPayload; + if (sectorsUnusedPayload <= additionalPayloadSectors) { + ui64 extrachunkSectors = additionalPayloadSectors - sectorsUnusedPayload; ui64 chunkPayloadSectors = usableSectorsPerLogChunk; - additionalChunksContainingPayload = (ui32)((extrachunkSectors + chunkPayloadSectors - 1) / - chunkPayloadSectors); - additionalChunksNeeded = (ui32)((extrachunkSectors + chunkPayloadSectors - 1) / chunkPayloadSectors); - } - } - - return AllocateLogChunks( - additionalChunksNeeded, additionalChunksContainingPayload, 
owner, lsn, ownerGroupType, isAllowedForSpaceRed); -} - -bool TPDisk::AllocateLogChunks(ui32 chunksNeeded, ui32 chunksContainingPayload, TOwner owner, ui64 lsn, - EOwnerGroupType ownerGroupType, bool isAllowedForSpaceRed) { - TGuard<TMutex> guard(StateMutex); - TOwner keeperOwner = (ownerGroupType == EOwnerGroupType::Dynamic ? OwnerSystem : OwnerCommonStaticLog); - - // Check space and free it if needed - using TColor = NKikimrBlobStorage::TPDiskSpaceColor; - TColor::E color = Keeper.EstimateSpaceColor(keeperOwner, chunksNeeded); - if (color >= TColor::RED && !isAllowedForSpaceRed) { - return false; - } - if (color == TColor::BLACK) { - return false; - } - - if (IsOwnerUser(owner)) { - Y_VERIFY_S(LogChunks.empty() || chunksNeeded > 0 || LogChunks.back().ChunkIdx == CommonLogger->ChunkIdx, - "PDiskId# " << PDiskId << " Chunk idx mismatch! back# " << LogChunks.back().ChunkIdx - << " pre-back# " << (LogChunks.rbegin()->ChunkIdx == LogChunks.begin()->ChunkIdx ? - 0 : (++LogChunks.rbegin())->ChunkIdx) - << " logger# " << CommonLogger->ChunkIdx); - if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { - LogChunks.back().RegisterLogSector<true>(owner, lsn); - } - } - if (chunksNeeded == 0) { - return true; - } - - ui32 usableSectors = UsableSectorsPerLogChunk(); - ui64 noncesPerChunk = usableSectors + 1; - ui64 sectorsToLast = usableSectors > CommonLogger->SectorIdx ? 
- (usableSectors - CommonLogger->SectorIdx) : 1; - ui64 lastNonce = CommonLogger->Nonce + sectorsToLast + noncesPerChunk * CommonLogger->NextChunks.size(); - - TString errorReason; - for (ui32 i = 0; i < chunksNeeded; ++i) { - ui32 chunkIdx = Keeper.PopOwnerFreeChunk(keeperOwner, errorReason); - Y_VERIFY_S(chunkIdx, "errorReason# " << errorReason); - Y_VERIFY_S(ChunkState[chunkIdx].OwnerId == OwnerUnallocated || - ChunkState[chunkIdx].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId << - " Unexpected ownerId# " << ui32(ChunkState[chunkIdx].OwnerId)); - ChunkState[chunkIdx].CommitState = TChunkState::LOG_RESERVED; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " AllocateLogChunks for owner# " << (ui32)owner - << " Lsn# " << lsn << " chunkIdx# " << chunkIdx << " LogChunks.size()# " << LogChunks.size()); - ChunkState[chunkIdx].OwnerId = OwnerSystem; - ChunkState[chunkIdx].PreviousNonce = lastNonce + noncesPerChunk * (ui64)i; - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, - "PDiskId# %" PRIu32 " AllocateLogChunks chunkIdx# %" PRIu32 - " for Lsn# %" PRIu64, (ui32)PDiskId, (ui32)chunkIdx, (ui64)lsn); - // Mark newly allocated log chunks as chunks containing this owners record - LogChunks.push_back(TLogChunkInfo(chunkIdx, (ui32)OwnerData.size())); - Mon.LogChunks->Inc(); - if (IsOwnerUser(owner) && i < chunksContainingPayload && OwnerData[owner].VDiskId != TVDiskID::InvalidId) { - LogChunks.back().RegisterLogSector<true>(owner, lsn); - } - CommonLogger->NextChunks.push_back(TChunkIdxWithInfo{chunkIdx, &LogChunks.back()}); - AtomicSet(EstimatedLogChunkIdx, chunkIdx); - } - - AskVDisksToCutLogs(OwnerSystem, false); - return true; -} - -void TPDisk::LogWrite(TLogWrite &evLog, TVector<ui32> &logChunksToCommit) { - Y_VERIFY_DEBUG(!evLog.Result); - OwnerData[evLog.Owner].Status = TOwnerData::VDISK_STATUS_LOGGED; - - bool isCommitRecord = evLog.Signature.HasCommitRecord(); - ui64 payloadSize = evLog.Data.size(); - 
*Mon.BandwidthPLogPayload += payloadSize; - if (isCommitRecord) { - ui64 commitSize = (sizeof(ui32) + sizeof(ui64)) * evLog.CommitRecord.CommitChunks.size() + - sizeof(ui32) * evLog.CommitRecord.DeleteChunks.size() + - sizeof(NPDisk::TCommitRecordFooter); - payloadSize += commitSize; - *Mon.BandwidthPLogCommit += commitSize; - } - - ui64 headedRecordSize = payloadSize + sizeof(TFirstLogPageHeader); - *Mon.BandwidthPLogRecordHeader += sizeof(TFirstLogPageHeader); - bool isAllowedForSpaceRed = isCommitRecord && (evLog.CommitRecord.DeleteChunks.size() > 0); - if (!PreallocateLogChunks(headedRecordSize, evLog.Owner, evLog.Lsn, evLog.OwnerGroupType, isAllowedForSpaceRed)) { - // TODO: make sure that commit records that delete chunks are applied atomically even if this error occurs. - TStringStream str; - str << "PDiskId# " << PDiskId << " Can't preallocate log chunks!" - << " Marker# BPD70"; - LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, "%s", str.Str().c_str()); - evLog.Result.Reset(new NPDisk::TEvLogResult(NKikimrProto::OUT_OF_SPACE, - NotEnoughDiskSpaceStatusFlags(evLog.Owner, evLog.OwnerGroupType), str.Str())); - evLog.Result->Results.push_back(NPDisk::TEvLogResult::TRecord(evLog.Lsn, evLog.Cookie)); - Y_VERIFY(evLog.Result.Get()); - return; - } - if (!CommonLogger->NextChunks.empty()) { - size_t sizeToCommit = CommonLogger->NextChunks.size() - 1; - logChunksToCommit.reserve(logChunksToCommit.size() + 1 + sizeToCommit); - logChunksToCommit.push_back(CommonLogger->ChunkIdx); - for (size_t i = 0; i < sizeToCommit; ++i) { - logChunksToCommit.push_back(CommonLogger->NextChunks[i].Idx); - } - } - - // Write to log - CommonLogger->LogHeader(evLog.Owner, evLog.Signature, evLog.Lsn, payloadSize, evLog.ReqId, &evLog.TraceId); - OnNonceChange(NonceLog, evLog.ReqId, &evLog.TraceId); - if (evLog.Data.size()) { - CommonLogger->LogDataPart(evLog.Data.data(), evLog.Data.size(), evLog.ReqId, &evLog.TraceId); - } - if (isCommitRecord) { - ui32 commitChunksCount = 
evLog.CommitRecord.CommitChunks.size(); - if (commitChunksCount) { - CommonLogger->LogDataPart(evLog.CommitRecord.CommitChunks.data(), commitChunksCount * sizeof(ui32), - evLog.ReqId, &evLog.TraceId); - TVector<ui64> commitChunkNonces(commitChunksCount); - for (ui32 idx = 0; idx < commitChunksCount; ++idx) { - commitChunkNonces[idx] = ChunkState[evLog.CommitRecord.CommitChunks[idx]].Nonce; - } - CommonLogger->LogDataPart(&commitChunkNonces[0], sizeof(ui64) * commitChunksCount, evLog.ReqId, &evLog.TraceId); - } - ui32 deleteChunksCount = evLog.CommitRecord.DeleteChunks.size(); - if (deleteChunksCount) { - CommonLogger->LogDataPart(evLog.CommitRecord.DeleteChunks.data(), deleteChunksCount * sizeof(ui32), - evLog.ReqId, &evLog.TraceId); - } - NPDisk::TCommitRecordFooter footer(evLog.Data.size(), evLog.CommitRecord.FirstLsnToKeep, - evLog.CommitRecord.CommitChunks.size(), evLog.CommitRecord.DeleteChunks.size(), - evLog.CommitRecord.IsStartingPoint); - CommonLogger->LogDataPart(&footer, sizeof(footer), evLog.ReqId, &evLog.TraceId); - - { - TGuard<TMutex> guard(StateMutex); - if (evLog.CommitRecord.IsStartingPoint) { - TLogSignature unmasked = evLog.Signature.GetUnmasked(); - OwnerData[evLog.Owner].StartingPoints[unmasked] = - TLogRecord(unmasked, evLog.Data, evLog.Lsn); - } - if (evLog.CommitRecord.FirstLsnToKeep >= OwnerData[evLog.Owner].CurrentFirstLsnToKeep) { - if (evLog.CommitRecord.FirstLsnToKeep > OwnerData[evLog.Owner].CurrentFirstLsnToKeep) { - OwnerData[evLog.Owner].CutLogAt = TInstant::Now(); - } - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Setting new FirstLsnToKeep# " << (ui64)OwnerData[evLog.Owner].CurrentFirstLsnToKeep - << " -> " << (ui64)evLog.CommitRecord.FirstLsnToKeep - << " caused by Lsn# " << (ui64)evLog.Lsn - << " ownerId# " << evLog.Owner - << " Marker# BPD71"); - OwnerData[evLog.Owner].CurrentFirstLsnToKeep = evLog.CommitRecord.FirstLsnToKeep; - } - } - } - Y_VERIFY(CommonLogger->NextChunks.empty()); 
- - evLog.Result.Reset(new NPDisk::TEvLogResult(NKikimrProto::OK, - GetStatusFlags(evLog.Owner, evLog.OwnerGroupType), nullptr)); - Y_VERIFY(evLog.Result.Get()); - evLog.Result->Results.push_back(NPDisk::TEvLogResult::TRecord(evLog.Lsn, evLog.Cookie)); -} - -void TPDisk::LogFlush(TCompletionAction *action, TVector<ui32> *logChunksToCommit, TReqId reqId, - NWilson::TTraceId *traceId) { - - if (!CommonLogger->IsEmptySector()) { - size_t prevPreallocatedSize = CommonLogger->NextChunks.size(); - if (!PreallocateLogChunks(CommonLogger->SectorBytesFree, OwnerSystem, 0, EOwnerGroupType::Static, true)) { - Y_FAIL("Last chunk is over, how did you do that?!"); - } - size_t nextPreallocatedSize = CommonLogger->NextChunks.size(); - if (nextPreallocatedSize != prevPreallocatedSize && logChunksToCommit) { - if (prevPreallocatedSize == 0) { - logChunksToCommit->push_back(CommonLogger->ChunkIdx); - } - size_t endIdx = nextPreallocatedSize - 1; - for (size_t i = prevPreallocatedSize; i < endIdx; ++i) { - logChunksToCommit->push_back(CommonLogger->NextChunks[i].Idx); - } - } - } - - CommonLogger->TerminateLog(reqId, traceId); + additionalChunksContainingPayload = (ui32)((extrachunkSectors + chunkPayloadSectors - 1) / + chunkPayloadSectors); + additionalChunksNeeded = (ui32)((extrachunkSectors + chunkPayloadSectors - 1) / chunkPayloadSectors); + } + } + + return AllocateLogChunks( + additionalChunksNeeded, additionalChunksContainingPayload, owner, lsn, ownerGroupType, isAllowedForSpaceRed); +} + +bool TPDisk::AllocateLogChunks(ui32 chunksNeeded, ui32 chunksContainingPayload, TOwner owner, ui64 lsn, + EOwnerGroupType ownerGroupType, bool isAllowedForSpaceRed) { + TGuard<TMutex> guard(StateMutex); + TOwner keeperOwner = (ownerGroupType == EOwnerGroupType::Dynamic ? 
OwnerSystem : OwnerCommonStaticLog); + + // Check space and free it if needed + using TColor = NKikimrBlobStorage::TPDiskSpaceColor; + TColor::E color = Keeper.EstimateSpaceColor(keeperOwner, chunksNeeded); + if (color >= TColor::RED && !isAllowedForSpaceRed) { + return false; + } + if (color == TColor::BLACK) { + return false; + } + + if (IsOwnerUser(owner)) { + Y_VERIFY_S(LogChunks.empty() || chunksNeeded > 0 || LogChunks.back().ChunkIdx == CommonLogger->ChunkIdx, + "PDiskId# " << PDiskId << " Chunk idx mismatch! back# " << LogChunks.back().ChunkIdx + << " pre-back# " << (LogChunks.rbegin()->ChunkIdx == LogChunks.begin()->ChunkIdx ? + 0 : (++LogChunks.rbegin())->ChunkIdx) + << " logger# " << CommonLogger->ChunkIdx); + if (OwnerData[owner].VDiskId != TVDiskID::InvalidId) { + LogChunks.back().RegisterLogSector<true>(owner, lsn); + } + } + if (chunksNeeded == 0) { + return true; + } + + ui32 usableSectors = UsableSectorsPerLogChunk(); + ui64 noncesPerChunk = usableSectors + 1; + ui64 sectorsToLast = usableSectors > CommonLogger->SectorIdx ? 
+ (usableSectors - CommonLogger->SectorIdx) : 1; + ui64 lastNonce = CommonLogger->Nonce + sectorsToLast + noncesPerChunk * CommonLogger->NextChunks.size(); + + TString errorReason; + for (ui32 i = 0; i < chunksNeeded; ++i) { + ui32 chunkIdx = Keeper.PopOwnerFreeChunk(keeperOwner, errorReason); + Y_VERIFY_S(chunkIdx, "errorReason# " << errorReason); + Y_VERIFY_S(ChunkState[chunkIdx].OwnerId == OwnerUnallocated || + ChunkState[chunkIdx].OwnerId == OwnerUnallocatedTrimmed, "PDiskId# " << PDiskId << + " Unexpected ownerId# " << ui32(ChunkState[chunkIdx].OwnerId)); + ChunkState[chunkIdx].CommitState = TChunkState::LOG_RESERVED; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " AllocateLogChunks for owner# " << (ui32)owner + << " Lsn# " << lsn << " chunkIdx# " << chunkIdx << " LogChunks.size()# " << LogChunks.size()); + ChunkState[chunkIdx].OwnerId = OwnerSystem; + ChunkState[chunkIdx].PreviousNonce = lastNonce + noncesPerChunk * (ui64)i; + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, + "PDiskId# %" PRIu32 " AllocateLogChunks chunkIdx# %" PRIu32 + " for Lsn# %" PRIu64, (ui32)PDiskId, (ui32)chunkIdx, (ui64)lsn); + // Mark newly allocated log chunks as chunks containing this owners record + LogChunks.push_back(TLogChunkInfo(chunkIdx, (ui32)OwnerData.size())); + Mon.LogChunks->Inc(); + if (IsOwnerUser(owner) && i < chunksContainingPayload && OwnerData[owner].VDiskId != TVDiskID::InvalidId) { + LogChunks.back().RegisterLogSector<true>(owner, lsn); + } + CommonLogger->NextChunks.push_back(TChunkIdxWithInfo{chunkIdx, &LogChunks.back()}); + AtomicSet(EstimatedLogChunkIdx, chunkIdx); + } + + AskVDisksToCutLogs(OwnerSystem, false); + return true; +} + +void TPDisk::LogWrite(TLogWrite &evLog, TVector<ui32> &logChunksToCommit) { + Y_VERIFY_DEBUG(!evLog.Result); + OwnerData[evLog.Owner].Status = TOwnerData::VDISK_STATUS_LOGGED; + + bool isCommitRecord = evLog.Signature.HasCommitRecord(); + ui64 payloadSize = evLog.Data.size(); + 
*Mon.BandwidthPLogPayload += payloadSize; + if (isCommitRecord) { + ui64 commitSize = (sizeof(ui32) + sizeof(ui64)) * evLog.CommitRecord.CommitChunks.size() + + sizeof(ui32) * evLog.CommitRecord.DeleteChunks.size() + + sizeof(NPDisk::TCommitRecordFooter); + payloadSize += commitSize; + *Mon.BandwidthPLogCommit += commitSize; + } + + ui64 headedRecordSize = payloadSize + sizeof(TFirstLogPageHeader); + *Mon.BandwidthPLogRecordHeader += sizeof(TFirstLogPageHeader); + bool isAllowedForSpaceRed = isCommitRecord && (evLog.CommitRecord.DeleteChunks.size() > 0); + if (!PreallocateLogChunks(headedRecordSize, evLog.Owner, evLog.Lsn, evLog.OwnerGroupType, isAllowedForSpaceRed)) { + // TODO: make sure that commit records that delete chunks are applied atomically even if this error occurs. + TStringStream str; + str << "PDiskId# " << PDiskId << " Can't preallocate log chunks!" + << " Marker# BPD70"; + LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, "%s", str.Str().c_str()); + evLog.Result.Reset(new NPDisk::TEvLogResult(NKikimrProto::OUT_OF_SPACE, + NotEnoughDiskSpaceStatusFlags(evLog.Owner, evLog.OwnerGroupType), str.Str())); + evLog.Result->Results.push_back(NPDisk::TEvLogResult::TRecord(evLog.Lsn, evLog.Cookie)); + Y_VERIFY(evLog.Result.Get()); + return; + } + if (!CommonLogger->NextChunks.empty()) { + size_t sizeToCommit = CommonLogger->NextChunks.size() - 1; + logChunksToCommit.reserve(logChunksToCommit.size() + 1 + sizeToCommit); + logChunksToCommit.push_back(CommonLogger->ChunkIdx); + for (size_t i = 0; i < sizeToCommit; ++i) { + logChunksToCommit.push_back(CommonLogger->NextChunks[i].Idx); + } + } + + // Write to log + CommonLogger->LogHeader(evLog.Owner, evLog.Signature, evLog.Lsn, payloadSize, evLog.ReqId, &evLog.TraceId); + OnNonceChange(NonceLog, evLog.ReqId, &evLog.TraceId); + if (evLog.Data.size()) { + CommonLogger->LogDataPart(evLog.Data.data(), evLog.Data.size(), evLog.ReqId, &evLog.TraceId); + } + if (isCommitRecord) { + ui32 commitChunksCount = 
evLog.CommitRecord.CommitChunks.size(); + if (commitChunksCount) { + CommonLogger->LogDataPart(evLog.CommitRecord.CommitChunks.data(), commitChunksCount * sizeof(ui32), + evLog.ReqId, &evLog.TraceId); + TVector<ui64> commitChunkNonces(commitChunksCount); + for (ui32 idx = 0; idx < commitChunksCount; ++idx) { + commitChunkNonces[idx] = ChunkState[evLog.CommitRecord.CommitChunks[idx]].Nonce; + } + CommonLogger->LogDataPart(&commitChunkNonces[0], sizeof(ui64) * commitChunksCount, evLog.ReqId, &evLog.TraceId); + } + ui32 deleteChunksCount = evLog.CommitRecord.DeleteChunks.size(); + if (deleteChunksCount) { + CommonLogger->LogDataPart(evLog.CommitRecord.DeleteChunks.data(), deleteChunksCount * sizeof(ui32), + evLog.ReqId, &evLog.TraceId); + } + NPDisk::TCommitRecordFooter footer(evLog.Data.size(), evLog.CommitRecord.FirstLsnToKeep, + evLog.CommitRecord.CommitChunks.size(), evLog.CommitRecord.DeleteChunks.size(), + evLog.CommitRecord.IsStartingPoint); + CommonLogger->LogDataPart(&footer, sizeof(footer), evLog.ReqId, &evLog.TraceId); + + { + TGuard<TMutex> guard(StateMutex); + if (evLog.CommitRecord.IsStartingPoint) { + TLogSignature unmasked = evLog.Signature.GetUnmasked(); + OwnerData[evLog.Owner].StartingPoints[unmasked] = + TLogRecord(unmasked, evLog.Data, evLog.Lsn); + } + if (evLog.CommitRecord.FirstLsnToKeep >= OwnerData[evLog.Owner].CurrentFirstLsnToKeep) { + if (evLog.CommitRecord.FirstLsnToKeep > OwnerData[evLog.Owner].CurrentFirstLsnToKeep) { + OwnerData[evLog.Owner].CutLogAt = TInstant::Now(); + } + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Setting new FirstLsnToKeep# " << (ui64)OwnerData[evLog.Owner].CurrentFirstLsnToKeep + << " -> " << (ui64)evLog.CommitRecord.FirstLsnToKeep + << " caused by Lsn# " << (ui64)evLog.Lsn + << " ownerId# " << evLog.Owner + << " Marker# BPD71"); + OwnerData[evLog.Owner].CurrentFirstLsnToKeep = evLog.CommitRecord.FirstLsnToKeep; + } + } + } + Y_VERIFY(CommonLogger->NextChunks.empty()); 
+ + evLog.Result.Reset(new NPDisk::TEvLogResult(NKikimrProto::OK, + GetStatusFlags(evLog.Owner, evLog.OwnerGroupType), nullptr)); + Y_VERIFY(evLog.Result.Get()); + evLog.Result->Results.push_back(NPDisk::TEvLogResult::TRecord(evLog.Lsn, evLog.Cookie)); +} + +void TPDisk::LogFlush(TCompletionAction *action, TVector<ui32> *logChunksToCommit, TReqId reqId, + NWilson::TTraceId *traceId) { + + if (!CommonLogger->IsEmptySector()) { + size_t prevPreallocatedSize = CommonLogger->NextChunks.size(); + if (!PreallocateLogChunks(CommonLogger->SectorBytesFree, OwnerSystem, 0, EOwnerGroupType::Static, true)) { + Y_FAIL("Last chunk is over, how did you do that?!"); + } + size_t nextPreallocatedSize = CommonLogger->NextChunks.size(); + if (nextPreallocatedSize != prevPreallocatedSize && logChunksToCommit) { + if (prevPreallocatedSize == 0) { + logChunksToCommit->push_back(CommonLogger->ChunkIdx); + } + size_t endIdx = nextPreallocatedSize - 1; + for (size_t i = prevPreallocatedSize; i < endIdx; ++i) { + logChunksToCommit->push_back(CommonLogger->NextChunks[i].Idx); + } + } + } + + CommonLogger->TerminateLog(reqId, traceId); CommonLogger->Flush(reqId, traceId, action); - - OnNonceChange(NonceLog, reqId, traceId); -} - - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Chunk commit log writing -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -NKikimrProto::EReplyStatus TPDisk::BeforeLoggingCommitRecord(const TLogWrite &logWrite, TStringStream& outErrorReason) { - TGuard<TMutex> guard(StateMutex); - for (ui32 i = 0; i < logWrite.CommitRecord.CommitChunks.size(); ++i) { - if (!ValidateCommitChunk(logWrite.CommitRecord.CommitChunks[i], logWrite.Owner, outErrorReason)) { - return NKikimrProto::ERROR; - } - } - for (ui32 i = 0; i < logWrite.CommitRecord.DeleteChunks.size(); ++i) { - if (!ValidateDeleteChunk(logWrite.CommitRecord.DeleteChunks[i], 
logWrite.Owner, outErrorReason)) { - return NKikimrProto::ERROR; - } - } - - for (ui32 chunkIdx : logWrite.CommitRecord.CommitChunks) { - if (ChunkState[chunkIdx].CommitState == TChunkState::DATA_RESERVED) { - Mon.UncommitedDataChunks->Dec(); - Mon.CommitedDataChunks->Inc(); - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " Commit ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, - (ui32)PDiskId, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, - (ui32)ChunkState[chunkIdx].OwnerId); - } - ++ChunkState[chunkIdx].CommitsInProgress; - } - for (ui32 chunkIdx : logWrite.CommitRecord.DeleteChunks) { - TChunkState& state = ChunkState[chunkIdx]; - if (state.HasAnyOperationsInProgress()) { - switch (state.CommitState) { - case TChunkState::DATA_RESERVED: - Mon.UncommitedDataChunks->Dec(); - state.CommitState = TChunkState::DATA_ON_QUARANTINE; - break; - case TChunkState::DATA_COMMITTED: - Mon.CommitedDataChunks->Dec(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " Marker# BPD10", - (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx); - state.CommitState = TChunkState::DATA_COMMITTED_ON_QUARANTINE; - break; - default: - state.CommitState = TChunkState::DATA_ON_QUARANTINE; - break; - } - QuarantineChunks.push_back(chunkIdx); - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " push chunk on QuarantineChunks because it has operations in flight" - << " chunkIdx# " << chunkIdx - << " ownerId# " << logWrite.Owner - << " state# " << state.ToString() - << " Marker# BPD78"); - } else if (state.CommitState == TChunkState::DATA_RESERVED) { - Mon.UncommitedDataChunks->Dec(); - state.CommitState = TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS; - } else if (state.CommitState == TChunkState::DATA_COMMITTED) { - Mon.CommitedDataChunks->Dec(); - state.CommitState = 
TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS; - } else { - Y_FAIL_S("PDiskID# " << PDiskId << " can't delete chunkIdx# " << chunkIdx - << " as it is in unexpected CommitState# " << state.ToString()); - } - } - - return NKikimrProto::OK; -} - -bool TPDisk::ValidateCommitChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason) { - TGuard<TMutex> guard(StateMutex); - if (chunkIdx >= ChunkState.size()) { - outErrorReason << "PDiskId# " << PDiskId - << " Can't commit chunkIdx# " << chunkIdx - << " > total# " << ChunkState.size() - << " ownerId# " << owner - << " Marker# BPD74"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - if (ChunkState[chunkIdx].OwnerId != owner) { - outErrorReason << "PDiskId# " << PDiskId - << " Can't commit chunkIdx# " << chunkIdx - << ", ownerId# " << owner - << " != real ownerId# " << ChunkState[chunkIdx].OwnerId - << " Marker# BPD75"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - if (ChunkState[chunkIdx].CommitState != TChunkState::DATA_RESERVED - && ChunkState[chunkIdx].CommitState != TChunkState::DATA_COMMITTED) { - outErrorReason << "PDiskId# " << PDiskId - << " Can't commit chunkIdx# " << chunkIdx - << " in CommitState# " << ChunkState[chunkIdx].CommitState - << " ownerId# " << owner << " Marker# BPD83"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - return true; -} - -void TPDisk::CommitChunk(ui32 chunkIdx) { - TGuard<TMutex> guard(StateMutex); - TChunkState &state = ChunkState[chunkIdx]; - Y_VERIFY(state.CommitsInProgress > 0); - --state.CommitsInProgress; - - switch (state.CommitState) { - case TChunkState::DATA_RESERVED: - [[fallthrough]]; - case TChunkState::DATA_COMMITTED: - state.CommitState = TChunkState::DATA_COMMITTED; - break; - case TChunkState::DATA_ON_QUARANTINE: - case TChunkState::DATA_COMMITTED_ON_QUARANTINE: - // Do nothing - break; - default: - 
Y_FAIL_S("PDiskID# " << PDiskId << " can't commit chunkIdx# " << chunkIdx - << " as it is in unexpected CommitState# " << state.ToString()); - break; - } -} - -bool TPDisk::ValidateDeleteChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason) { - TGuard<TMutex> guard(StateMutex); - if (chunkIdx >= ChunkState.size()) { - outErrorReason << "PDiskId# " << (ui32)PDiskId - << " Can't delete chunkIdx# " << (ui32)chunkIdx - << " > total# " << (ui32)ChunkState.size() - << " ownerId# " << (ui32)owner << "!" - << " Marker# BPD76"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - if (ChunkState[chunkIdx].OwnerId != owner) { - outErrorReason << "PDiskId# " << (ui32)PDiskId - << " Can't delete chunkIdx# " << (ui32)chunkIdx - << " ownerId# " << (ui32)owner - << " != trueOwnerId# " << (ui32)ChunkState[chunkIdx].OwnerId << "!" - << " Marker# BPD77"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - if (ChunkState[chunkIdx].CommitState != TChunkState::DATA_RESERVED - && ChunkState[chunkIdx].CommitState != TChunkState::DATA_COMMITTED) { - outErrorReason << "PDiskId# " << (ui32)PDiskId - << " Can't delete chunkIdx# " << (ui32)chunkIdx - << " in CommitState# " << ChunkState[chunkIdx].CommitState - << " ownerId# " << (ui32)owner << " Marker# BPD82"; - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); - return false; - } - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " Line# %" PRIu32 " Deletion of chunkIdx# %" PRIu32 " by ownerId# %" PRIu32 " is validated", - (ui32)PDiskId, (ui32)__LINE__, (ui32)chunkIdx, (ui32)owner); - return true; -} - -// Marks chunk deleted but does not move it to the free list. 
-void TPDisk::DeleteChunk(ui32 chunkIdx, TOwner owner) { - TGuard<TMutex> guard(StateMutex); - TChunkState &state = ChunkState[chunkIdx]; - switch (state.CommitState) { - // Chunk will be freed in TPDisk::ForceDeleteChunk() and may be released already - case TChunkState::FREE: - case TChunkState::DATA_ON_QUARANTINE: - break; - case TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS: - [[fallthrough]]; - case TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS: - Y_VERIFY_S(state.CommitsInProgress == 0, - "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << " state# " << state.ToString()); - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " chunkIdx# %" PRIu32 - " deleted, ownerId# %" PRIu32 " -> %" PRIu32, - (ui32)PDiskId, (ui32)chunkIdx, (ui32)state.OwnerId, (ui32)OwnerUnallocated); - Y_VERIFY(state.OwnerId == owner); // TODO DELETE - state.OwnerId = OwnerUnallocated; - state.CommitState = TChunkState::FREE; - Keeper.PushFreeOwnerChunk(owner, chunkIdx); - break; - case TChunkState::DATA_COMMITTED_ON_QUARANTINE: - // Mark chunk as quarantine, so it will be released through default quarantine way - state.CommitState = TChunkState::DATA_ON_QUARANTINE; - break; - default: - Y_FAIL_S("PDiskID# " << PDiskId << " can't delete chunkIdx# " << chunkIdx - << " as it is in unexpected CommitState# " << state.ToString()); - } -} - -void TPDisk::OnLogCommitDone(TLogCommitDone &req) { - TGuard<TMutex> guard(StateMutex); - - for (ui32 chunk : req.CommitedChunks) { - CommitChunk(chunk); - } - for (ui32 chunk : req.DeletedChunks) { - DeleteChunk(chunk, req.OwnerId); - } - - // Decrement log chunk user counters and release unused log chunks - TOwnerData &ownerData = OwnerData[req.OwnerId]; - ui64 currentFirstLsnToKeep = ownerData.CurrentFirstLsnToKeep; - auto it = LogChunks.begin(); - bool isChunkReleased = false; - if (req.Lsn <= ownerData.LastWrittenCommitLsn) { - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " owner# " << req.OwnerId 
- << " VDiskId# " << ownerData.VDiskId.ToStringWOGeneration() << " found EvLog with lsn# " << req.Lsn - << " less then or equals to LastWrittenCommitLsn#" << ownerData.LastWrittenCommitLsn); - } - ownerData.LastWrittenCommitLsn = req.Lsn; - while (it != LogChunks.end() && it->OwnerLsnRange.size() > req.OwnerId) { - TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[req.OwnerId]; - if (range.IsPresent && range.LastLsn < currentFirstLsnToKeep) { - //Y_VERIFY(range.FirstLsn != range.LastLsn); - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Log chunkIdx# " << (ui32)it->ChunkIdx << " [" << (ui64)range.FirstLsn - << ", " << (ui64)range.LastLsn << "] dereferenced by ownerId# " << (ui32)req.OwnerId - << " CurrentLsnToKeep# " << (ui64)currentFirstLsnToKeep << " caused by Lsn# " << (ui64)req.Lsn - << " previous CurrentUserCount# " << it->CurrentUserCount - << " Marker# BPD27"); - range.IsPresent = false; - Y_VERIFY(it->CurrentUserCount > 0); - it->CurrentUserCount--; - if (it->CurrentUserCount == 0) { - isChunkReleased = true; - } - } - ++it; - } - if (isChunkReleased) { - THolder<TCompletionEventSender> completion(new TCompletionEventSender(this)); - if (ReleaseUnusedLogChunks(completion.Get())) { - WriteSysLogRestorePoint(completion.Release(), req.ReqId, {}); // FIXME: wilson - } - } - TryTrimChunk(false, 0); -} - -void TPDisk::MarkChunksAsReleased(TReleaseChunks& req) { - TGuard<TMutex> guard(StateMutex); - - if (req.IsChunksFromLogSplice) { - auto *releaseReq = ReqCreator.CreateFromArgs<TReleaseChunks>(std::move(req.ChunksToRelease)); - - auto flushAction = MakeHolder<TCompletionEventSender>(this, THolder<TReleaseChunks>(releaseReq)); - - ui64 nonce = req.GapStart->LastNonce; - ui32 desiredSectorIdx = UsableSectorsPerLogChunk(); - ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; - TLogWriter writer(Mon, *BlockDevice.Get(), Format, nonce, Format.LogKey, BufferPool.Get(), desiredSectorIdx, - dataChunkSizeSectors, 
Format.MagicLogChunk, req.GapStart->ChunkIdx, nullptr, desiredSectorIdx, - nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption); - - Y_VERIFY_S(req.GapEnd->PrevChunkLastNonce, "PDiskId# " << PDiskId - << "Zero GapEnd->PrevChunkLastNonce, chunkInfo# " << *req.GapEnd); - // +1 stands for -1 in logreader in old versions of pdisk - ui64 expectedNonce = req.GapEnd->PrevChunkLastNonce + 1; - req.GapEnd->IsEndOfSplice = true; - writer.WriteNextChunkReference(req.GapEnd->ChunkIdx, expectedNonce, flushAction.Release(), {}, {}); - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " write nextChunkReference, from GapStart chunkIdx# " << req.GapStart->ChunkIdx - << " to GapEnd# " << *req.GapEnd << " Marker# BPD81"); - } else { - for (const auto& chunkIdx : req.ChunksToRelease) { - Keeper.PushFreeOwnerChunk(OwnerSystem, chunkIdx); - } - IsLogChunksReleaseInflight = false; - - TryTrimChunk(false, 0); - } -} - -// Schedules EvReadLogResult event for the system log -void TPDisk::InitiateReadSysLog(const TActorId &pDiskActor) { - Y_VERIFY_S(PDiskThread.Running(), "expect PDiskThread to be running"); - Y_VERIFY_S(InitPhase == EInitPhase::Uninitialized, "expect InitPhase to be Uninitialized, but InitPhase# " - << InitPhase); - ui32 formatSectorsSize = FormatSectorSize * ReplicationFactor; - THolder<TEvReadFormatResult> evReadFormatResult(new TEvReadFormatResult(formatSectorsSize, UseHugePages)); - ui8 *formatSectors = evReadFormatResult->FormatSectors.Get(); - BlockDevice->PreadAsync(formatSectors, formatSectorsSize, 0, - new TCompletionEventSender(this, pDiskActor, evReadFormatResult.Release()), TReqId(TReqId::InitialFormatRead, 0), {}); + + OnNonceChange(NonceLog, reqId, traceId); +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Chunk commit log writing 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +NKikimrProto::EReplyStatus TPDisk::BeforeLoggingCommitRecord(const TLogWrite &logWrite, TStringStream& outErrorReason) { + TGuard<TMutex> guard(StateMutex); + for (ui32 i = 0; i < logWrite.CommitRecord.CommitChunks.size(); ++i) { + if (!ValidateCommitChunk(logWrite.CommitRecord.CommitChunks[i], logWrite.Owner, outErrorReason)) { + return NKikimrProto::ERROR; + } + } + for (ui32 i = 0; i < logWrite.CommitRecord.DeleteChunks.size(); ++i) { + if (!ValidateDeleteChunk(logWrite.CommitRecord.DeleteChunks[i], logWrite.Owner, outErrorReason)) { + return NKikimrProto::ERROR; + } + } + + for (ui32 chunkIdx : logWrite.CommitRecord.CommitChunks) { + if (ChunkState[chunkIdx].CommitState == TChunkState::DATA_RESERVED) { + Mon.UncommitedDataChunks->Dec(); + Mon.CommitedDataChunks->Inc(); + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Commit ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, + (ui32)PDiskId, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, + (ui32)ChunkState[chunkIdx].OwnerId); + } + ++ChunkState[chunkIdx].CommitsInProgress; + } + for (ui32 chunkIdx : logWrite.CommitRecord.DeleteChunks) { + TChunkState& state = ChunkState[chunkIdx]; + if (state.HasAnyOperationsInProgress()) { + switch (state.CommitState) { + case TChunkState::DATA_RESERVED: + Mon.UncommitedDataChunks->Dec(); + state.CommitState = TChunkState::DATA_ON_QUARANTINE; + break; + case TChunkState::DATA_COMMITTED: + Mon.CommitedDataChunks->Dec(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " Marker# BPD10", + (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx); + state.CommitState = TChunkState::DATA_COMMITTED_ON_QUARANTINE; + break; + default: + state.CommitState = 
TChunkState::DATA_ON_QUARANTINE; + break; + } + QuarantineChunks.push_back(chunkIdx); + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " push chunk on QuarantineChunks because it has operations in flight" + << " chunkIdx# " << chunkIdx + << " ownerId# " << logWrite.Owner + << " state# " << state.ToString() + << " Marker# BPD78"); + } else if (state.CommitState == TChunkState::DATA_RESERVED) { + Mon.UncommitedDataChunks->Dec(); + state.CommitState = TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS; + } else if (state.CommitState == TChunkState::DATA_COMMITTED) { + Mon.CommitedDataChunks->Dec(); + state.CommitState = TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS; + } else { + Y_FAIL_S("PDiskID# " << PDiskId << " can't delete chunkIdx# " << chunkIdx + << " as it is in unexpected CommitState# " << state.ToString()); + } + } + + return NKikimrProto::OK; +} + +bool TPDisk::ValidateCommitChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason) { + TGuard<TMutex> guard(StateMutex); + if (chunkIdx >= ChunkState.size()) { + outErrorReason << "PDiskId# " << PDiskId + << " Can't commit chunkIdx# " << chunkIdx + << " > total# " << ChunkState.size() + << " ownerId# " << owner + << " Marker# BPD74"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + if (ChunkState[chunkIdx].OwnerId != owner) { + outErrorReason << "PDiskId# " << PDiskId + << " Can't commit chunkIdx# " << chunkIdx + << ", ownerId# " << owner + << " != real ownerId# " << ChunkState[chunkIdx].OwnerId + << " Marker# BPD75"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + if (ChunkState[chunkIdx].CommitState != TChunkState::DATA_RESERVED + && ChunkState[chunkIdx].CommitState != TChunkState::DATA_COMMITTED) { + outErrorReason << "PDiskId# " << PDiskId + << " Can't commit chunkIdx# " << chunkIdx + << " in CommitState# " << ChunkState[chunkIdx].CommitState + << " ownerId# " << owner 
<< " Marker# BPD83"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + return true; +} + +void TPDisk::CommitChunk(ui32 chunkIdx) { + TGuard<TMutex> guard(StateMutex); + TChunkState &state = ChunkState[chunkIdx]; + Y_VERIFY(state.CommitsInProgress > 0); + --state.CommitsInProgress; + + switch (state.CommitState) { + case TChunkState::DATA_RESERVED: + [[fallthrough]]; + case TChunkState::DATA_COMMITTED: + state.CommitState = TChunkState::DATA_COMMITTED; + break; + case TChunkState::DATA_ON_QUARANTINE: + case TChunkState::DATA_COMMITTED_ON_QUARANTINE: + // Do nothing + break; + default: + Y_FAIL_S("PDiskID# " << PDiskId << " can't commit chunkIdx# " << chunkIdx + << " as it is in unexpected CommitState# " << state.ToString()); + break; + } +} + +bool TPDisk::ValidateDeleteChunk(ui32 chunkIdx, TOwner owner, TStringStream& outErrorReason) { + TGuard<TMutex> guard(StateMutex); + if (chunkIdx >= ChunkState.size()) { + outErrorReason << "PDiskId# " << (ui32)PDiskId + << " Can't delete chunkIdx# " << (ui32)chunkIdx + << " > total# " << (ui32)ChunkState.size() + << " ownerId# " << (ui32)owner << "!" + << " Marker# BPD76"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + if (ChunkState[chunkIdx].OwnerId != owner) { + outErrorReason << "PDiskId# " << (ui32)PDiskId + << " Can't delete chunkIdx# " << (ui32)chunkIdx + << " ownerId# " << (ui32)owner + << " != trueOwnerId# " << (ui32)ChunkState[chunkIdx].OwnerId << "!" 
+ << " Marker# BPD77"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + if (ChunkState[chunkIdx].CommitState != TChunkState::DATA_RESERVED + && ChunkState[chunkIdx].CommitState != TChunkState::DATA_COMMITTED) { + outErrorReason << "PDiskId# " << (ui32)PDiskId + << " Can't delete chunkIdx# " << (ui32)chunkIdx + << " in CommitState# " << ChunkState[chunkIdx].CommitState + << " ownerId# " << (ui32)owner << " Marker# BPD82"; + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, outErrorReason.Str()); + return false; + } + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Line# %" PRIu32 " Deletion of chunkIdx# %" PRIu32 " by ownerId# %" PRIu32 " is validated", + (ui32)PDiskId, (ui32)__LINE__, (ui32)chunkIdx, (ui32)owner); + return true; +} + +// Marks chunk deleted but does not move it to the free list. +void TPDisk::DeleteChunk(ui32 chunkIdx, TOwner owner) { + TGuard<TMutex> guard(StateMutex); + TChunkState &state = ChunkState[chunkIdx]; + switch (state.CommitState) { + // Chunk will be freed in TPDisk::ForceDeleteChunk() and may be released already + case TChunkState::FREE: + case TChunkState::DATA_ON_QUARANTINE: + break; + case TChunkState::DATA_RESERVED_DELETE_IN_PROGRESS: + [[fallthrough]]; + case TChunkState::DATA_COMMITTED_DELETE_IN_PROGRESS: + Y_VERIFY_S(state.CommitsInProgress == 0, + "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx << " state# " << state.ToString()); + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " chunkIdx# %" PRIu32 + " deleted, ownerId# %" PRIu32 " -> %" PRIu32, + (ui32)PDiskId, (ui32)chunkIdx, (ui32)state.OwnerId, (ui32)OwnerUnallocated); + Y_VERIFY(state.OwnerId == owner); // TODO DELETE + state.OwnerId = OwnerUnallocated; + state.CommitState = TChunkState::FREE; + Keeper.PushFreeOwnerChunk(owner, chunkIdx); + break; + case TChunkState::DATA_COMMITTED_ON_QUARANTINE: + // Mark chunk as quarantine, so it will be released through default 
quarantine way + state.CommitState = TChunkState::DATA_ON_QUARANTINE; + break; + default: + Y_FAIL_S("PDiskID# " << PDiskId << " can't delete chunkIdx# " << chunkIdx + << " as it is in unexpected CommitState# " << state.ToString()); + } +} + +void TPDisk::OnLogCommitDone(TLogCommitDone &req) { + TGuard<TMutex> guard(StateMutex); + + for (ui32 chunk : req.CommitedChunks) { + CommitChunk(chunk); + } + for (ui32 chunk : req.DeletedChunks) { + DeleteChunk(chunk, req.OwnerId); + } + + // Decrement log chunk user counters and release unused log chunks + TOwnerData &ownerData = OwnerData[req.OwnerId]; + ui64 currentFirstLsnToKeep = ownerData.CurrentFirstLsnToKeep; + auto it = LogChunks.begin(); + bool isChunkReleased = false; + if (req.Lsn <= ownerData.LastWrittenCommitLsn) { + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " owner# " << req.OwnerId + << " VDiskId# " << ownerData.VDiskId.ToStringWOGeneration() << " found EvLog with lsn# " << req.Lsn + << " less then or equals to LastWrittenCommitLsn#" << ownerData.LastWrittenCommitLsn); + } + ownerData.LastWrittenCommitLsn = req.Lsn; + while (it != LogChunks.end() && it->OwnerLsnRange.size() > req.OwnerId) { + TLogChunkInfo::TLsnRange &range = it->OwnerLsnRange[req.OwnerId]; + if (range.IsPresent && range.LastLsn < currentFirstLsnToKeep) { + //Y_VERIFY(range.FirstLsn != range.LastLsn); + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Log chunkIdx# " << (ui32)it->ChunkIdx << " [" << (ui64)range.FirstLsn + << ", " << (ui64)range.LastLsn << "] dereferenced by ownerId# " << (ui32)req.OwnerId + << " CurrentLsnToKeep# " << (ui64)currentFirstLsnToKeep << " caused by Lsn# " << (ui64)req.Lsn + << " previous CurrentUserCount# " << it->CurrentUserCount + << " Marker# BPD27"); + range.IsPresent = false; + Y_VERIFY(it->CurrentUserCount > 0); + it->CurrentUserCount--; + if (it->CurrentUserCount == 0) { + isChunkReleased = true; + } + } + ++it; + } + if 
(isChunkReleased) { + THolder<TCompletionEventSender> completion(new TCompletionEventSender(this)); + if (ReleaseUnusedLogChunks(completion.Get())) { + WriteSysLogRestorePoint(completion.Release(), req.ReqId, {}); // FIXME: wilson + } + } + TryTrimChunk(false, 0); +} + +void TPDisk::MarkChunksAsReleased(TReleaseChunks& req) { + TGuard<TMutex> guard(StateMutex); + + if (req.IsChunksFromLogSplice) { + auto *releaseReq = ReqCreator.CreateFromArgs<TReleaseChunks>(std::move(req.ChunksToRelease)); + + auto flushAction = MakeHolder<TCompletionEventSender>(this, THolder<TReleaseChunks>(releaseReq)); + + ui64 nonce = req.GapStart->LastNonce; + ui32 desiredSectorIdx = UsableSectorsPerLogChunk(); + ui32 dataChunkSizeSectors = Format.ChunkSize / Format.SectorSize; + TLogWriter writer(Mon, *BlockDevice.Get(), Format, nonce, Format.LogKey, BufferPool.Get(), desiredSectorIdx, + dataChunkSizeSectors, Format.MagicLogChunk, req.GapStart->ChunkIdx, nullptr, desiredSectorIdx, + nullptr, ActorSystem, PDiskId, &DriveModel, Cfg->UseT1ha0HashInFooter, Cfg->EnableSectorEncryption); + + Y_VERIFY_S(req.GapEnd->PrevChunkLastNonce, "PDiskId# " << PDiskId + << "Zero GapEnd->PrevChunkLastNonce, chunkInfo# " << *req.GapEnd); + // +1 stands for -1 in logreader in old versions of pdisk + ui64 expectedNonce = req.GapEnd->PrevChunkLastNonce + 1; + req.GapEnd->IsEndOfSplice = true; + writer.WriteNextChunkReference(req.GapEnd->ChunkIdx, expectedNonce, flushAction.Release(), {}, {}); + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " write nextChunkReference, from GapStart chunkIdx# " << req.GapStart->ChunkIdx + << " to GapEnd# " << *req.GapEnd << " Marker# BPD81"); + } else { + for (const auto& chunkIdx : req.ChunksToRelease) { + Keeper.PushFreeOwnerChunk(OwnerSystem, chunkIdx); + } + IsLogChunksReleaseInflight = false; + + TryTrimChunk(false, 0); + } +} + +// Schedules EvReadLogResult event for the system log +void TPDisk::InitiateReadSysLog(const TActorId &pDiskActor) 
{ + Y_VERIFY_S(PDiskThread.Running(), "expect PDiskThread to be running"); + Y_VERIFY_S(InitPhase == EInitPhase::Uninitialized, "expect InitPhase to be Uninitialized, but InitPhase# " + << InitPhase); + ui32 formatSectorsSize = FormatSectorSize * ReplicationFactor; + THolder<TEvReadFormatResult> evReadFormatResult(new TEvReadFormatResult(formatSectorsSize, UseHugePages)); + ui8 *formatSectors = evReadFormatResult->FormatSectors.Get(); + BlockDevice->PreadAsync(formatSectors, formatSectorsSize, 0, + new TCompletionEventSender(this, pDiskActor, evReadFormatResult.Release()), TReqId(TReqId::InitialFormatRead, 0), {}); *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialFormatRead; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingFormatRead; - InitPhase = EInitPhase::ReadingSysLog; -} - -void TPDisk::ProcessReadLogResult(const NPDisk::TEvReadLogResult &evReadLogResult, const TActorId &pDiskActor) { - TStringStream errStr; - if (evReadLogResult.Status != NKikimrProto::OK) { - LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " evReadLogResult# " << evReadLogResult.ToString() - << " InitPhase# " << InitPhase - << " Marker# BPD01"); - switch (InitPhase) { - case EInitPhase::ReadingSysLog: + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingFormatRead; + InitPhase = EInitPhase::ReadingSysLog; +} + +void TPDisk::ProcessReadLogResult(const NPDisk::TEvReadLogResult &evReadLogResult, const TActorId &pDiskActor) { + TStringStream errStr; + if (evReadLogResult.Status != NKikimrProto::OK) { + LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " evReadLogResult# " << evReadLogResult.ToString() + << " InitPhase# " << InitPhase + << " Marker# BPD01"); + switch (InitPhase) { + case EInitPhase::ReadingSysLog: *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialSysLogReadError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = 
TPDiskMon::TPDisk::ErrorInitialSysLogRead; - errStr << "Error in initial sys log read" << Endl; - break; - case EInitPhase::ReadingLog: + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialSysLogRead; + errStr << "Error in initial sys log read" << Endl; + break; + case EInitPhase::ReadingLog: *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialCommonLogReadError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialCommonLogRead; - errStr << "Error in initial common log read" << Endl; - break; - default: - break; - - } - errStr << "evReadLogResult# " << evReadLogResult.ToString() << " Marker# BPD88"; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, errStr.Str())); - return; - } - - switch (InitPhase) { - case EInitPhase::ReadingSysLog: - { - ProcessChunk0(evReadLogResult); - - if (InitialSysLogWritePosition == 0) { + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialCommonLogRead; + errStr << "Error in initial common log read" << Endl; + break; + default: + break; + + } + errStr << "evReadLogResult# " << evReadLogResult.ToString() << " Marker# BPD88"; + ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, errStr.Str())); + return; + } + + switch (InitPhase) { + case EInitPhase::ReadingSysLog: + { + ProcessChunk0(evReadLogResult); + + if (InitialSysLogWritePosition == 0) { *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialSysLogParseError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialSysLogParse; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, - "Error while parsing sys log at booting state")); - return; - } - // Parse the main log to obtain busy/free chunk lists + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialSysLogParse; + 
ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, + "Error while parsing sys log at booting state")); + return; + } + // Parse the main log to obtain busy/free chunk lists *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialCommonLogRead; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingCommonLogRead; - ReadAndParseMainLog(pDiskActor); - InitPhase = EInitPhase::ReadingLog; - return; - } - case EInitPhase::ReadingLog: - { - InitialLogPosition = evReadLogResult.NextPosition; - if (InitialLogPosition == TLogPosition{0, 0}) { + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::BootingCommonLogRead; + ReadAndParseMainLog(pDiskActor); + InitPhase = EInitPhase::ReadingLog; + return; + } + case EInitPhase::ReadingLog: + { + InitialLogPosition = evReadLogResult.NextPosition; + if (InitialLogPosition == TLogPosition{0, 0}) { *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::InitialCommonLogParseError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialCommonLogParse; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, - "Error while parsing common log at booting state")); - return; - } - // Prepare the FreeChunks list - InitFreeChunks(); - // Actualize LogChunks counters according to OwnerData - InitLogChunksInfo(); - - { - TGuard<TMutex> guard(StateMutex); - - // Check that there are no chunks for owners with no starting points - TSet<TOwner> chunkOwners; - for (size_t chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { - TChunkState &state = ChunkState[chunkIdx]; - if (IsOwnerUser(state.OwnerId)) { - chunkOwners.insert(state.OwnerId); - } - } - for (auto it = chunkOwners.begin(); it != chunkOwners.end(); ++it) { - TOwnerData &data = OwnerData[*it]; - Y_VERIFY(data.VDiskId != TVDiskID::InvalidId); - if (data.StartingPoints.empty()) { - TStringStream str; - str << "PDiskId# " << (ui32)PDiskId - << " ownerId# " << (ui32)*it - << " Owns chunks, but has no starting points! 
ownedChunks# ["; - for (size_t chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { - TChunkState &state = ChunkState[chunkIdx]; - if (state.OwnerId == *it) { - str << chunkIdx << ", "; - } - } - str << "]" << Endl; - Y_FAIL_S(str.Str()); - } - } - - // Set up UsedChunkCount for each owner - TVector<ui32> usedForOwner; - usedForOwner.resize(OwnerEndUser); - for (ui32 ownerId = OwnerBeginUser; ownerId < OwnerEndUser; ++ownerId) { - usedForOwner[ownerId] = 0; - } - for (size_t chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { - TChunkState &state = ChunkState[chunkIdx]; - if (IsOwnerUser(state.OwnerId)) { - usedForOwner[state.OwnerId]++; - } - } - - // Reset chunk trackers - TKeeperParams params; - params.TotalChunks = Format.DiskSizeChunks(); - params.ExpectedOwnerCount = Cfg->ExpectedSlotCount; - params.SysLogSize = Format.SystemChunkCount; // sysLogSize = chunk 0 + additional SysLog chunks - params.CommonLogSize = LogChunks.size(); - params.SpaceColorBorder = Cfg->SpaceColorBorder; - for (ui32 ownerId = OwnerBeginUser; ownerId < OwnerEndUser; ++ownerId) { - if (OwnerData[ownerId].VDiskId != TVDiskID::InvalidId) { - params.OwnersInfo[ownerId] = {usedForOwner[ownerId], OwnerData[ownerId].VDiskId}; - if (OwnerData[ownerId].IsStaticGroupOwner()) { - params.HasStaticGroups = true; - } - } - } - - TString errorReason; - bool isOk = Keeper.Reset(params, errorReason); - - if (!isOk) { + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorInitialCommonLogParse; + ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, + "Error while parsing common log at booting state")); + return; + } + // Prepare the FreeChunks list + InitFreeChunks(); + // Actualize LogChunks counters according to OwnerData + InitLogChunksInfo(); + + { + TGuard<TMutex> guard(StateMutex); + + // Check that there are no chunks for owners with no starting points + TSet<TOwner> chunkOwners; + for (size_t chunkIdx = 0; chunkIdx < 
ChunkState.size(); ++chunkIdx) { + TChunkState &state = ChunkState[chunkIdx]; + if (IsOwnerUser(state.OwnerId)) { + chunkOwners.insert(state.OwnerId); + } + } + for (auto it = chunkOwners.begin(); it != chunkOwners.end(); ++it) { + TOwnerData &data = OwnerData[*it]; + Y_VERIFY(data.VDiskId != TVDiskID::InvalidId); + if (data.StartingPoints.empty()) { + TStringStream str; + str << "PDiskId# " << (ui32)PDiskId + << " ownerId# " << (ui32)*it + << " Owns chunks, but has no starting points! ownedChunks# ["; + for (size_t chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { + TChunkState &state = ChunkState[chunkIdx]; + if (state.OwnerId == *it) { + str << chunkIdx << ", "; + } + } + str << "]" << Endl; + Y_FAIL_S(str.Str()); + } + } + + // Set up UsedChunkCount for each owner + TVector<ui32> usedForOwner; + usedForOwner.resize(OwnerEndUser); + for (ui32 ownerId = OwnerBeginUser; ownerId < OwnerEndUser; ++ownerId) { + usedForOwner[ownerId] = 0; + } + for (size_t chunkIdx = 0; chunkIdx < ChunkState.size(); ++chunkIdx) { + TChunkState &state = ChunkState[chunkIdx]; + if (IsOwnerUser(state.OwnerId)) { + usedForOwner[state.OwnerId]++; + } + } + + // Reset chunk trackers + TKeeperParams params; + params.TotalChunks = Format.DiskSizeChunks(); + params.ExpectedOwnerCount = Cfg->ExpectedSlotCount; + params.SysLogSize = Format.SystemChunkCount; // sysLogSize = chunk 0 + additional SysLog chunks + params.CommonLogSize = LogChunks.size(); + params.SpaceColorBorder = Cfg->SpaceColorBorder; + for (ui32 ownerId = OwnerBeginUser; ownerId < OwnerEndUser; ++ownerId) { + if (OwnerData[ownerId].VDiskId != TVDiskID::InvalidId) { + params.OwnersInfo[ownerId] = {usedForOwner[ownerId], OwnerData[ownerId].VDiskId}; + if (OwnerData[ownerId].IsStaticGroupOwner()) { + params.HasStaticGroups = true; + } + } + } + + TString errorReason; + bool isOk = Keeper.Reset(params, errorReason); + + if (!isOk) { *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::ChunkQuotaError; - *Mon.PDiskBriefState = 
TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCalculatingChunkQuotas; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, errorReason)); - return; - } - } - - // Increase Nonces to prevent collisions - NPrivate::TMersenne64 randGen(Seed()); - do { - for (ui32 i = 0; i < NonceCount; ++i) { - SysLogRecord.Nonces.Value[i] += ForceLogNonceDiff.Value[i] + 1 + randGen.GenRand() % ForceLogNonceDiff.Value[i]; - } - } while (SysLogRecord.Nonces.Value[NonceLog] <= InitialPreviousNonce); - InitSysLogger(); - - InitPhase = EInitPhase::Initialized; - if (!InitCommonLogger()) { - // TODO: report red zone + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCalculatingChunkQuotas; + ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, errorReason)); + return; + } + } + + // Increase Nonces to prevent collisions + NPrivate::TMersenne64 randGen(Seed()); + do { + for (ui32 i = 0; i < NonceCount; ++i) { + SysLogRecord.Nonces.Value[i] += ForceLogNonceDiff.Value[i] + 1 + randGen.GenRand() % ForceLogNonceDiff.Value[i]; + } + } while (SysLogRecord.Nonces.Value[NonceLog] <= InitialPreviousNonce); + InitSysLogger(); + + InitPhase = EInitPhase::Initialized; + if (!InitCommonLogger()) { + // TODO: report red zone *Mon.PDiskState = NKikimrBlobStorage::TPDiskState::CommonLoggerInitError; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCommonLoggerInit; - ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, "Error in common logger init")); - return; - } - - // Now it's ok to write both logs and data. + *Mon.PDiskBriefState = TPDiskMon::TPDisk::Error; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::ErrorCommonLoggerInit; + ActorSystem->Send(pDiskActor, new TEvLogInitResult(false, "Error in common logger init")); + return; + } + + // Now it's ok to write both logs and data. 
*Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Normal; - *Mon.PDiskBriefState = TPDiskMon::TPDisk::OK; - *Mon.PDiskDetailedState = TPDiskMon::TPDisk::EverythingIsOk; - - auto completion = MakeHolder<TCompletionEventSender>(this, pDiskActor, new TEvLogInitResult(true, "OK")); - ReleaseUnusedLogChunks(completion.Get()); - WriteSysLogRestorePoint(completion.Release(), TReqId(TReqId::AfterInitCommonLoggerSysLog, 0), {}); - - // Output the fully initialized state for each owner and each chunk. - LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Successfully started"); - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Startup owner info# " << StartupOwnerInfo()); - - return; - } - default: - Y_FAIL_S("Unexpected InitPhase# " << InitPhase); - } -} - -} + *Mon.PDiskBriefState = TPDiskMon::TPDisk::OK; + *Mon.PDiskDetailedState = TPDiskMon::TPDisk::EverythingIsOk; + + auto completion = MakeHolder<TCompletionEventSender>(this, pDiskActor, new TEvLogInitResult(true, "OK")); + ReleaseUnusedLogChunks(completion.Get()); + WriteSysLogRestorePoint(completion.Release(), TReqId(TReqId::AfterInitCommonLoggerSysLog, 0), {}); + + // Output the fully initialized state for each owner and each chunk. 
+ LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Successfully started"); + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Startup owner info# " << StartupOwnerInfo()); + + return; + } + default: + Y_FAIL_S("Unexpected InitPhase# " << InitPhase); + } +} + +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.cpp index 0bdf61d761..43b0c69ae9 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.cpp @@ -1,31 +1,31 @@ #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/protos/blobstorage.pb.h> -#include <util/stream/str.h> - -#include "blobstorage_pdisk_internal_interface.h" - -namespace NKikimr { -namespace NPDisk { - +#include <util/stream/str.h> + +#include "blobstorage_pdisk_internal_interface.h" + +namespace NKikimr { +namespace NPDisk { + TEvWhiteboardReportResult::~TEvWhiteboardReportResult() = default; -TString TEvWhiteboardReportResult::ToString(const TEvWhiteboardReportResult &record) { - TStringStream str; - str << "{"; - if (record.PDiskState) { - str << "PDiskState# " << record.PDiskState->Record; - } +TString TEvWhiteboardReportResult::ToString(const TEvWhiteboardReportResult &record) { + TStringStream str; + str << "{"; + if (record.PDiskState) { + str << "PDiskState# " << record.PDiskState->Record; + } for (const auto& p : record.VDiskStateVect) { - str << " VDiskState# " << std::get<1>(p); - } - if (record.DiskMetrics) { + str << " VDiskState# " << std::get<1>(p); + } + if (record.DiskMetrics) { str << " DiskMetrics# " << record.DiskMetrics->Record; - } - str << "}"; - return str.Str(); -} - -} // NPDisk -} // NKikimr - + } + str << "}"; + return str.Str(); +} + +} // NPDisk +} // NKikimr + diff --git 
a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.h index 0defb442d8..df2f088569 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_internal_interface.h @@ -1,54 +1,54 @@ -#pragma once -#include "defs.h" +#pragma once +#include "defs.h" #include "blobstorage_pdisk_defs.h" - + #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice.h> - + #include <ydb/library/pdisk_io/buffers.h> #include <library/cpp/actors/core/mon.h> - - -namespace NKikimr { -namespace NPDisk { + + +namespace NKikimr { +namespace NPDisk { //////////////////////////////////////////////////////////////////////////// // Whiteboard report // //////////////////////////////////////////////////////////////////////////// - + struct TEvWhiteboardReportResult : public TEventLocal<TEvWhiteboardReportResult, TEvBlobStorage::EvWhiteboardReportResult> { - THolder<NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateUpdate> PDiskState; - TVector<std::tuple<TActorId, NKikimrWhiteboard::TVDiskStateInfo>> VDiskStateVect; - THolder<TEvBlobStorage::TEvControllerUpdateDiskStatus> DiskMetrics; - + THolder<NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateUpdate> PDiskState; + TVector<std::tuple<TActorId, NKikimrWhiteboard::TVDiskStateInfo>> VDiskStateVect; + THolder<TEvBlobStorage::TEvControllerUpdateDiskStatus> DiskMetrics; + ~TEvWhiteboardReportResult(); - + TString ToString() const { return ToString(*this); } - + static TString ToString(const TEvWhiteboardReportResult &record); }; - + //////////////////////////////////////////////////////////////////////////// // Http result // //////////////////////////////////////////////////////////////////////////// - + struct TEvHttpInfoResult : public TEventLocal<TEvHttpInfoResult, TEvBlobStorage::EvHttpInfoResult> { TAutoPtr<NMon::TEvHttpInfoRes> HttpInfoRes; 
const TActorId EndCustomer; - + TEvHttpInfoResult(const TActorId &endCustomer) : HttpInfoRes(nullptr) , EndCustomer(endCustomer) {} - + TString ToString() const { return ToString(*this); } - + static TString ToString(const TEvHttpInfoResult &record) { TStringStream str; str << "{"; @@ -57,144 +57,144 @@ struct TEvHttpInfoResult : public TEventLocal<TEvHttpInfoResult, TEvBlobStorage: return str.Str(); } }; - -struct TEvPDiskFormattingFinished : public TEventLocal<TEvPDiskFormattingFinished, TEvBlobStorage::EvPDiskFormattingFinished> { - bool IsSucceed; - TString ErrorStr; - - TEvPDiskFormattingFinished(bool isSucceed, const TString &errorStr) - : IsSucceed(isSucceed) - , ErrorStr(errorStr) - {} - - TString ToString() const { - return ToString(*this); - } - - static TString ToString(const TEvPDiskFormattingFinished &record) { - Y_UNUSED(record); - TStringStream str; - str << "{"; - str << "EvFormattingFinished"; - str << "}"; - return str.Str(); - } -}; - - -//////////////////////////////////////////////////////////////////////////// -// This event is used for continuing log reading if it is not possible -// to read in one IO device operation -//////////////////////////////////////////////////////////////////////////// - -struct TEvReadLogContinue : public TEventLocal<TEvReadLogContinue, TEvBlobStorage::EvReadLogContinue> { - void *Data; - ui32 Size; - ui64 Offset; - TCompletionAction *CompletionAction; + +struct TEvPDiskFormattingFinished : public TEventLocal<TEvPDiskFormattingFinished, TEvBlobStorage::EvPDiskFormattingFinished> { + bool IsSucceed; + TString ErrorStr; + + TEvPDiskFormattingFinished(bool isSucceed, const TString &errorStr) + : IsSucceed(isSucceed) + , ErrorStr(errorStr) + {} + + TString ToString() const { + return ToString(*this); + } + + static TString ToString(const TEvPDiskFormattingFinished &record) { + Y_UNUSED(record); + TStringStream str; + str << "{"; + str << "EvFormattingFinished"; + str << "}"; + return str.Str(); + } +}; + + 
+//////////////////////////////////////////////////////////////////////////// +// This event is used for continuing log reading if it is not possible +// to read in one IO device operation +//////////////////////////////////////////////////////////////////////////// + +struct TEvReadLogContinue : public TEventLocal<TEvReadLogContinue, TEvBlobStorage::EvReadLogContinue> { + void *Data; + ui32 Size; + ui64 Offset; + TCompletionAction *CompletionAction; TReqId ReqId; - + TEvReadLogContinue(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction, TReqId reqId) - : Data(data) - , Size(size) - , Offset(offset) - , CompletionAction(completionAction) - , ReqId(reqId) - {} - - TString ToString() const { - return ToString(*this); - } - - static TString ToString(const TEvReadLogContinue &record) { - TStringStream str; - str << "{"; - str << "EvReadLogContinue "; - str << "Size# " << record.Size << " "; - str << "Offset# " << record.Offset << " "; - str << "ReqId# " << record.ReqId << " "; - str << "}"; - return str.Str(); - } -}; - -//////////////////////////////////////////////////////////////////////////// -// This event is used for restoring broken sectors found while reading log -// -//////////////////////////////////////////////////////////////////////////// - -struct TEvLogSectorRestore : public TEventLocal<TEvLogSectorRestore, TEvBlobStorage::EvLogSectorRestore> { - void *Data; - ui32 Size; - ui64 Offset; - TCompletionAction *CompletionAction; - - TEvLogSectorRestore(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction) - : Data(data) - , Size(size) - , Offset(offset) - , CompletionAction(completionAction) - {} - - TString ToString() const { - return ToString(*this); - } - - static TString ToString(const TEvLogSectorRestore &record) { - TStringStream str; - str << "{"; - str << "EvLogSectorRestore "; - str << "Size# " << record.Size << " "; - str << "Offset# " << record.Offset << " "; - str << "}"; - return str.Str(); - } -}; - 
-//////////////////////////////////////////////////////////////////////////// -// Event is used for returning result of processing previously read log -// -//////////////////////////////////////////////////////////////////////////// - -struct TEvLogInitResult : public TEventLocal<TEvLogInitResult, TEvBlobStorage::EvLogInitResult> { - bool IsInitializedGood = false; - TString ErrorStr; - - TEvLogInitResult(bool isInitializedGood, TString error) - : IsInitializedGood(isInitializedGood) - , ErrorStr(error) - {} - - TString ToString() const { - return ToString(*this); - } - - static TString ToString(const TEvLogInitResult &record) { - Y_UNUSED(record); - TStringStream str; - str << "{"; - str << "EvLogInitResult"; - str << "}"; - return str.Str(); - } -}; - -struct TEvReadFormatResult : public TEventLocal<TEvReadFormatResult, TEvBlobStorage::EvReadFormatResult> { - TAlignedData FormatSectors; - ui32 FormatSectorsSize; - TEvReadFormatResult(ui32 formatSectorsSize, bool useHugePages) - : FormatSectors(formatSectorsSize, useHugePages) - , FormatSectorsSize(formatSectorsSize) - {} -}; - -struct TEvDeviceError : public TEventLocal<TEvDeviceError, TEvBlobStorage::EvDeviceError> { - TString Info; - - TEvDeviceError(const TString& info) - : Info(info) - {} -}; - -} // NPDisk -} // NKikimr + : Data(data) + , Size(size) + , Offset(offset) + , CompletionAction(completionAction) + , ReqId(reqId) + {} + + TString ToString() const { + return ToString(*this); + } + + static TString ToString(const TEvReadLogContinue &record) { + TStringStream str; + str << "{"; + str << "EvReadLogContinue "; + str << "Size# " << record.Size << " "; + str << "Offset# " << record.Offset << " "; + str << "ReqId# " << record.ReqId << " "; + str << "}"; + return str.Str(); + } +}; + +//////////////////////////////////////////////////////////////////////////// +// This event is used for restoring broken sectors found while reading log +// 
+//////////////////////////////////////////////////////////////////////////// + +struct TEvLogSectorRestore : public TEventLocal<TEvLogSectorRestore, TEvBlobStorage::EvLogSectorRestore> { + void *Data; + ui32 Size; + ui64 Offset; + TCompletionAction *CompletionAction; + + TEvLogSectorRestore(void *data, ui32 size, ui64 offset, TCompletionAction *completionAction) + : Data(data) + , Size(size) + , Offset(offset) + , CompletionAction(completionAction) + {} + + TString ToString() const { + return ToString(*this); + } + + static TString ToString(const TEvLogSectorRestore &record) { + TStringStream str; + str << "{"; + str << "EvLogSectorRestore "; + str << "Size# " << record.Size << " "; + str << "Offset# " << record.Offset << " "; + str << "}"; + return str.Str(); + } +}; + +//////////////////////////////////////////////////////////////////////////// +// Event is used for returning result of processing previously read log +// +//////////////////////////////////////////////////////////////////////////// + +struct TEvLogInitResult : public TEventLocal<TEvLogInitResult, TEvBlobStorage::EvLogInitResult> { + bool IsInitializedGood = false; + TString ErrorStr; + + TEvLogInitResult(bool isInitializedGood, TString error) + : IsInitializedGood(isInitializedGood) + , ErrorStr(error) + {} + + TString ToString() const { + return ToString(*this); + } + + static TString ToString(const TEvLogInitResult &record) { + Y_UNUSED(record); + TStringStream str; + str << "{"; + str << "EvLogInitResult"; + str << "}"; + return str.Str(); + } +}; + +struct TEvReadFormatResult : public TEventLocal<TEvReadFormatResult, TEvBlobStorage::EvReadFormatResult> { + TAlignedData FormatSectors; + ui32 FormatSectorsSize; + TEvReadFormatResult(ui32 formatSectorsSize, bool useHugePages) + : FormatSectors(formatSectorsSize, useHugePages) + , FormatSectorsSize(formatSectorsSize) + {} +}; + +struct TEvDeviceError : public TEventLocal<TEvDeviceError, TEvBlobStorage::EvDeviceError> { + TString Info; + + 
TEvDeviceError(const TString& info) + : Info(info) + {} +}; + +} // NPDisk +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper.h index 646e7d7654..8c298a672d 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper.h @@ -1,12 +1,12 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_defs.h" -#include "blobstorage_pdisk_config.h" -#include "blobstorage_pdisk_chunk_tracker.h" -#include "blobstorage_pdisk_free_chunks.h" -#include "blobstorage_pdisk_keeper_params.h" -#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_defs.h" +#include "blobstorage_pdisk_config.h" +#include "blobstorage_pdisk_chunk_tracker.h" +#include "blobstorage_pdisk_free_chunks.h" +#include "blobstorage_pdisk_keeper_params.h" +#include "blobstorage_pdisk_mon.h" namespace NKikimr { namespace NPDisk { @@ -33,8 +33,8 @@ public: , Cfg(cfg) , UntrimmedFreeChunks(Mon.UntrimmedFreeChunks, cfg->SortFreeChunksPerItems) , TrimmedFreeChunks(Mon.FreeChunks, cfg->SortFreeChunksPerItems) - , ChunkTracker() - {} + , ChunkTracker() + {} // // Initialization @@ -56,8 +56,8 @@ public: // Add/remove owner // - void AddOwner(TOwner owner, TVDiskID vdiskId) { - ChunkTracker.AddOwner(owner, vdiskId); + void AddOwner(TOwner owner, TVDiskID vdiskId) { + ChunkTracker.AddOwner(owner, vdiskId); } void RemoveOwner(TOwner owner) { @@ -75,18 +75,18 @@ public: return TrimmedFreeChunks.Size(); } - i64 GetOwnerHardLimit(TOwner owner) const { + i64 GetOwnerHardLimit(TOwner owner) const { return ChunkTracker.GetOwnerHardLimit(owner); } - i64 GetOwnerFree(TOwner owner) const { + i64 GetOwnerFree(TOwner owner) const { return ChunkTracker.GetOwnerFree(owner); } - i64 GetOwnerUsed(TOwner owner) const { - return ChunkTracker.GetOwnerUsed(owner); - } - + i64 GetOwnerUsed(TOwner owner) const { + return ChunkTracker.GetOwnerUsed(owner); + } + TChunkIdx 
PopOwnerFreeChunk(TOwner owner, TString &outErrorReason) { if (ChunkTracker.TryAllocate(owner, 1, outErrorReason)) { TChunkIdx idx = PopFree(outErrorReason); @@ -99,23 +99,23 @@ public: } } - TVector<TChunkIdx> PopOwnerFreeChunks(TOwner owner, ui32 chunkCount, TString &outErrorReason) { - TVector<TChunkIdx> chunks; + TVector<TChunkIdx> PopOwnerFreeChunks(TOwner owner, ui32 chunkCount, TString &outErrorReason) { + TVector<TChunkIdx> chunks; if (ChunkTracker.TryAllocate(owner, chunkCount, outErrorReason)) { - chunks.resize(chunkCount); + chunks.resize(chunkCount); for (ui32 i = 0; i < chunkCount; ++i) { TChunkIdx idx = PopFree(outErrorReason); if (idx == 0) { for (ui32 f = 0; f < i; ++f) { - UntrimmedFreeChunks.Push(chunks[f]); + UntrimmedFreeChunks.Push(chunks[f]); } ChunkTracker.Release(owner, chunkCount); - return {}; + return {}; } - chunks[i] = idx; + chunks[i] = idx; } } - return chunks; + return chunks; } void PushFreeOwnerChunk(TOwner owner, TChunkIdx chunkIdx) { @@ -124,12 +124,12 @@ public: ChunkTracker.Release(owner, 1); } - TStatusFlags GetSpaceStatusFlags(TOwner owner) const { + TStatusFlags GetSpaceStatusFlags(TOwner owner) const { return ChunkTracker.GetSpaceStatusFlags(owner); } - NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(TOwner owner, i64 allocationSize) const { - return ChunkTracker.EstimateSpaceColor(owner, allocationSize); + NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(TOwner owner, i64 allocationSize) const { + return ChunkTracker.EstimateSpaceColor(owner, allocationSize); } // @@ -157,8 +157,8 @@ public: // // GUI // - void PrintHTML(IOutputStream &str) { - ChunkTracker.PrintHTML(str); + void PrintHTML(IOutputStream &str) { + ChunkTracker.PrintHTML(str); } protected: diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper_params.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper_params.h index dc386f8ba3..cc35e80b81 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper_params.h +++ 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_keeper_params.h @@ -12,11 +12,11 @@ namespace NPDisk { // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct TOwnerInfo { - i64 ChunksOwned; - TVDiskID VDiskId; -}; - +struct TOwnerInfo { + i64 ChunksOwned; + TVDiskID VDiskId; +}; + struct TKeeperParams { // Total number of chunks of the disk i64 TotalChunks = 0; @@ -34,9 +34,9 @@ struct TKeeperParams { bool HasStaticGroups = false; // Initially owned chunk count for each owner, must be present for all currently present owners - TMap<TOwner, TOwnerInfo> OwnersInfo; - - NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; + TMap<TOwner, TOwnerInfo> OwnersInfo; + + NKikimrBlobStorage::TPDiskSpaceColor::E SpaceColorBorder = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; }; } // NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp index d8b74c022b..caf4b581fa 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.cpp @@ -1,1215 +1,1215 @@ -#include "defs.h" -#include "blobstorage_pdisk_crypto.h" -#include "blobstorage_pdisk_impl.h" -#include "blobstorage_pdisk_logreader.h" -#include "blobstorage_pdisk_sectorrestorator.h" - - -namespace NKikimr { -namespace NPDisk { - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TPDisk log reading -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -// -// Called by the TLogReader on success with the current chunkOwnerMap -// -void TPDisk::ProcessChunkOwnerMap(TMap<ui32, TChunkState> &chunkOwnerMap) { - TGuard<TMutex> guard(StateMutex); - auto print = [&] () { - std::map<TOwner, std::vector<ui32>> ownerToChunks; 
- for (const auto& [idx, state] : chunkOwnerMap) { - ownerToChunks[state.OwnerId].push_back(idx); - } - - TStringStream str; - str << "PDiskId# " << PDiskId << " ProcessChunkOwnerMap; "; - for (auto& [owner, chunks] : ownerToChunks) { - std::sort(chunks.begin(), chunks.end()); - str << " Owner# " << owner << " ["; - bool first = true; - for (auto idx : chunks) { - str << (std::exchange(first, false) ? "" : " ") << idx; - } - str << "];"; - - } - return str.Str(); - }; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, print()); - - for (TMap<ui32, TChunkState>::iterator it = chunkOwnerMap.begin(); it != chunkOwnerMap.end(); ++it) { - ui32 chunkIdx = it->first; - TOwner ownerId = it->second.OwnerId; - ui64 chunkNonce = it->second.Nonce; - TChunkState &state = ChunkState[chunkIdx]; - - if (state.OwnerId == OwnerSystem && IsOwnerAllocated(ownerId)) { - // The situation is: - // ChunkState record states that the chunk is used by System (log or syslog/format) - // OwnerMap states that the chunk is used by some user - - // Make sure the chunk is not really a part of syslog/format - Y_VERIFY_S(chunkIdx > Format.SystemChunkCount, "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx - << " SystemChunkCount# " << Format.SystemChunkCount); - - // Make sure the chunk is not really a part of the log - for (const auto& logChunk : LogChunks) { - if (logChunk.ChunkIdx == chunkIdx) { - TStringStream out; - out << "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx; - out << " is a part of the log and is owned by user, ownerIdx# " << ownerId; - out << " LogChunks# {"; - for (const auto& chunk : LogChunks) { - out << chunk << ", "; - } - out << "}"; - Y_FAIL_S(out.Str()); - } - } - // After all, it looks like it's OK - } - - if (state.OwnerId != OwnerSystem || state.OwnerId == ownerId) { - if (IsOwnerUser(state.OwnerId) && state.CommitState == TChunkState::DATA_COMMITTED) { - Mon.CommitedDataChunks->Dec(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " 
Line# %" PRIu32 " --CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " prev ownerId# %" PRIu32, - (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, - (ui32)state.OwnerId); - } - state.OwnerId = ownerId; - state.Nonce = chunkNonce; - if (IsOwnerAllocated(ownerId)) { - state.CommitState = TChunkState::DATA_COMMITTED; - if (IsOwnerUser(ownerId)) { - Mon.CommitedDataChunks->Inc(); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " Line# %" PRIu32 " ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, - (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, - (ui32)ownerId); - } - } else { - state.CommitState = TChunkState::FREE; - } - } - } -} - -void TPDisk::ProcessReadLogRecord(TLogRecordHeader &header, TString &data, NPDisk::TOwner owner, ui64 nonce, - NPDisk::TEvReadLogResult* result, TMap<ui32, TChunkState> *outChunkOwnerMap, bool isInitial, - bool parseCommitMessage) { - - if (isInitial || header.OwnerId == owner) { - if (header.Signature.HasCommitRecord()) { - TCommitRecordFooter *footer = (TCommitRecordFooter*) - ((ui8*)data.data() + data.size() - sizeof(TCommitRecordFooter)); - ui32 *deletes = (ui32*)((ui8*)footer - footer->DeleteCount * sizeof(ui32)); - ui64 *commitNonces = (ui64*)((ui8*)deletes - footer->CommitCount * sizeof(ui64)); - ui32 *commits = (ui32*)((ui8*)commitNonces - footer->CommitCount * sizeof(ui32)); - { - TGuard<TMutex> guard(StateMutex); - TOwnerData &ownerData = OwnerData[header.OwnerId]; - if (isInitial) { - if (parseCommitMessage) { - for (ui32 i = 0; i < footer->DeleteCount; ++i) { - const ui32 chunkId = ReadUnaligned<ui32>(&deletes[i]); - (*outChunkOwnerMap)[chunkId].OwnerId = OwnerUnallocated; - } - for (ui32 i = 0; i < footer->CommitCount; ++i) { - const ui32 chunkId = ReadUnaligned<ui32>(&commits[i]); - (*outChunkOwnerMap)[chunkId].OwnerId = header.OwnerId; - (*outChunkOwnerMap)[chunkId].Nonce = 
ReadUnaligned<ui64>(&commitNonces[i]); - } - } - if (ownerData.VDiskId != TVDiskID::InvalidId) { - if (ownerData.CurrentFirstLsnToKeep < footer->FirstLsnToKeep) { - LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 - " ProcessReadLogRecord ownerId# %" PRIu32 " set FirstLsnToKeep# %" PRIu64 - " caused by Lsn# %" PRIu64, (ui32)PDiskId, (ui32)header.OwnerId, - (ui64)footer->FirstLsnToKeep, (ui64)header.OwnerLsn); - ownerData.CurrentFirstLsnToKeep = footer->FirstLsnToKeep; - } - ownerData.LogRecordsInitiallyRead++; - } - } else { - if (ownerData.VDiskId != TVDiskID::InvalidId) { - ownerData.LogRecordsConsequentlyRead++; - } - } - } - - data.resize(footer->UserDataSize); - if (isInitial && footer->IsStartingPoint) { - TGuard<TMutex> guard(StateMutex); - TOwnerData &ownerData = OwnerData[header.OwnerId]; - if (ownerData.VDiskId != TVDiskID::InvalidId) { - TLogSignature unmasked = header.Signature.GetUnmasked(); - ownerData.StartingPoints[unmasked] = NPDisk::TLogRecord(unmasked, data, header.OwnerLsn); - } - } - } else { - TGuard<TMutex> guard(StateMutex); - TOwnerData &ownerData = OwnerData[header.OwnerId]; - if (ownerData.VDiskId != TVDiskID::InvalidId) { - if (isInitial) { - ownerData.LogRecordsInitiallyRead++; - } else { - ownerData.LogRecordsConsequentlyRead++; - } - } - } - - if (header.OwnerId == owner) { - { - TGuard<TMutex> guard(StateMutex); - TOwnerData &ownerData = OwnerData[header.OwnerId]; - if (ownerData.VDiskId != TVDiskID::InvalidId) { - if (!ownerData.IsNextLsnOk(header.OwnerLsn)) { - TStringStream str; - str << "Lsn reversal! 
PDiskId# " << (ui32)PDiskId - << " ownerId# " << (ui32)owner - << " LogStartPosition# " << ownerData.LogStartPosition - << " LastSeenLsn# " << ownerData.LastSeenLsn - << " header.OwnerLsn# " << header.OwnerLsn - << " nonce# " << nonce - << Endl; - Y_FAIL_S(str.Str()); - } - ownerData.LastSeenLsn = header.OwnerLsn; - } - } - result->Results.push_back(NPDisk::TLogRecord(header.Signature.GetUnmasked(), data, header.OwnerLsn)); - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TLogReader part -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -struct TLogReader::TSectorData { - ui64 Offset; - TBuffer::TPtr Buffer; - ui32 DataSize; - ui64 PreparedOffset; - ui64 PreparedSize; - bool IsScheduled; - - TSectorData(TBuffer *buffer, ui32 dataSize) - : Offset(0) - , Buffer(buffer) - , DataSize(dataSize) - , PreparedOffset(0) - , PreparedSize(0) - , IsScheduled(false) - {} - - TString ToString() { - TStringStream str; - str << "{Offset# " << Offset - << " Buffer->Data()# " << (void*)Buffer->Data() - << " DataSize# " << DataSize - << " PreparedOffset# " << PreparedOffset - << " PreparedSize# " << PreparedSize - << " IsScheduled# " << (IsScheduled ? "true" : "false") - << "}"; - return str.Str(); - } - - ui8* GetData() { - return Buffer->Data() + (Offset - PreparedOffset); - } - - bool IsAvailable(ui64 offset, ui64 size) const { - return PreparedOffset <= offset && offset + size <= PreparedOffset + PreparedSize; - } - - bool IsAvailable(ui64 size) const { - return IsAvailable(Offset, size); - } - - ui8* GetDataIfAvailable(ui64 offset, ui64 size) { - return IsAvailable(offset, size) ? 
GetData() : nullptr; - } - - bool SetOffset(ui64 newOffset) { - bool isValid = false; - if (PreparedOffset <= newOffset && newOffset < PreparedOffset + PreparedSize) { - if (newOffset < Offset) { - PreparedOffset = 0; - PreparedSize = 0; - } else { - isValid = true; - } - } - Offset = newOffset; - return isValid; - } - - void Prepare(ui64 size) { - PreparedOffset = Offset; - PreparedSize = size; - } -}; - -class TLogReader::TDoubleBuffer { - THolder<TSectorData> SectorA; - THolder<TSectorData> SectorB; - const ui64 PDiskSectorSize; - -public: - TDoubleBuffer(TPDisk *pdisk) - : SectorA(MakeHolder<TSectorData>(pdisk->BufferPool->Pop(), pdisk->Format.SectorSize * BufferSizeSectors)) - , SectorB(MakeHolder<TSectorData>(pdisk->BufferPool->Pop(), pdisk->Format.SectorSize * BufferSizeSectors)) - , PDiskSectorSize(pdisk->Format.SectorSize) - {} - - ui64 BufferIdxFromOffset(ui64 innerOffset) const { - return (innerOffset / (PDiskSectorSize * BufferSizeSectors)) % 2; - } - - TSectorData *DataByOffset(ui64 offset) const { - return DataByIdx(BufferIdxFromOffset(offset)); - } - - TSectorData *DataByIdx(ui32 idx) const { - if (idx % 2) { - return SectorB.Get(); - } else { - return SectorA.Get(); - } - } -}; - +#include "defs.h" +#include "blobstorage_pdisk_crypto.h" +#include "blobstorage_pdisk_impl.h" +#include "blobstorage_pdisk_logreader.h" +#include "blobstorage_pdisk_sectorrestorator.h" + + +namespace NKikimr { +namespace NPDisk { + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TPDisk log reading +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// +// Called by the TLogReader on success with the current chunkOwnerMap +// +void TPDisk::ProcessChunkOwnerMap(TMap<ui32, TChunkState> &chunkOwnerMap) { + TGuard<TMutex> guard(StateMutex); + auto print = [&] () { + std::map<TOwner, std::vector<ui32>> ownerToChunks; + for (const auto& 
[idx, state] : chunkOwnerMap) { + ownerToChunks[state.OwnerId].push_back(idx); + } + + TStringStream str; + str << "PDiskId# " << PDiskId << " ProcessChunkOwnerMap; "; + for (auto& [owner, chunks] : ownerToChunks) { + std::sort(chunks.begin(), chunks.end()); + str << " Owner# " << owner << " ["; + bool first = true; + for (auto idx : chunks) { + str << (std::exchange(first, false) ? "" : " ") << idx; + } + str << "];"; + + } + return str.Str(); + }; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, print()); + + for (TMap<ui32, TChunkState>::iterator it = chunkOwnerMap.begin(); it != chunkOwnerMap.end(); ++it) { + ui32 chunkIdx = it->first; + TOwner ownerId = it->second.OwnerId; + ui64 chunkNonce = it->second.Nonce; + TChunkState &state = ChunkState[chunkIdx]; + + if (state.OwnerId == OwnerSystem && IsOwnerAllocated(ownerId)) { + // The situation is: + // ChunkState record states that the chunk is used by System (log or syslog/format) + // OwnerMap states that the chunk is used by some user + + // Make sure the chunk is not really a part of syslog/format + Y_VERIFY_S(chunkIdx > Format.SystemChunkCount, "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx + << " SystemChunkCount# " << Format.SystemChunkCount); + + // Make sure the chunk is not really a part of the log + for (const auto& logChunk : LogChunks) { + if (logChunk.ChunkIdx == chunkIdx) { + TStringStream out; + out << "PDiskId# " << PDiskId << " chunkIdx# " << chunkIdx; + out << " is a part of the log and is owned by user, ownerIdx# " << ownerId; + out << " LogChunks# {"; + for (const auto& chunk : LogChunks) { + out << chunk << ", "; + } + out << "}"; + Y_FAIL_S(out.Str()); + } + } + // After all, it looks like it's OK + } + + if (state.OwnerId != OwnerSystem || state.OwnerId == ownerId) { + if (IsOwnerUser(state.OwnerId) && state.CommitState == TChunkState::DATA_COMMITTED) { + Mon.CommitedDataChunks->Dec(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Line# %" PRIu32 " 
--CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " prev ownerId# %" PRIu32, + (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, + (ui32)state.OwnerId); + } + state.OwnerId = ownerId; + state.Nonce = chunkNonce; + if (IsOwnerAllocated(ownerId)) { + state.CommitState = TChunkState::DATA_COMMITTED; + if (IsOwnerUser(ownerId)) { + Mon.CommitedDataChunks->Inc(); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " Line# %" PRIu32 " ++CommitedDataChunks# %" PRIi64 " chunkIdx# %" PRIu32 " ownerId# %" PRIu32, + (ui32)PDiskId, (ui32)__LINE__, (i64)Mon.CommitedDataChunks->Val(), (ui32)chunkIdx, + (ui32)ownerId); + } + } else { + state.CommitState = TChunkState::FREE; + } + } + } +} + +void TPDisk::ProcessReadLogRecord(TLogRecordHeader &header, TString &data, NPDisk::TOwner owner, ui64 nonce, + NPDisk::TEvReadLogResult* result, TMap<ui32, TChunkState> *outChunkOwnerMap, bool isInitial, + bool parseCommitMessage) { + + if (isInitial || header.OwnerId == owner) { + if (header.Signature.HasCommitRecord()) { + TCommitRecordFooter *footer = (TCommitRecordFooter*) + ((ui8*)data.data() + data.size() - sizeof(TCommitRecordFooter)); + ui32 *deletes = (ui32*)((ui8*)footer - footer->DeleteCount * sizeof(ui32)); + ui64 *commitNonces = (ui64*)((ui8*)deletes - footer->CommitCount * sizeof(ui64)); + ui32 *commits = (ui32*)((ui8*)commitNonces - footer->CommitCount * sizeof(ui32)); + { + TGuard<TMutex> guard(StateMutex); + TOwnerData &ownerData = OwnerData[header.OwnerId]; + if (isInitial) { + if (parseCommitMessage) { + for (ui32 i = 0; i < footer->DeleteCount; ++i) { + const ui32 chunkId = ReadUnaligned<ui32>(&deletes[i]); + (*outChunkOwnerMap)[chunkId].OwnerId = OwnerUnallocated; + } + for (ui32 i = 0; i < footer->CommitCount; ++i) { + const ui32 chunkId = ReadUnaligned<ui32>(&commits[i]); + (*outChunkOwnerMap)[chunkId].OwnerId = header.OwnerId; + (*outChunkOwnerMap)[chunkId].Nonce = ReadUnaligned<ui64>(&commitNonces[i]); + } + 
} + if (ownerData.VDiskId != TVDiskID::InvalidId) { + if (ownerData.CurrentFirstLsnToKeep < footer->FirstLsnToKeep) { + LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 + " ProcessReadLogRecord ownerId# %" PRIu32 " set FirstLsnToKeep# %" PRIu64 + " caused by Lsn# %" PRIu64, (ui32)PDiskId, (ui32)header.OwnerId, + (ui64)footer->FirstLsnToKeep, (ui64)header.OwnerLsn); + ownerData.CurrentFirstLsnToKeep = footer->FirstLsnToKeep; + } + ownerData.LogRecordsInitiallyRead++; + } + } else { + if (ownerData.VDiskId != TVDiskID::InvalidId) { + ownerData.LogRecordsConsequentlyRead++; + } + } + } + + data.resize(footer->UserDataSize); + if (isInitial && footer->IsStartingPoint) { + TGuard<TMutex> guard(StateMutex); + TOwnerData &ownerData = OwnerData[header.OwnerId]; + if (ownerData.VDiskId != TVDiskID::InvalidId) { + TLogSignature unmasked = header.Signature.GetUnmasked(); + ownerData.StartingPoints[unmasked] = NPDisk::TLogRecord(unmasked, data, header.OwnerLsn); + } + } + } else { + TGuard<TMutex> guard(StateMutex); + TOwnerData &ownerData = OwnerData[header.OwnerId]; + if (ownerData.VDiskId != TVDiskID::InvalidId) { + if (isInitial) { + ownerData.LogRecordsInitiallyRead++; + } else { + ownerData.LogRecordsConsequentlyRead++; + } + } + } + + if (header.OwnerId == owner) { + { + TGuard<TMutex> guard(StateMutex); + TOwnerData &ownerData = OwnerData[header.OwnerId]; + if (ownerData.VDiskId != TVDiskID::InvalidId) { + if (!ownerData.IsNextLsnOk(header.OwnerLsn)) { + TStringStream str; + str << "Lsn reversal! 
PDiskId# " << (ui32)PDiskId + << " ownerId# " << (ui32)owner + << " LogStartPosition# " << ownerData.LogStartPosition + << " LastSeenLsn# " << ownerData.LastSeenLsn + << " header.OwnerLsn# " << header.OwnerLsn + << " nonce# " << nonce + << Endl; + Y_FAIL_S(str.Str()); + } + ownerData.LastSeenLsn = header.OwnerLsn; + } + } + result->Results.push_back(NPDisk::TLogRecord(header.Signature.GetUnmasked(), data, header.OwnerLsn)); + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TLogReader part +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TLogReader::TSectorData { + ui64 Offset; + TBuffer::TPtr Buffer; + ui32 DataSize; + ui64 PreparedOffset; + ui64 PreparedSize; + bool IsScheduled; + + TSectorData(TBuffer *buffer, ui32 dataSize) + : Offset(0) + , Buffer(buffer) + , DataSize(dataSize) + , PreparedOffset(0) + , PreparedSize(0) + , IsScheduled(false) + {} + + TString ToString() { + TStringStream str; + str << "{Offset# " << Offset + << " Buffer->Data()# " << (void*)Buffer->Data() + << " DataSize# " << DataSize + << " PreparedOffset# " << PreparedOffset + << " PreparedSize# " << PreparedSize + << " IsScheduled# " << (IsScheduled ? "true" : "false") + << "}"; + return str.Str(); + } + + ui8* GetData() { + return Buffer->Data() + (Offset - PreparedOffset); + } + + bool IsAvailable(ui64 offset, ui64 size) const { + return PreparedOffset <= offset && offset + size <= PreparedOffset + PreparedSize; + } + + bool IsAvailable(ui64 size) const { + return IsAvailable(Offset, size); + } + + ui8* GetDataIfAvailable(ui64 offset, ui64 size) { + return IsAvailable(offset, size) ? 
GetData() : nullptr; + } + + bool SetOffset(ui64 newOffset) { + bool isValid = false; + if (PreparedOffset <= newOffset && newOffset < PreparedOffset + PreparedSize) { + if (newOffset < Offset) { + PreparedOffset = 0; + PreparedSize = 0; + } else { + isValid = true; + } + } + Offset = newOffset; + return isValid; + } + + void Prepare(ui64 size) { + PreparedOffset = Offset; + PreparedSize = size; + } +}; + +class TLogReader::TDoubleBuffer { + THolder<TSectorData> SectorA; + THolder<TSectorData> SectorB; + const ui64 PDiskSectorSize; + +public: + TDoubleBuffer(TPDisk *pdisk) + : SectorA(MakeHolder<TSectorData>(pdisk->BufferPool->Pop(), pdisk->Format.SectorSize * BufferSizeSectors)) + , SectorB(MakeHolder<TSectorData>(pdisk->BufferPool->Pop(), pdisk->Format.SectorSize * BufferSizeSectors)) + , PDiskSectorSize(pdisk->Format.SectorSize) + {} + + ui64 BufferIdxFromOffset(ui64 innerOffset) const { + return (innerOffset / (PDiskSectorSize * BufferSizeSectors)) % 2; + } + + TSectorData *DataByOffset(ui64 offset) const { + return DataByIdx(BufferIdxFromOffset(offset)); + } + + TSectorData *DataByIdx(ui32 idx) const { + if (idx % 2) { + return SectorB.Get(); + } else { + return SectorA.Get(); + } + } +}; + TLogReader::TLogReader(bool isInitial,TPDisk *pDisk, TActorSystem * const actorSystem, const TActorId &replyTo, NPDisk::TOwner owner, - TLogPosition ownerLogStartPosition, EOwnerGroupType ownerGroupType, TLogPosition position, ui64 sizeLimit, - ui64 lastNonce, ui32 logEndChunkIdx, ui64 logEndSectorIdx, TReqId reqId, - TVector<TChunkIdx> &&chunksToRead, ui64 firstLsnToKeep, ui64 firstNonceToKeep, TVDiskID ownerVDiskId) - : IsInitial(isInitial) - , PDisk(pDisk) - , ActorSystem(actorSystem) - , ReplyTo(replyTo) - , Owner(owner) - , OwnerLogStartPosition(ownerLogStartPosition) - , Position(position) - , SizeLimit(sizeLimit) - , Result(new NPDisk::TEvReadLogResult( - NKikimrProto::ERROR, position, TLogPosition::Invalid(), false, - pDisk->GetStatusFlags(owner, ownerGroupType), 
nullptr, owner)) - , ChunkInfo(nullptr) - , Sector(new TDoubleBuffer(pDisk)) - , ChunkOwnerMap(IsInitial ? new TMap<ui32, TChunkState>() : nullptr) - , State(ELogReaderState::PrepareToRead) - , IsReplied(false) - , LastGoodToWriteLogPosition{0, 0} - , MaxNonce(lastNonce) - , LastNonce(lastNonce) - , LastDataNonce(lastNonce) - , OnEndOfSplice(false) - , Cypher(pDisk->Cfg->EnableSectorEncryption) - , OffsetInSector(0) - , SetLastGoodToWritePosition(true) - , ChunkIdx(0) - , SectorIdx(0) - , IsLastRecordHeaderValid(false) - , FirstLsnToKeep(firstLsnToKeep) - , FirstNonceToKeep(firstNonceToKeep) - , OwnerVDiskId(ownerVDiskId) - , IsLastRecordSkipped(true) - , ResultSize(0) - , LastRecordHeader(0, 0, 0) - , LastRecordHeaderNonce(0) - , LastRecordDataWritePosition(0) - , MaxCompleteLsnCyclic(0) - , EndSectorIdx(0) - , ExpectedOffset(0) - , LogEndChunkIdx(logEndChunkIdx) - , LogEndSectorIdx(logEndSectorIdx) - , ReqId(reqId) - , ChunksToRead(std::move(chunksToRead)) - , CurrentChunkToRead(ChunksToRead.end()) - , ParseCommits(false) // Actual only if IsInitial -{ - Y_VERIFY(PDisk->PDiskThread.Id() == TThread::CurrentThreadId(), "Constructor of TLogReader must be called" - " from PDiskThread"); - Cypher.SetKey(PDisk->Format.LogKey); - AtomicIncrement(PDisk->InFlightLogRead); - - // If there was no log chunks when SysLog was written FirstLogChunkToParseCommits is equals to LogHeadChunkIdx - ParseCommits = PDisk->FirstLogChunkToParseCommits == PDisk->SysLogRecord.LogHeadChunkIdx; - - auto printChunks = [&]() { - TStringStream ss; - ss << SelfInfo() << " ChunksToRead# ["; - bool first = true; - for (auto idx : ChunksToRead) { - ss << (first ? 
first = false, "" : " ") << idx; - } - ss << "]"; - return ss.Str(); - }; - - LOG_INFO_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " ChunksToRead# " << printChunks()); -} - -TLogReader::~TLogReader() { - AtomicDecrement(PDisk->InFlightLogRead); -} - -void TLogReader::Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) { - Y_UNUSED(actorSystem); - TGuard<TMutex> guard(ExecMutex); - if (IsReplied.load()) { - return; - } - TDiskFormat &format = PDisk->Format; - if (badOffsets.size()) { - bool isOk = RegisterBadOffsets(badOffsets); - if (!isOk) { - LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, - "PDiskId# %" PRIu32 " Log is damaged and unrevocerable!", - (ui32)PDisk->PDiskId); - ReplyError(); - return; - } - } - ui64 idxRead = Sector->BufferIdxFromOffset(offsetRead); - ui64 idxExpected = Sector->BufferIdxFromOffset(ExpectedOffset); - Sector->DataByIdx(idxRead)->IsScheduled = false; - - if (idxRead != idxExpected && Sector->DataByIdx(idxExpected)->IsScheduled) { - return; - } - - Sector->DataByIdx(idxRead)->SetOffset(ExpectedOffset); - - while (true) { - switch(State) { - case ELogReaderState::PrepareToRead: - if (PrepareToRead()) { - return; // Already replied - } - State = ELogReaderState::NewLogChunk; - break; - case ELogReaderState::NewLogChunk: - if (ChunkIdx == 0) { + TLogPosition ownerLogStartPosition, EOwnerGroupType ownerGroupType, TLogPosition position, ui64 sizeLimit, + ui64 lastNonce, ui32 logEndChunkIdx, ui64 logEndSectorIdx, TReqId reqId, + TVector<TChunkIdx> &&chunksToRead, ui64 firstLsnToKeep, ui64 firstNonceToKeep, TVDiskID ownerVDiskId) + : IsInitial(isInitial) + , PDisk(pDisk) + , ActorSystem(actorSystem) + , ReplyTo(replyTo) + , Owner(owner) + , OwnerLogStartPosition(ownerLogStartPosition) + , Position(position) + , SizeLimit(sizeLimit) + , Result(new NPDisk::TEvReadLogResult( + NKikimrProto::ERROR, position, TLogPosition::Invalid(), false, + pDisk->GetStatusFlags(owner, ownerGroupType), nullptr, 
owner)) + , ChunkInfo(nullptr) + , Sector(new TDoubleBuffer(pDisk)) + , ChunkOwnerMap(IsInitial ? new TMap<ui32, TChunkState>() : nullptr) + , State(ELogReaderState::PrepareToRead) + , IsReplied(false) + , LastGoodToWriteLogPosition{0, 0} + , MaxNonce(lastNonce) + , LastNonce(lastNonce) + , LastDataNonce(lastNonce) + , OnEndOfSplice(false) + , Cypher(pDisk->Cfg->EnableSectorEncryption) + , OffsetInSector(0) + , SetLastGoodToWritePosition(true) + , ChunkIdx(0) + , SectorIdx(0) + , IsLastRecordHeaderValid(false) + , FirstLsnToKeep(firstLsnToKeep) + , FirstNonceToKeep(firstNonceToKeep) + , OwnerVDiskId(ownerVDiskId) + , IsLastRecordSkipped(true) + , ResultSize(0) + , LastRecordHeader(0, 0, 0) + , LastRecordHeaderNonce(0) + , LastRecordDataWritePosition(0) + , MaxCompleteLsnCyclic(0) + , EndSectorIdx(0) + , ExpectedOffset(0) + , LogEndChunkIdx(logEndChunkIdx) + , LogEndSectorIdx(logEndSectorIdx) + , ReqId(reqId) + , ChunksToRead(std::move(chunksToRead)) + , CurrentChunkToRead(ChunksToRead.end()) + , ParseCommits(false) // Actual only if IsInitial +{ + Y_VERIFY(PDisk->PDiskThread.Id() == TThread::CurrentThreadId(), "Constructor of TLogReader must be called" + " from PDiskThread"); + Cypher.SetKey(PDisk->Format.LogKey); + AtomicIncrement(PDisk->InFlightLogRead); + + // If there was no log chunks when SysLog was written FirstLogChunkToParseCommits is equals to LogHeadChunkIdx + ParseCommits = PDisk->FirstLogChunkToParseCommits == PDisk->SysLogRecord.LogHeadChunkIdx; + + auto printChunks = [&]() { + TStringStream ss; + ss << SelfInfo() << " ChunksToRead# ["; + bool first = true; + for (auto idx : ChunksToRead) { + ss << (first ? 
first = false, "" : " ") << idx; + } + ss << "]"; + return ss.Str(); + }; + + LOG_INFO_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " ChunksToRead# " << printChunks()); +} + +TLogReader::~TLogReader() { + AtomicDecrement(PDisk->InFlightLogRead); +} + +void TLogReader::Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) { + Y_UNUSED(actorSystem); + TGuard<TMutex> guard(ExecMutex); + if (IsReplied.load()) { + return; + } + TDiskFormat &format = PDisk->Format; + if (badOffsets.size()) { + bool isOk = RegisterBadOffsets(badOffsets); + if (!isOk) { + LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, + "PDiskId# %" PRIu32 " Log is damaged and unrevocerable!", + (ui32)PDisk->PDiskId); + ReplyError(); + return; + } + } + ui64 idxRead = Sector->BufferIdxFromOffset(offsetRead); + ui64 idxExpected = Sector->BufferIdxFromOffset(ExpectedOffset); + Sector->DataByIdx(idxRead)->IsScheduled = false; + + if (idxRead != idxExpected && Sector->DataByIdx(idxExpected)->IsScheduled) { + return; + } + + Sector->DataByIdx(idxRead)->SetOffset(ExpectedOffset); + + while (true) { + switch(State) { + case ELogReaderState::PrepareToRead: + if (PrepareToRead()) { + return; // Already replied + } + State = ELogReaderState::NewLogChunk; + break; + case ELogReaderState::NewLogChunk: + if (ChunkIdx == 0) { ReplyOk(); - return; // Already replied - } - if (IsInitial) { - PDisk->LogChunks.push_back(TLogChunkInfo(ChunkIdx, (ui32)PDisk->OwnerData.size())); - PDisk->Mon.LogChunks->Inc(); - ChunkInfo = &PDisk->LogChunks.back(); - ChunkInfo->IsEndOfSplice = std::exchange(OnEndOfSplice, false); - if (PDisk->LogChunks.size() > 1) { - auto last = PDisk->LogChunks.rbegin(); - // May be set in NonceJump record processing, and if so it should not be changed - if (!last->PrevChunkLastNonce) { - last->PrevChunkLastNonce = std::next(last)->LastNonce; - } - } - UpdateNewChunkInfo(ChunkIdx, {}); - } - State = ELogReaderState::ScheduleForwardReads; - break; - case 
ELogReaderState::ScheduleForwardReads: - { - if (SectorIdx >= EndSectorIdx) { - // Read next chunk reference - if (IsInitial) { - UpdateLastGoodToWritePosition(); - - ui64 offset = format.Offset(ChunkIdx, SectorIdx); - ui64 idxToRead = Sector->BufferIdxFromOffset(offset); - TSectorData *data = Sector->DataByIdx(idxToRead); - if (!data->IsScheduled) { - State = ELogReaderState::TryProceedNextChunk; - ExpectedOffset = offset; - if (!data->SetOffset(offset)) { - ReleaseUsedBadOffsets(); - ScheduleReadAsync(*data, ReplicationFactor); - return; // Continue when the next sector arrives - } - idxRead = idxToRead; - break; - } - ExpectedOffset = offset; - ReleaseUsedBadOffsets(); - State = ELogReaderState::ScheduleForwardReads; - return; // Continue when the next sector arrives - } else { - ++CurrentChunkToRead; - if (CurrentChunkToRead == ChunksToRead.end()) { + return; // Already replied + } + if (IsInitial) { + PDisk->LogChunks.push_back(TLogChunkInfo(ChunkIdx, (ui32)PDisk->OwnerData.size())); + PDisk->Mon.LogChunks->Inc(); + ChunkInfo = &PDisk->LogChunks.back(); + ChunkInfo->IsEndOfSplice = std::exchange(OnEndOfSplice, false); + if (PDisk->LogChunks.size() > 1) { + auto last = PDisk->LogChunks.rbegin(); + // May be set in NonceJump record processing, and if so it should not be changed + if (!last->PrevChunkLastNonce) { + last->PrevChunkLastNonce = std::next(last)->LastNonce; + } + } + UpdateNewChunkInfo(ChunkIdx, {}); + } + State = ELogReaderState::ScheduleForwardReads; + break; + case ELogReaderState::ScheduleForwardReads: + { + if (SectorIdx >= EndSectorIdx) { + // Read next chunk reference + if (IsInitial) { + UpdateLastGoodToWritePosition(); + + ui64 offset = format.Offset(ChunkIdx, SectorIdx); + ui64 idxToRead = Sector->BufferIdxFromOffset(offset); + TSectorData *data = Sector->DataByIdx(idxToRead); + if (!data->IsScheduled) { + State = ELogReaderState::TryProceedNextChunk; + ExpectedOffset = offset; + if (!data->SetOffset(offset)) { + ReleaseUsedBadOffsets(); + 
ScheduleReadAsync(*data, ReplicationFactor); + return; // Continue when the next sector arrives + } + idxRead = idxToRead; + break; + } + ExpectedOffset = offset; + ReleaseUsedBadOffsets(); + State = ELogReaderState::ScheduleForwardReads; + return; // Continue when the next sector arrives + } else { + ++CurrentChunkToRead; + if (CurrentChunkToRead == ChunksToRead.end()) { ReplyOk(); - return; // Already replied - } - SwitchToChunk(*CurrentChunkToRead); - State = ELogReaderState::ScheduleForwardReads; - } - } - UpdateLastGoodToWritePosition(); - ui64 offset = format.Offset(ChunkIdx, SectorIdx); - ExpectedOffset = offset; - - ui64 idxToRead = Sector->BufferIdxFromOffset(offset); - TSectorData *data = Sector->DataByIdx(idxToRead); - if (data->IsScheduled) { - ReleaseUsedBadOffsets(); - ScheduleForward(offset); - State = ELogReaderState::ScheduleForwardReads; - return; // Continue when the next sector arrives - } - State = ELogReaderState::ProcessAlreadyReadSectors; - if (!data->SetOffset(offset)) { - ui64 sectorsToRead = Min(EndSectorIdx - SectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); - ReleaseUsedBadOffsets(); - ScheduleReadAsync(*data, sectorsToRead); - ScheduleForward(offset); - return; // Continue when the next sector arrives - } - if (idxRead != idxToRead) { - ScheduleForward(offset); - } - idxRead = idxToRead; - break; - } - case ELogReaderState::ProcessAlreadyReadSectors: - { + return; // Already replied + } + SwitchToChunk(*CurrentChunkToRead); + State = ELogReaderState::ScheduleForwardReads; + } + } + UpdateLastGoodToWritePosition(); + ui64 offset = format.Offset(ChunkIdx, SectorIdx); + ExpectedOffset = offset; + + ui64 idxToRead = Sector->BufferIdxFromOffset(offset); + TSectorData *data = Sector->DataByIdx(idxToRead); + if (data->IsScheduled) { + ReleaseUsedBadOffsets(); + ScheduleForward(offset); + State = ELogReaderState::ScheduleForwardReads; + return; // Continue when the next sector arrives + } + State = 
ELogReaderState::ProcessAlreadyReadSectors; + if (!data->SetOffset(offset)) { + ui64 sectorsToRead = Min(EndSectorIdx - SectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); + ReleaseUsedBadOffsets(); + ScheduleReadAsync(*data, sectorsToRead); + ScheduleForward(offset); + return; // Continue when the next sector arrives + } + if (idxRead != idxToRead) { + ScheduleForward(offset); + } + idxRead = idxToRead; + break; + } + case ELogReaderState::ProcessAlreadyReadSectors: + { ui64 sizeToProcess = (ui64)format.SectorSize; - TSectorData *data = Sector->DataByIdx(idxRead); - Y_VERIFY(data->IsAvailable(sizeToProcess)); - bool isEndOfLog = ProcessSectorSet(data); - data->SetOffset(data->Offset + sizeToProcess); - if (isEndOfLog) { - return; // Nothing to read - } - State = ELogReaderState::ScheduleForwardReads; - break; - } - case ELogReaderState::TryProceedNextChunk: - { - ui64 sectorsToProcess = format.IsErasureEncodeNextChunkReference() ? ReplicationFactor : 1; - ui64 sizeToProcess = format.SectorSize * sectorsToProcess; - TSectorData *data = Sector->DataByIdx(idxRead); - if (!data->IsAvailable(sizeToProcess)) { - ui64 offset = format.Offset(ChunkIdx, SectorIdx); - ExpectedOffset = offset; - data->SetOffset(offset); - ScheduleReadAsync(*data, sectorsToProcess); - return; // Continue when the next sector arrives - } - if (ProcessNextChunkReference(*data)) { - return; // There is no next chunk, already replied - } - State = ELogReaderState::NewLogChunk; - break; - } - default: - Y_FAIL(); - break; - } - }// while (true) -} - -TString TLogReader::SelfInfo() { - TStringStream ss; - ss << "PDiskId# " << PDisk->PDiskId - << " LogReader" - << " IsInitial# " << IsInitial; - if (!IsInitial) { - ss << " Owner# " << ui32(Owner) - << " VDiskId# " << OwnerVDiskId.ToStringWOGeneration(); - } - ss << " ChunkIdx# " << ChunkIdx - << " SectorIdx# " << SectorIdx - << " OffsetInSector# " << OffsetInSector; - return ss.Str(); -} - -bool TLogReader::PrepareToRead() { - TDiskFormat &format 
= PDisk->Format; - if (Position == TLogPosition::Invalid()) { - if (IsInitial) { - Y_FAIL(); - } + TSectorData *data = Sector->DataByIdx(idxRead); + Y_VERIFY(data->IsAvailable(sizeToProcess)); + bool isEndOfLog = ProcessSectorSet(data); + data->SetOffset(data->Offset + sizeToProcess); + if (isEndOfLog) { + return; // Nothing to read + } + State = ELogReaderState::ScheduleForwardReads; + break; + } + case ELogReaderState::TryProceedNextChunk: + { + ui64 sectorsToProcess = format.IsErasureEncodeNextChunkReference() ? ReplicationFactor : 1; + ui64 sizeToProcess = format.SectorSize * sectorsToProcess; + TSectorData *data = Sector->DataByIdx(idxRead); + if (!data->IsAvailable(sizeToProcess)) { + ui64 offset = format.Offset(ChunkIdx, SectorIdx); + ExpectedOffset = offset; + data->SetOffset(offset); + ScheduleReadAsync(*data, sectorsToProcess); + return; // Continue when the next sector arrives + } + if (ProcessNextChunkReference(*data)) { + return; // There is no next chunk, already replied + } + State = ELogReaderState::NewLogChunk; + break; + } + default: + Y_FAIL(); + break; + } + }// while (true) +} + +TString TLogReader::SelfInfo() { + TStringStream ss; + ss << "PDiskId# " << PDisk->PDiskId + << " LogReader" + << " IsInitial# " << IsInitial; + if (!IsInitial) { + ss << " Owner# " << ui32(Owner) + << " VDiskId# " << OwnerVDiskId.ToStringWOGeneration(); + } + ss << " ChunkIdx# " << ChunkIdx + << " SectorIdx# " << SectorIdx + << " OffsetInSector# " << OffsetInSector; + return ss.Str(); +} + +bool TLogReader::PrepareToRead() { + TDiskFormat &format = PDisk->Format; + if (Position == TLogPosition::Invalid()) { + if (IsInitial) { + Y_FAIL(); + } ReplyOk(); - return true; - } - if (Position == TLogPosition{0, 0}) { - if (IsInitial) { - Position = PDisk->LogPosition(PDisk->SysLogRecord.LogHeadChunkIdx, 0, 0); - } else { - if (ChunksToRead.size() == 0) { + return true; + } + if (Position == TLogPosition{0, 0}) { + if (IsInitial) { + Position = 
PDisk->LogPosition(PDisk->SysLogRecord.LogHeadChunkIdx, 0, 0); + } else { + if (ChunksToRead.size() == 0) { ReplyOk(); - return true; - } - if (OwnerLogStartPosition != TLogPosition{0, 0}) { - ui32 startChunkIdx = OwnerLogStartPosition.ChunkIdx; - Y_VERIFY(startChunkIdx == ChunksToRead[0]); - Position = OwnerLogStartPosition; - } else { - Position = PDisk->LogPosition(ChunksToRead[0], 0, 0); - } - CurrentChunkToRead = ChunksToRead.begin(); - } - ChunkIdx = Position.ChunkIdx; - } else { - ChunkIdx = Position.ChunkIdx; - if (!IsInitial) { - auto keepIt = ChunksToRead.begin(); - while (keepIt != ChunksToRead.end() && *keepIt != ChunkIdx) { - ++keepIt; - } - ChunksToRead.erase(ChunksToRead.begin(), keepIt); - if (ChunksToRead.size() == 0) { - LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, - "PDiskId# %" PRIu32 " No chunks to read log from!", - (ui32)PDisk->PDiskId); - ReplyError(); - return true; - } - CurrentChunkToRead = ChunksToRead.begin(); - } - } - EndSectorIdx = PDisk->UsableSectorsPerLogChunk(); - ui64 noErasureSectorIdx = Position.OffsetInChunk / format.SectorSize; - // TODO(cthulhu): Check that Position format can be used with 16 TB HDD + return true; + } + if (OwnerLogStartPosition != TLogPosition{0, 0}) { + ui32 startChunkIdx = OwnerLogStartPosition.ChunkIdx; + Y_VERIFY(startChunkIdx == ChunksToRead[0]); + Position = OwnerLogStartPosition; + } else { + Position = PDisk->LogPosition(ChunksToRead[0], 0, 0); + } + CurrentChunkToRead = ChunksToRead.begin(); + } + ChunkIdx = Position.ChunkIdx; + } else { + ChunkIdx = Position.ChunkIdx; + if (!IsInitial) { + auto keepIt = ChunksToRead.begin(); + while (keepIt != ChunksToRead.end() && *keepIt != ChunkIdx) { + ++keepIt; + } + ChunksToRead.erase(ChunksToRead.begin(), keepIt); + if (ChunksToRead.size() == 0) { + LOG_ERROR(*ActorSystem, NKikimrServices::BS_PDISK, + "PDiskId# %" PRIu32 " No chunks to read log from!", + (ui32)PDisk->PDiskId); + ReplyError(); + return true; + } + CurrentChunkToRead = 
ChunksToRead.begin(); + } + } + EndSectorIdx = PDisk->UsableSectorsPerLogChunk(); + ui64 noErasureSectorIdx = Position.OffsetInChunk / format.SectorSize; + // TODO(cthulhu): Check that Position format can be used with 16 TB HDD SectorIdx = noErasureSectorIdx; - OffsetInSector = Position.OffsetInChunk - noErasureSectorIdx * format.SectorSize; - - LastGoodToWriteLogPosition = Position; - - if (!IsInitial && ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { + OffsetInSector = Position.OffsetInChunk - noErasureSectorIdx * format.SectorSize; + + LastGoodToWriteLogPosition = Position; + + if (!IsInitial && ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { ReplyOk(); - return true; - } - - return false; -} - -void TLogReader::ScheduleForward(ui64 offset) { - ui64 sectorsToRead = Min((ui64)EndSectorIdx - SectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); - ui64 nextSectorIdx = SectorIdx + sectorsToRead; - if (nextSectorIdx < EndSectorIdx + ReplicationFactor) { - offset += (ui64)sectorsToRead * (ui64)PDisk->Format.SectorSize; - sectorsToRead = Min((ui64)EndSectorIdx - nextSectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); - - TSectorData *data = Sector->DataByOffset(offset); - if (!data->IsScheduled && !data->SetOffset(offset)) { - ScheduleReadAsync(*data, sectorsToRead); - } - } -} - -void TLogReader::UpdateLastGoodToWritePosition() { - // SET ? Initial ? - if (IsInitial || SetLastGoodToWritePosition) { - // TODO: consider fixing offset in sector! 
+ return true; + } + + return false; +} + +void TLogReader::ScheduleForward(ui64 offset) { + ui64 sectorsToRead = Min((ui64)EndSectorIdx - SectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); + ui64 nextSectorIdx = SectorIdx + sectorsToRead; + if (nextSectorIdx < EndSectorIdx + ReplicationFactor) { + offset += (ui64)sectorsToRead * (ui64)PDisk->Format.SectorSize; + sectorsToRead = Min((ui64)EndSectorIdx - nextSectorIdx + ReplicationFactor, (ui64)BufferSizeSectors); + + TSectorData *data = Sector->DataByOffset(offset); + if (!data->IsScheduled && !data->SetOffset(offset)) { + ScheduleReadAsync(*data, sectorsToRead); + } + } +} + +void TLogReader::UpdateLastGoodToWritePosition() { + // SET ? Initial ? + if (IsInitial || SetLastGoodToWritePosition) { + // TODO: consider fixing offset in sector! LastGoodToWriteLogPosition = PDisk->LogPosition(ChunkIdx, SectorIdx + 0, OffsetInSector); - if (IsInitial) { - PDisk->InitialPreviousNonce = MaxNonce; - } - SetLastGoodToWritePosition = false; - } -} - -void TLogReader::LogRawData(ui8* data, ui64 size, TString info) { - TStringStream str; - ui64 lineSize = 20; - ui64 lines = size / lineSize; - str << info << Endl; - str << " : "; - for (ui64 columnIdx = 0; columnIdx < lineSize; ++columnIdx) { - str << Sprintf("%04" PRIu64 " ", (ui64)columnIdx); - } - str << Endl; - for (ui64 lineIdx = 0; lineIdx < lines; ++lineIdx) { - ui64 columns = (size - lineIdx * lineSize < lineSize) ? size % lineSize : lineSize; - str << Sprintf("%04" PRIu64 ": ", (ui64)(lineIdx * lineSize)); - for (ui64 columnIdx = 0; columnIdx < columns; ++columnIdx) { - ui64 byteIdx = lineIdx * lineSize + columnIdx; - ui8 c = data[byteIdx]; - str << Sprintf("%02" PRIx64 "_%c ", (ui64)c, c >= ' ' && c <= 'z' ? (char)c : '?'); - } - str << Endl; - } - Cerr << str.Str(); -} - -void TLogReader::ProcessLogPageTerminator(ui8 *data, ui32 sectorPayloadSize) { - // log terminator offset in sector = OffsetInSector - // The rest of the sector contains no data. 
- auto *firstPageHeader = reinterpret_cast<TFirstLogPageHeader*>(data); - ui32 sizeLeft = sectorPayloadSize - OffsetInSector; - Y_VERIFY(firstPageHeader->Size + sizeof(TFirstLogPageHeader) == sizeLeft); - OffsetInSector += sizeLeft; - SetLastGoodToWritePosition = true; -} - + if (IsInitial) { + PDisk->InitialPreviousNonce = MaxNonce; + } + SetLastGoodToWritePosition = false; + } +} + +void TLogReader::LogRawData(ui8* data, ui64 size, TString info) { + TStringStream str; + ui64 lineSize = 20; + ui64 lines = size / lineSize; + str << info << Endl; + str << " : "; + for (ui64 columnIdx = 0; columnIdx < lineSize; ++columnIdx) { + str << Sprintf("%04" PRIu64 " ", (ui64)columnIdx); + } + str << Endl; + for (ui64 lineIdx = 0; lineIdx < lines; ++lineIdx) { + ui64 columns = (size - lineIdx * lineSize < lineSize) ? size % lineSize : lineSize; + str << Sprintf("%04" PRIu64 ": ", (ui64)(lineIdx * lineSize)); + for (ui64 columnIdx = 0; columnIdx < columns; ++columnIdx) { + ui64 byteIdx = lineIdx * lineSize + columnIdx; + ui8 c = data[byteIdx]; + str << Sprintf("%02" PRIx64 "_%c ", (ui64)c, c >= ' ' && c <= 'z' ? (char)c : '?'); + } + str << Endl; + } + Cerr << str.Str(); +} + +void TLogReader::ProcessLogPageTerminator(ui8 *data, ui32 sectorPayloadSize) { + // log terminator offset in sector = OffsetInSector + // The rest of the sector contains no data. 
+ auto *firstPageHeader = reinterpret_cast<TFirstLogPageHeader*>(data); + ui32 sizeLeft = sectorPayloadSize - OffsetInSector; + Y_VERIFY(firstPageHeader->Size + sizeof(TFirstLogPageHeader) == sizeLeft); + OffsetInSector += sizeLeft; + SetLastGoodToWritePosition = true; +} + void TLogReader::ProcessLogPageNonceJump2(ui8 *data, const ui64 previousNonce, const ui64 previousDataNonce) { - auto *nonceJumpLogPageHeader2 = reinterpret_cast<TNonceJumpLogPageHeader2*>(data); - OffsetInSector += sizeof(TNonceJumpLogPageHeader2); - - if (IsInitial) { - PDisk->LastNonceJumpLogPageHeader2 = *nonceJumpLogPageHeader2; - - if (SectorIdx == 0) { - // For future log splices PrevChunkLastNonce should be equal to expected in NonceJump record - ChunkInfo->PrevChunkLastNonce = nonceJumpLogPageHeader2->PreviousNonce; - } - - // TODO: Investigate / process error the proper way here. - if (previousNonce > nonceJumpLogPageHeader2->PreviousNonce && - previousDataNonce > nonceJumpLogPageHeader2->PreviousNonce) { - // We just came across an outdated nonce jump. This means the end of the log. - LOG_WARN_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + auto *nonceJumpLogPageHeader2 = reinterpret_cast<TNonceJumpLogPageHeader2*>(data); + OffsetInSector += sizeof(TNonceJumpLogPageHeader2); + + if (IsInitial) { + PDisk->LastNonceJumpLogPageHeader2 = *nonceJumpLogPageHeader2; + + if (SectorIdx == 0) { + // For future log splices PrevChunkLastNonce should be equal to expected in NonceJump record + ChunkInfo->PrevChunkLastNonce = nonceJumpLogPageHeader2->PreviousNonce; + } + + // TODO: Investigate / process error the proper way here. + if (previousNonce > nonceJumpLogPageHeader2->PreviousNonce && + previousDataNonce > nonceJumpLogPageHeader2->PreviousNonce) { + // We just came across an outdated nonce jump. This means the end of the log. 
+ LOG_WARN_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " currentSectorIdx# " << SectorIdx - << " previousNonce# " << previousNonce - << " previousDataNonce# " << previousDataNonce - << " nonceJumpLogPageHeader2->PreviousNonce# " << nonceJumpLogPageHeader2->PreviousNonce - << " ReplyOk, marker LR001"); + << " previousNonce# " << previousNonce + << " previousDataNonce# " << previousDataNonce + << " nonceJumpLogPageHeader2->PreviousNonce# " << nonceJumpLogPageHeader2->PreviousNonce + << " ReplyOk, marker LR001"); ReplyOk(); - return; - } else if (previousNonce < nonceJumpLogPageHeader2->PreviousNonce && - previousDataNonce < nonceJumpLogPageHeader2->PreviousNonce) { - TStringStream str; - str << "PDiskId# " << PDisk->PDiskId - << "previousNonce# " << previousNonce - << " and previousDataNonce# " << previousDataNonce - << " != header->PreviousNonce# " << nonceJumpLogPageHeader2->PreviousNonce - << " OffsetInSector# " << OffsetInSector - << " sizeof(TNonceJumpLogPageHeader)# " << sizeof(TNonceJumpLogPageHeader2) - << " chunkIdx# " << ChunkIdx - << " sectorIdx# " << SectorIdx - << " header# " << nonceJumpLogPageHeader2->ToString(false) - << Endl; - Y_FAIL_S(str.Str()); - } - } else if (ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " ReplyOk, marker LR003" - << " ChunkIdx# " << ChunkIdx - << " LogEndChunkIdx# " << LogEndChunkIdx - << " SectorIdx# " << SectorIdx - << " LogEndSectorId# " << LogEndSectorIdx); + return; + } else if (previousNonce < nonceJumpLogPageHeader2->PreviousNonce && + previousDataNonce < nonceJumpLogPageHeader2->PreviousNonce) { + TStringStream str; + str << "PDiskId# " << PDisk->PDiskId + << "previousNonce# " << previousNonce + << " and previousDataNonce# " << previousDataNonce + << " != header->PreviousNonce# " << nonceJumpLogPageHeader2->PreviousNonce + << " OffsetInSector# " << OffsetInSector + << " sizeof(TNonceJumpLogPageHeader)# " << 
sizeof(TNonceJumpLogPageHeader2) + << " chunkIdx# " << ChunkIdx + << " sectorIdx# " << SectorIdx + << " header# " << nonceJumpLogPageHeader2->ToString(false) + << Endl; + Y_FAIL_S(str.Str()); + } + } else if (ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " ReplyOk, marker LR003" + << " ChunkIdx# " << ChunkIdx + << " LogEndChunkIdx# " << LogEndChunkIdx + << " SectorIdx# " << SectorIdx + << " LogEndSectorId# " << LogEndSectorIdx); ReplyOk(); - return; - } - - SetLastGoodToWritePosition = true; - IsLastRecordSkipped = false; - IsLastRecordHeaderValid = false; -} - + return; + } + + SetLastGoodToWritePosition = true; + IsLastRecordSkipped = false; + IsLastRecordHeaderValid = false; +} + void TLogReader::ProcessLogPageNonceJump1(ui8 *data, const ui64 previousNonce) { - auto *nonceJumpLogPageHeader1 = reinterpret_cast<TNonceJumpLogPageHeader1*>(data); - OffsetInSector += sizeof(TNonceJumpLogPageHeader1); - if (IsInitial) { - // TODO: Investigate / process error the proper way here. - if (previousNonce > nonceJumpLogPageHeader1->PreviousNonce) { - // We just came across an outdated nonce jump. This means the end of the log. + auto *nonceJumpLogPageHeader1 = reinterpret_cast<TNonceJumpLogPageHeader1*>(data); + OffsetInSector += sizeof(TNonceJumpLogPageHeader1); + if (IsInitial) { + // TODO: Investigate / process error the proper way here. + if (previousNonce > nonceJumpLogPageHeader1->PreviousNonce) { + // We just came across an outdated nonce jump. This means the end of the log. 
ReplyOk(); - return; - } - Y_VERIFY(previousNonce == nonceJumpLogPageHeader1->PreviousNonce, - "previousNonce# %" PRIu64 " != header->PreviousNonce# %" PRIu64 - " OffsetInSector# %" PRIu64 " sizeof(TNonceJumpLogPageHeader1)# %" PRIu64 - " chunkIdx# %" PRIu64 " sectorIdx# %" PRIu64, // " header->Flags# %" PRIu64, - (ui64)previousNonce, (ui64)nonceJumpLogPageHeader1->PreviousNonce, - (ui64)OffsetInSector, (ui64)sizeof(TNonceJumpLogPageHeader1), - (ui64)ChunkIdx, (ui64)SectorIdx); //, (ui64)pageHeader->Flags); - } - - if (!IsInitial && ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { + return; + } + Y_VERIFY(previousNonce == nonceJumpLogPageHeader1->PreviousNonce, + "previousNonce# %" PRIu64 " != header->PreviousNonce# %" PRIu64 + " OffsetInSector# %" PRIu64 " sizeof(TNonceJumpLogPageHeader1)# %" PRIu64 + " chunkIdx# %" PRIu64 " sectorIdx# %" PRIu64, // " header->Flags# %" PRIu64, + (ui64)previousNonce, (ui64)nonceJumpLogPageHeader1->PreviousNonce, + (ui64)OffsetInSector, (ui64)sizeof(TNonceJumpLogPageHeader1), + (ui64)ChunkIdx, (ui64)SectorIdx); //, (ui64)pageHeader->Flags); + } + + if (!IsInitial && ChunkIdx == LogEndChunkIdx && SectorIdx >= LogEndSectorIdx) { ReplyOk(); - return; - } - - SetLastGoodToWritePosition = true; - IsLastRecordSkipped = false; - IsLastRecordHeaderValid = false; -} - -bool TLogReader::ProcessSectorSet(TSectorData *sector) { - TDiskFormat &format = PDisk->Format; - UpdateLastGoodToWritePosition(); - - const ui64 magic = format.MagicLogChunk; + return; + } + + SetLastGoodToWritePosition = true; + IsLastRecordSkipped = false; + IsLastRecordHeaderValid = false; +} + +bool TLogReader::ProcessSectorSet(TSectorData *sector) { + TDiskFormat &format = PDisk->Format; + UpdateLastGoodToWritePosition(); + + const ui64 magic = format.MagicLogChunk; TSectorRestorator restorator(false, LogErasureDataParts, false, format, - PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); - 
restorator.Restore(sector->GetData(), sector->Offset, magic, LastNonce, PDisk->Cfg->UseT1ha0HashInFooter); - - if (!restorator.GoodSectorFlags) { + PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); + restorator.Restore(sector->GetData(), sector->Offset, magic, LastNonce, PDisk->Cfg->UseT1ha0HashInFooter); + + if (!restorator.GoodSectorFlags) { ReplyOk(); - return true; - } + return true; + } { - UpdateLastGoodToWritePosition(); + UpdateLastGoodToWritePosition(); if (!(restorator.GoodSectorFlags & 1)) { ReplyOk(); - return true; - } - + return true; + } + ui8* rawSector = sector->GetData(); - TDataSectorFooter *sectorFooter = (TDataSectorFooter*) - (rawSector + format.SectorSize - sizeof(TDataSectorFooter)); - - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " SectorIdx# " << SectorIdx + TDataSectorFooter *sectorFooter = (TDataSectorFooter*) + (rawSector + format.SectorSize - sizeof(TDataSectorFooter)); + + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " SectorIdx# " << SectorIdx << " currentSectorIdx# " << SectorIdx - << " sectorFooter->Nonce# " << sectorFooter->Nonce); - - ui64 previousNonce = std::exchange(LastNonce, sectorFooter->Nonce); - ui64 previousDataNonce = std::exchange(LastDataNonce, sectorFooter->Nonce); - if (LastNonce > MaxNonce) { - MaxNonce = LastNonce; - } - if (IsInitial) { - if (ChunkInfo->FirstNonce == 0) { - ChunkInfo->FirstNonce = sectorFooter->Nonce; - } - ChunkInfo->LastNonce = sectorFooter->Nonce; - } - - ui8 *data = rawSector; - // Decrypt data - Cypher.StartMessage(sectorFooter->Nonce); - Cypher.InplaceEncrypt(data, format.SectorSize - ui32(sizeof(TDataSectorFooter))); + << " sectorFooter->Nonce# " << sectorFooter->Nonce); + + ui64 previousNonce = std::exchange(LastNonce, sectorFooter->Nonce); + ui64 previousDataNonce = std::exchange(LastDataNonce, sectorFooter->Nonce); + if (LastNonce > MaxNonce) { + MaxNonce = LastNonce; + } + if 
(IsInitial) { + if (ChunkInfo->FirstNonce == 0) { + ChunkInfo->FirstNonce = sectorFooter->Nonce; + } + ChunkInfo->LastNonce = sectorFooter->Nonce; + } + + ui8 *data = rawSector; + // Decrypt data + Cypher.StartMessage(sectorFooter->Nonce); + Cypher.InplaceEncrypt(data, format.SectorSize - ui32(sizeof(TDataSectorFooter))); PDisk->CheckLogCanary(rawSector, ChunkIdx, SectorIdx); - - ui32 maxOffsetInSector = format.SectorPayloadSize() - ui32(sizeof(TFirstLogPageHeader)); - while (OffsetInSector <= maxOffsetInSector) { - TLogPageHeader *pageHeader = (TLogPageHeader*)(data + OffsetInSector); - Y_VERIFY(pageHeader->Version == PDISK_DATA_VERSION, "PDiskId# %" PRIu32 - " incompatible log page header version: %" PRIu32 - " (expected: %" PRIu32 ") at chunk %" PRIu32 " SectorSet: %" PRIu32 " Sector: %" PRIu32 - " Offset in sector: %" PRIu32 " A: %" PRIu32 " B: %" PRIu32, (ui32)PDisk->PDiskId, - (ui32)pageHeader->Version, (ui32)PDISK_DATA_VERSION, (ui32)ChunkIdx, (ui32)SectorIdx, + + ui32 maxOffsetInSector = format.SectorPayloadSize() - ui32(sizeof(TFirstLogPageHeader)); + while (OffsetInSector <= maxOffsetInSector) { + TLogPageHeader *pageHeader = (TLogPageHeader*)(data + OffsetInSector); + Y_VERIFY(pageHeader->Version == PDISK_DATA_VERSION, "PDiskId# %" PRIu32 + " incompatible log page header version: %" PRIu32 + " (expected: %" PRIu32 ") at chunk %" PRIu32 " SectorSet: %" PRIu32 " Sector: %" PRIu32 + " Offset in sector: %" PRIu32 " A: %" PRIu32 " B: %" PRIu32, (ui32)PDisk->PDiskId, + (ui32)pageHeader->Version, (ui32)PDISK_DATA_VERSION, (ui32)ChunkIdx, (ui32)SectorIdx, (ui32)0, (ui32)OffsetInSector, (ui32)pageHeader->A, (ui32)pageHeader->B); - - if (pageHeader->Flags & LogPageTerminator) { - ProcessLogPageTerminator(data + OffsetInSector, format.SectorPayloadSize()); - continue; - } - if (pageHeader->Flags & LogPageNonceJump2) { + + if (pageHeader->Flags & LogPageTerminator) { + ProcessLogPageTerminator(data + OffsetInSector, format.SectorPayloadSize()); + continue; + } + if 
(pageHeader->Flags & LogPageNonceJump2) { ProcessLogPageNonceJump2(data + OffsetInSector, previousNonce, previousDataNonce); - if (IsReplied.load()) { - return true; - } - continue; - } - if (pageHeader->Flags & LogPageNonceJump1) { + if (IsReplied.load()) { + return true; + } + continue; + } + if (pageHeader->Flags & LogPageNonceJump1) { ProcessLogPageNonceJump1(data + OffsetInSector, previousNonce); - if (IsReplied.load()) { - return true; - } - continue; - } - - if (IsInitial && previousNonce != 0) { + if (IsReplied.load()) { + return true; + } + continue; + } + + if (IsInitial && previousNonce != 0) { if (SectorIdx != 0) { - // Suspicious place! + // Suspicious place! if (LastNonce != previousNonce + 1) { ReplyOk(); - return true; - } - } else { - if (LastNonce != previousNonce + 1) { + return true; + } + } else { + if (LastNonce != previousNonce + 1) { ReplyOk(); - return true; - } - } - } - - if (pageHeader->Flags & LogPageFirst) { - TFirstLogPageHeader *firstPageHeader = (TFirstLogPageHeader*)(data + OffsetInSector); - OffsetInSector += sizeof(TFirstLogPageHeader); - // TODO(cthulhu): ReplyError() and return true; - Y_VERIFY_S(!IsLastRecordHeaderValid, SelfInfo() << - " LogPageNonceJump must be the first record after the last abrupt log termination"); - if (IsInitial || firstPageHeader->LogRecordHeader.OwnerId == Owner) { - if (IsInitial) { - FirstNonceToKeep = (ui64)-1; - FirstLsnToKeep = (ui64)-1; - TGuard<TMutex> guard(PDisk->StateMutex); - ui32 recordOwnerId = firstPageHeader->LogRecordHeader.OwnerId; - TOwnerData &ownerData = PDisk->OwnerData[recordOwnerId]; - if (ownerData.VDiskId != TVDiskID::InvalidId) { - FirstLsnToKeep = ownerData.CurrentFirstLsnToKeep; - FirstNonceToKeep = PDisk->SysLogFirstNoncesToKeep.FirstNonceToKeep[recordOwnerId]; - if (ownerData.LogStartPosition == TLogPosition{0, 0}) { + return true; + } + } + } + + if (pageHeader->Flags & LogPageFirst) { + TFirstLogPageHeader *firstPageHeader = (TFirstLogPageHeader*)(data + 
OffsetInSector); + OffsetInSector += sizeof(TFirstLogPageHeader); + // TODO(cthulhu): ReplyError() and return true; + Y_VERIFY_S(!IsLastRecordHeaderValid, SelfInfo() << + " LogPageNonceJump must be the first record after the last abrupt log termination"); + if (IsInitial || firstPageHeader->LogRecordHeader.OwnerId == Owner) { + if (IsInitial) { + FirstNonceToKeep = (ui64)-1; + FirstLsnToKeep = (ui64)-1; + TGuard<TMutex> guard(PDisk->StateMutex); + ui32 recordOwnerId = firstPageHeader->LogRecordHeader.OwnerId; + TOwnerData &ownerData = PDisk->OwnerData[recordOwnerId]; + if (ownerData.VDiskId != TVDiskID::InvalidId) { + FirstLsnToKeep = ownerData.CurrentFirstLsnToKeep; + FirstNonceToKeep = PDisk->SysLogFirstNoncesToKeep.FirstNonceToKeep[recordOwnerId]; + if (ownerData.LogStartPosition == TLogPosition{0, 0}) { ownerData.LogStartPosition = PDisk->LogPosition(ChunkIdx, SectorIdx, - OffsetInSector - sizeof(TFirstLogPageHeader)); - TStringStream str; - str << "(B) Initial ownerId# " << (ui32)recordOwnerId - << " set LogStartPosition# " << ownerData.LogStartPosition - << " FirstNonceToKeep# " << FirstNonceToKeep << Endl; - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s", - (ui32)PDisk->PDiskId, str.Str().c_str()); - } - } - } - if (sectorFooter->Nonce < FirstNonceToKeep || - firstPageHeader->LogRecordHeader.OwnerLsn < FirstLsnToKeep) { - IsLastRecordSkipped = true; - OffsetInSector += firstPageHeader->Size; - continue; - } - if (!IsInitial) { - ResultSize += firstPageHeader->DataSize; - if (ResultSize > SizeLimit && ResultSize != firstPageHeader->DataSize) { - OffsetInSector -= sizeof(TFirstLogPageHeader); - ReplyOkInTheMiddle(); - return true; - } - } - IsLastRecordSkipped = false; - LastRecordHeader = firstPageHeader->LogRecordHeader; - LastRecordHeaderNonce = sectorFooter->Nonce; - IsLastRecordHeaderValid = true; - LastRecordData = TString::Uninitialized(firstPageHeader->DataSize); - Y_VERIFY(firstPageHeader->Size <= LastRecordData.size()); 
- memcpy((void*)LastRecordData.data(), data + OffsetInSector, firstPageHeader->Size); - LastRecordDataWritePosition = firstPageHeader->Size; - } else { - IsLastRecordSkipped = true; - } - OffsetInSector += firstPageHeader->Size; - } else { - OffsetInSector += sizeof(TLogPageHeader); - if (IsLastRecordSkipped) { - OffsetInSector += pageHeader->Size; - continue; - } - if (!IsLastRecordHeaderValid) { + OffsetInSector - sizeof(TFirstLogPageHeader)); + TStringStream str; + str << "(B) Initial ownerId# " << (ui32)recordOwnerId + << " set LogStartPosition# " << ownerData.LogStartPosition + << " FirstNonceToKeep# " << FirstNonceToKeep << Endl; + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s", + (ui32)PDisk->PDiskId, str.Str().c_str()); + } + } + } + if (sectorFooter->Nonce < FirstNonceToKeep || + firstPageHeader->LogRecordHeader.OwnerLsn < FirstLsnToKeep) { + IsLastRecordSkipped = true; + OffsetInSector += firstPageHeader->Size; + continue; + } + if (!IsInitial) { + ResultSize += firstPageHeader->DataSize; + if (ResultSize > SizeLimit && ResultSize != firstPageHeader->DataSize) { + OffsetInSector -= sizeof(TFirstLogPageHeader); + ReplyOkInTheMiddle(); + return true; + } + } + IsLastRecordSkipped = false; + LastRecordHeader = firstPageHeader->LogRecordHeader; + LastRecordHeaderNonce = sectorFooter->Nonce; + IsLastRecordHeaderValid = true; + LastRecordData = TString::Uninitialized(firstPageHeader->DataSize); + Y_VERIFY(firstPageHeader->Size <= LastRecordData.size()); + memcpy((void*)LastRecordData.data(), data + OffsetInSector, firstPageHeader->Size); + LastRecordDataWritePosition = firstPageHeader->Size; + } else { + IsLastRecordSkipped = true; + } + OffsetInSector += firstPageHeader->Size; + } else { + OffsetInSector += sizeof(TLogPageHeader); + if (IsLastRecordSkipped) { + OffsetInSector += pageHeader->Size; + continue; + } + if (!IsLastRecordHeaderValid) { Y_FAIL_S(SelfInfo() << " Last record header is corrupted!" 
<< Endl); - - // TODO: skip the incorrect entry gracefully? ReplyError() and return true - } - if (IsInitial || LastRecordHeader.OwnerId == Owner) { - void* destination = (void*)((ui8*)LastRecordData.data() + LastRecordDataWritePosition); - if (LastRecordDataWritePosition + pageHeader->Size > LastRecordData.size()) { - Y_FAIL_S(SelfInfo() << " Last record header is corrupted!" - << " IsLastRecordSkipped# " << IsLastRecordSkipped - << " OffsetInSector# " << OffsetInSector - << " LastRecordDataWritePosition# " << LastRecordDataWritePosition - << " pageHeader->Size# " << pageHeader->Size - << " LastRecordData.Size()# " << LastRecordData.size() << Endl); - } - memcpy(destination, data + OffsetInSector, pageHeader->Size); - LastRecordDataWritePosition += pageHeader->Size; - } - OffsetInSector += pageHeader->Size; - } - if (IsInitial && IsLastRecordHeaderValid) { - TGuard<TMutex> guard(PDisk->StateMutex); - if (LastRecordHeaderNonce >= - PDisk->SysLogFirstNoncesToKeep.FirstNonceToKeep[LastRecordHeader.OwnerId]) { - TLogChunkInfo &info = *ChunkInfo; - info.RegisterLogSector<false>(LastRecordHeader.OwnerId, LastRecordHeader.OwnerLsn); - } - } - if (IsLastRecordHeaderValid && (pageHeader->Flags & LogPageLast)) { - // Log page last - MaxCompleteLsnCyclic = LastRecordHeader.OwnerLsn; - SetLastGoodToWritePosition = true; - - IsLastRecordHeaderValid = false; - Y_VERIFY_S(LastRecordDataWritePosition == LastRecordData.size(), - "LastRecordDataWritePosition# " << LastRecordDataWritePosition - << " LastRecordData.size()# " << LastRecordData.size() - << " pageHeader->Size# " << pageHeader->Size - << " pageHeader->Flags# " << ui64(pageHeader->Flags) - << " chunkIdx# " << ChunkIdx - << " sectorIdx# " << SectorIdx - << " OffsetInSector# " << OffsetInSector - << " maxOffsetInSector# " << maxOffsetInSector - << " rawSector# " << (void*)rawSector); - - PDisk->ProcessReadLogRecord(LastRecordHeader, LastRecordData, Owner, sectorFooter->Nonce, - Result.Get(), ChunkOwnerMap.Get(), IsInitial, 
ParseCommits); - LastRecordData = TString(); - } - }// while OffsetInSector <= maxOffsetInSector - - OffsetInSector = 0; + + // TODO: skip the incorrect entry gracefully? ReplyError() and return true + } + if (IsInitial || LastRecordHeader.OwnerId == Owner) { + void* destination = (void*)((ui8*)LastRecordData.data() + LastRecordDataWritePosition); + if (LastRecordDataWritePosition + pageHeader->Size > LastRecordData.size()) { + Y_FAIL_S(SelfInfo() << " Last record header is corrupted!" + << " IsLastRecordSkipped# " << IsLastRecordSkipped + << " OffsetInSector# " << OffsetInSector + << " LastRecordDataWritePosition# " << LastRecordDataWritePosition + << " pageHeader->Size# " << pageHeader->Size + << " LastRecordData.Size()# " << LastRecordData.size() << Endl); + } + memcpy(destination, data + OffsetInSector, pageHeader->Size); + LastRecordDataWritePosition += pageHeader->Size; + } + OffsetInSector += pageHeader->Size; + } + if (IsInitial && IsLastRecordHeaderValid) { + TGuard<TMutex> guard(PDisk->StateMutex); + if (LastRecordHeaderNonce >= + PDisk->SysLogFirstNoncesToKeep.FirstNonceToKeep[LastRecordHeader.OwnerId]) { + TLogChunkInfo &info = *ChunkInfo; + info.RegisterLogSector<false>(LastRecordHeader.OwnerId, LastRecordHeader.OwnerLsn); + } + } + if (IsLastRecordHeaderValid && (pageHeader->Flags & LogPageLast)) { + // Log page last + MaxCompleteLsnCyclic = LastRecordHeader.OwnerLsn; + SetLastGoodToWritePosition = true; + + IsLastRecordHeaderValid = false; + Y_VERIFY_S(LastRecordDataWritePosition == LastRecordData.size(), + "LastRecordDataWritePosition# " << LastRecordDataWritePosition + << " LastRecordData.size()# " << LastRecordData.size() + << " pageHeader->Size# " << pageHeader->Size + << " pageHeader->Flags# " << ui64(pageHeader->Flags) + << " chunkIdx# " << ChunkIdx + << " sectorIdx# " << SectorIdx + << " OffsetInSector# " << OffsetInSector + << " maxOffsetInSector# " << maxOffsetInSector + << " rawSector# " << (void*)rawSector); + + 
PDisk->ProcessReadLogRecord(LastRecordHeader, LastRecordData, Owner, sectorFooter->Nonce, + Result.Get(), ChunkOwnerMap.Get(), IsInitial, ParseCommits); + LastRecordData = TString(); + } + }// while OffsetInSector <= maxOffsetInSector + + OffsetInSector = 0; } ++SectorIdx; - return false; -} - + return false; +} + void TLogReader::ReplyOk() { - { - TPDiskHashCalculator hasher(PDisk->Cfg->UseT1ha0HashInFooter); - TGuard<TMutex> guard(PDisk->StateMutex); - if (!IsInitial) { - TOwnerData &ownerData = PDisk->OwnerData[Owner]; - if (OwnerVDiskId != TVDiskID::InvalidId && - ownerData.LogRecordsInitiallyRead && - !ownerData.LogRecordsConsequentlyRead) { - TStringStream str; - str << "LogRecordsConsequentlyRead# " << ownerData.LogRecordsConsequentlyRead - << " LogRecordsInitiallyRead# " << ownerData.LogRecordsInitiallyRead; - Y_FAIL_S(str.Str()); - } - // End of log reached - if (OwnerVDiskId != TVDiskID::InvalidId) { - ownerData.HasReadTheWholeLog = true; - } - } - } - - Result->Status = NKikimrProto::OK; - Result->NextPosition = IsInitial ? LastGoodToWriteLogPosition : TLogPosition::Invalid(); - Result->IsEndOfLog = true; - Reply(); -} - -void TLogReader::ReplyOkInTheMiddle() { - Result->Status = NKikimrProto::OK; + { + TPDiskHashCalculator hasher(PDisk->Cfg->UseT1ha0HashInFooter); + TGuard<TMutex> guard(PDisk->StateMutex); + if (!IsInitial) { + TOwnerData &ownerData = PDisk->OwnerData[Owner]; + if (OwnerVDiskId != TVDiskID::InvalidId && + ownerData.LogRecordsInitiallyRead && + !ownerData.LogRecordsConsequentlyRead) { + TStringStream str; + str << "LogRecordsConsequentlyRead# " << ownerData.LogRecordsConsequentlyRead + << " LogRecordsInitiallyRead# " << ownerData.LogRecordsInitiallyRead; + Y_FAIL_S(str.Str()); + } + // End of log reached + if (OwnerVDiskId != TVDiskID::InvalidId) { + ownerData.HasReadTheWholeLog = true; + } + } + } + + Result->Status = NKikimrProto::OK; + Result->NextPosition = IsInitial ? 
LastGoodToWriteLogPosition : TLogPosition::Invalid(); + Result->IsEndOfLog = true; + Reply(); +} + +void TLogReader::ReplyOkInTheMiddle() { + Result->Status = NKikimrProto::OK; Result->NextPosition = PDisk->LogPosition(ChunkIdx, SectorIdx, OffsetInSector); - Result->IsEndOfLog = false; - Reply(); -} - -void TLogReader::ReplyError() { - Result->Status = NKikimrProto::ERROR; - Result->NextPosition = TLogPosition::Invalid(); - Result->IsEndOfLog = true; - Reply(); -} - -void TLogReader::Reply() { - Y_VERIFY(!IsReplied.load()); - if (IsInitial) { - PDisk->ProcessChunkOwnerMap(*ChunkOwnerMap.Get()); - ChunkOwnerMap.Destroy(); - } - LOG_DEBUG(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " To ownerId# %" PRIu32 " %s", - (ui32)PDisk->PDiskId, (ui32)Owner, Result->ToString().c_str()); - ActorSystem->Send(ReplyTo, Result.Release()); - if (!IsInitial) { - PDisk->Mon.LogRead.CountResponse(ResultSize); - } - IsReplied.store(true); -} - -bool TLogReader::GetIsReplied() const { - return IsReplied.load(); -} - -bool TLogReader::ProcessNextChunkReference(TSectorData& sector) { - TDiskFormat &format = PDisk->Format; - - TSectorRestorator restorator(true, 0, format.IsErasureEncodeNextChunkReference(), - PDisk->Format, PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, - PDisk->BufferPool.Get()); - restorator.Restore(sector.GetData(), sector.Offset, format.MagicNextLogChunkReference, LastNonce, - PDisk->Cfg->UseT1ha0HashInFooter); - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " ProcessNextChunkReference"); - - if (restorator.LastGoodIdx < ReplicationFactor) { - ui8* const rawSector = sector.GetData() + restorator.LastGoodIdx * format.SectorSize; - TDataSectorFooter *sectorFooter = (TDataSectorFooter*) - (rawSector + format.SectorSize - sizeof(TDataSectorFooter)); - if (sectorFooter->Nonce < LastNonce) { - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " ProcessNextChunkReference, Nonce 
reordering" - << " sectorFooter->Nonce# " << sectorFooter->Nonce - << " LastNonce# " << LastNonce); - // This one came unexpectedly out of the blue! - // TODO(cthulhu): Write a unit-test that hits this line. - // Steps to reproduce: - // B - Begin - // C - Continuation - // 123 - Idea of nonce - // Wirte something like B1C2 C3C4 B5C6 B7C8 - // Cut up to B5, cut up to B7, stop - // Backup C3C4 - // Start, write B9C10 C11C12 so that we get B9C10 C11C12 -B5C6- B7C8 - // Stop - // Restore C3C4 and get B9C10 C3C4 -B5C6- B7C8 - // Start + Result->IsEndOfLog = false; + Reply(); +} + +void TLogReader::ReplyError() { + Result->Status = NKikimrProto::ERROR; + Result->NextPosition = TLogPosition::Invalid(); + Result->IsEndOfLog = true; + Reply(); +} + +void TLogReader::Reply() { + Y_VERIFY(!IsReplied.load()); + if (IsInitial) { + PDisk->ProcessChunkOwnerMap(*ChunkOwnerMap.Get()); + ChunkOwnerMap.Destroy(); + } + LOG_DEBUG(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " To ownerId# %" PRIu32 " %s", + (ui32)PDisk->PDiskId, (ui32)Owner, Result->ToString().c_str()); + ActorSystem->Send(ReplyTo, Result.Release()); + if (!IsInitial) { + PDisk->Mon.LogRead.CountResponse(ResultSize); + } + IsReplied.store(true); +} + +bool TLogReader::GetIsReplied() const { + return IsReplied.load(); +} + +bool TLogReader::ProcessNextChunkReference(TSectorData& sector) { + TDiskFormat &format = PDisk->Format; + + TSectorRestorator restorator(true, 0, format.IsErasureEncodeNextChunkReference(), + PDisk->Format, PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, + PDisk->BufferPool.Get()); + restorator.Restore(sector.GetData(), sector.Offset, format.MagicNextLogChunkReference, LastNonce, + PDisk->Cfg->UseT1ha0HashInFooter); + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " ProcessNextChunkReference"); + + if (restorator.LastGoodIdx < ReplicationFactor) { + ui8* const rawSector = sector.GetData() + restorator.LastGoodIdx * 
format.SectorSize; + TDataSectorFooter *sectorFooter = (TDataSectorFooter*) + (rawSector + format.SectorSize - sizeof(TDataSectorFooter)); + if (sectorFooter->Nonce < LastNonce) { + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " ProcessNextChunkReference, Nonce reordering" + << " sectorFooter->Nonce# " << sectorFooter->Nonce + << " LastNonce# " << LastNonce); + // This one came unexpectedly out of the blue! + // TODO(cthulhu): Write a unit-test that hits this line. + // Steps to reproduce: + // B - Begin + // C - Continuation + // 123 - Idea of nonce + // Wirte something like B1C2 C3C4 B5C6 B7C8 + // Cut up to B5, cut up to B7, stop + // Backup C3C4 + // Start, write B9C10 C11C12 so that we get B9C10 C11C12 -B5C6- B7C8 + // Stop + // Restore C3C4 and get B9C10 C3C4 -B5C6- B7C8 + // Start ReplyOk(); - return true; - } - - // Decrypt data - Cypher.StartMessage(sectorFooter->Nonce); - Cypher.InplaceEncrypt(rawSector, ui32(format.SectorSize - sizeof(TDataSectorFooter))); - PDisk->CheckLogCanary(rawSector); - - TNextLogChunkReference2 *nextLogChunkReference = (TNextLogChunkReference2*)rawSector; - switch (nextLogChunkReference->Version) { - case PDISK_DATA_VERSION_2: - LastNonce = sectorFooter->Nonce; - break; - case PDISK_DATA_VERSION_3: - default: - auto *nextRef = static_cast<TNextLogChunkReference3*>(nextLogChunkReference); - if (nextRef->NextChunkFirstNonce) { - IsLastRecordHeaderValid = false; - IsLastRecordSkipped = true; - LastRecordData = TString(); - LastRecordDataWritePosition = 0; - LastNonce = nextRef->NextChunkFirstNonce - 1; - OnEndOfSplice = true; - } else { - LastNonce = sectorFooter->Nonce; - OnEndOfSplice = false; - } - - if (nextRef->IsNotCompatible) { - TStringStream ss; - ss << SelfInfo() << " ReplyError: unexpected data version in TNextLogChunkReference, version# " - << (ui32)nextLogChunkReference->Version; - LOG_ERROR_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, ss.Str()); - Result->ErrorReason = ss.Str(); - 
ReplyError(); - return true; - } - } - ChunkInfo->LastNonce = sectorFooter->Nonce; - MaxNonce = Max(MaxNonce, LastNonce); - - ui32 prevChunkIdx = ChunkIdx; - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " ProcessNextChunkReference, record is valid" - << " prevChunkIdx# " << prevChunkIdx - << " nextChunkIdx# " << nextLogChunkReference->NextChunk - << " LastNonce# " << LastNonce - << " OnEndOfSplice# " << OnEndOfSplice); - SwitchToChunk(nextLogChunkReference->NextChunk); - if (IsInitial) { - UpdateNewChunkInfo(ChunkIdx, prevChunkIdx); - } - return false; - } else { - // As we always write next chunk reference, the situation we are in is impossible. - LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " ProcessNextChunkReference, nextLogChunkReference not in a valid state"); + return true; + } + + // Decrypt data + Cypher.StartMessage(sectorFooter->Nonce); + Cypher.InplaceEncrypt(rawSector, ui32(format.SectorSize - sizeof(TDataSectorFooter))); + PDisk->CheckLogCanary(rawSector); + + TNextLogChunkReference2 *nextLogChunkReference = (TNextLogChunkReference2*)rawSector; + switch (nextLogChunkReference->Version) { + case PDISK_DATA_VERSION_2: + LastNonce = sectorFooter->Nonce; + break; + case PDISK_DATA_VERSION_3: + default: + auto *nextRef = static_cast<TNextLogChunkReference3*>(nextLogChunkReference); + if (nextRef->NextChunkFirstNonce) { + IsLastRecordHeaderValid = false; + IsLastRecordSkipped = true; + LastRecordData = TString(); + LastRecordDataWritePosition = 0; + LastNonce = nextRef->NextChunkFirstNonce - 1; + OnEndOfSplice = true; + } else { + LastNonce = sectorFooter->Nonce; + OnEndOfSplice = false; + } + + if (nextRef->IsNotCompatible) { + TStringStream ss; + ss << SelfInfo() << " ReplyError: unexpected data version in TNextLogChunkReference, version# " + << (ui32)nextLogChunkReference->Version; + LOG_ERROR_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, ss.Str()); + Result->ErrorReason = ss.Str(); + 
ReplyError(); + return true; + } + } + ChunkInfo->LastNonce = sectorFooter->Nonce; + MaxNonce = Max(MaxNonce, LastNonce); + + ui32 prevChunkIdx = ChunkIdx; + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " ProcessNextChunkReference, record is valid" + << " prevChunkIdx# " << prevChunkIdx + << " nextChunkIdx# " << nextLogChunkReference->NextChunk + << " LastNonce# " << LastNonce + << " OnEndOfSplice# " << OnEndOfSplice); + SwitchToChunk(nextLogChunkReference->NextChunk); + if (IsInitial) { + UpdateNewChunkInfo(ChunkIdx, prevChunkIdx); + } + return false; + } else { + // As we always write next chunk reference, the situation we are in is impossible. + LOG_DEBUG_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " ProcessNextChunkReference, nextLogChunkReference not in a valid state"); ReplyOk(); - return true; - } -} - -void TLogReader::UpdateNewChunkInfo(ui32 currChunk, const TMaybe<ui32> prevChunkIdx) { - TGuard<TMutex> guard(PDisk->StateMutex); - if (prevChunkIdx) { - PDisk->ChunkState[*prevChunkIdx].CommitState = TChunkState::LOG_COMMITTED; - } - - TChunkState& state = PDisk->ChunkState[currChunk]; - if (IsOwnerUser(state.OwnerId)) { - LOG_WARN_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " chunk will be treated as log chunk, but in ChunkState is marked as owned by user" - << " ChunkState# " << state.ToString()); - } - state.CommitState = TChunkState::LOG_RESERVED; - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " chunk is the next log chunk," - << " prevOwnerId# " << ui32(state.OwnerId) << " -> newOwnerId# " << ui32(OwnerSystem)); - state.OwnerId = OwnerSystem; - state.PreviousNonce = LastNonce; -} - -void TLogReader::SwitchToChunk(ui32 chunkIdx) { - ChunkIdx = chunkIdx; - SectorIdx = 0; - OffsetInSector = 0; - if (!ParseCommits) { - ParseCommits = PDisk->FirstLogChunkToParseCommits == chunkIdx; - } -} - -void TLogReader::ScheduleReadAsync(TSectorData §or, ui32 sectorsToRead) { 
- TDiskFormat &format = PDisk->Format; - sector.Prepare(sectorsToRead * format.SectorSize); - TCompletionLogReadPart *completion = new TCompletionLogReadPart(this, sector.Offset); - Sector->DataByOffset(sector.Offset)->IsScheduled = true; - ui32 sizeToRead = format.SectorSize * sectorsToRead; - completion->CostNs = PDisk->DriveModel.TimeForSizeNs(sizeToRead, sector.Offset / format.ChunkSize, - TDriveModel::OP_TYPE_READ); - PDisk->BlockDevice->CachedPreadAsync(sector.Buffer->Data(), sizeToRead, sector.Offset, completion, ReqId, {}); -} - -bool TLogReader::RegisterBadOffsets(TVector<ui64> &badOffsets) { - BadOffsets.insert(BadOffsets.end(), badOffsets.begin(), badOffsets.end()); - Sort(BadOffsets.begin(), BadOffsets.end()); - TDiskFormat &format = PDisk->Format; - - TVector<ui64>::iterator it = BadOffsets.begin(); - if (it != BadOffsets.end()) { - ui32 erasureSectors = PDisk->UsableSectorsPerLogChunk(); - Y_UNUSED(erasureSectors); - - ui64 prevOffset = *it; - ui32 prevChunk = ui32(prevOffset / format.ChunkSize); - ui64 prevSector = (prevOffset - ui64(prevChunk) * ui64(format.ChunkSize)) / ui64(format.SectorSize); - ui32 prevErasurePartSetBads = 1; + return true; + } +} + +void TLogReader::UpdateNewChunkInfo(ui32 currChunk, const TMaybe<ui32> prevChunkIdx) { + TGuard<TMutex> guard(PDisk->StateMutex); + if (prevChunkIdx) { + PDisk->ChunkState[*prevChunkIdx].CommitState = TChunkState::LOG_COMMITTED; + } + + TChunkState& state = PDisk->ChunkState[currChunk]; + if (IsOwnerUser(state.OwnerId)) { + LOG_WARN_S(*PDisk->ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " chunk will be treated as log chunk, but in ChunkState is marked as owned by user" + << " ChunkState# " << state.ToString()); + } + state.CommitState = TChunkState::LOG_RESERVED; + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " chunk is the next log chunk," + << " prevOwnerId# " << ui32(state.OwnerId) << " -> newOwnerId# " << ui32(OwnerSystem)); + state.OwnerId = OwnerSystem; + 
state.PreviousNonce = LastNonce; +} + +void TLogReader::SwitchToChunk(ui32 chunkIdx) { + ChunkIdx = chunkIdx; + SectorIdx = 0; + OffsetInSector = 0; + if (!ParseCommits) { + ParseCommits = PDisk->FirstLogChunkToParseCommits == chunkIdx; + } +} + +void TLogReader::ScheduleReadAsync(TSectorData §or, ui32 sectorsToRead) { + TDiskFormat &format = PDisk->Format; + sector.Prepare(sectorsToRead * format.SectorSize); + TCompletionLogReadPart *completion = new TCompletionLogReadPart(this, sector.Offset); + Sector->DataByOffset(sector.Offset)->IsScheduled = true; + ui32 sizeToRead = format.SectorSize * sectorsToRead; + completion->CostNs = PDisk->DriveModel.TimeForSizeNs(sizeToRead, sector.Offset / format.ChunkSize, + TDriveModel::OP_TYPE_READ); + PDisk->BlockDevice->CachedPreadAsync(sector.Buffer->Data(), sizeToRead, sector.Offset, completion, ReqId, {}); +} + +bool TLogReader::RegisterBadOffsets(TVector<ui64> &badOffsets) { + BadOffsets.insert(BadOffsets.end(), badOffsets.begin(), badOffsets.end()); + Sort(BadOffsets.begin(), BadOffsets.end()); + TDiskFormat &format = PDisk->Format; + + TVector<ui64>::iterator it = BadOffsets.begin(); + if (it != BadOffsets.end()) { + ui32 erasureSectors = PDisk->UsableSectorsPerLogChunk(); + Y_UNUSED(erasureSectors); + + ui64 prevOffset = *it; + ui32 prevChunk = ui32(prevOffset / format.ChunkSize); + ui64 prevSector = (prevOffset - ui64(prevChunk) * ui64(format.ChunkSize)) / ui64(format.SectorSize); + ui32 prevErasurePartSetBads = 1; ui64 prevErasurePartSet = prevSector; - if (prevSector >= erasureSectors) { - prevErasurePartSet = (ui64)-1; - } - - for (++it; it != BadOffsets.end(); ++it) { - ui64 currOffset = *it; - ui32 currChunk = ui32(currOffset / ui64(format.ChunkSize)); - ui64 currSector = (currOffset - ui64(currChunk) * ui64(format.ChunkSize)) / ui64(format.SectorSize); - if (currChunk != prevChunk) { - prevChunk = currChunk; - prevErasurePartSetBads = 0; - } else { - if (prevSector == currSector) { - continue; - } - } - prevSector 
= currSector; - if (currSector < erasureSectors) { + if (prevSector >= erasureSectors) { + prevErasurePartSet = (ui64)-1; + } + + for (++it; it != BadOffsets.end(); ++it) { + ui64 currOffset = *it; + ui32 currChunk = ui32(currOffset / ui64(format.ChunkSize)); + ui64 currSector = (currOffset - ui64(currChunk) * ui64(format.ChunkSize)) / ui64(format.SectorSize); + if (currChunk != prevChunk) { + prevChunk = currChunk; + prevErasurePartSetBads = 0; + } else { + if (prevSector == currSector) { + continue; + } + } + prevSector = currSector; + if (currSector < erasureSectors) { ui32 currErasurePartSet = currSector; - if (currErasurePartSet == prevErasurePartSet) { - ++prevErasurePartSetBads; - if (prevErasurePartSetBads > 1) { - // Report unrecoverable error - return false; - } - } else { - prevErasurePartSet = currErasurePartSet; - prevErasurePartSetBads = 1; - } - } else { - if (prevErasurePartSet == (ui64)-1) { - ++prevErasurePartSetBads; - if (prevErasurePartSetBads >= ReplicationFactor) { - // Report unrecoverable error - return false; - } - } else { - prevErasurePartSet = (ui64)-1; - prevErasurePartSetBads = 1; - } - } - } - } - return true; -} - -void TLogReader::ReleaseUsedBadOffsets() { + if (currErasurePartSet == prevErasurePartSet) { + ++prevErasurePartSetBads; + if (prevErasurePartSetBads > 1) { + // Report unrecoverable error + return false; + } + } else { + prevErasurePartSet = currErasurePartSet; + prevErasurePartSetBads = 1; + } + } else { + if (prevErasurePartSet == (ui64)-1) { + ++prevErasurePartSetBads; + if (prevErasurePartSetBads >= ReplicationFactor) { + // Report unrecoverable error + return false; + } + } else { + prevErasurePartSet = (ui64)-1; + prevErasurePartSetBads = 1; + } + } + } + } + return true; +} + +void TLogReader::ReleaseUsedBadOffsets() { ui64 firstSectorToKeep = SectorIdx; - ui32 erasureSectors = PDisk->UsableSectorsPerLogChunk(); - if (SectorIdx >= erasureSectors) { - firstSectorToKeep = erasureSectors; - } - TDiskFormat &format = 
PDisk->Format; - ui64 offset = format.Offset(ChunkIdx, firstSectorToKeep); - TVector<ui64>::iterator lowerBound = LowerBound(BadOffsets.begin(), BadOffsets.end(), offset); - if (lowerBound != BadOffsets.begin()) { - BadOffsets.erase(BadOffsets.begin(), lowerBound); - } -} - -} // NPDisk -} // NKikimr - + ui32 erasureSectors = PDisk->UsableSectorsPerLogChunk(); + if (SectorIdx >= erasureSectors) { + firstSectorToKeep = erasureSectors; + } + TDiskFormat &format = PDisk->Format; + ui64 offset = format.Offset(ChunkIdx, firstSectorToKeep); + TVector<ui64>::iterator lowerBound = LowerBound(BadOffsets.begin(), BadOffsets.end(), offset); + if (lowerBound != BadOffsets.begin()) { + BadOffsets.erase(BadOffsets.begin(), lowerBound); + } +} + +} // NPDisk +} // NKikimr + diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.h index 5987bbc9d8..a95ff19be1 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader.h @@ -1,17 +1,17 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_state.h" -#include "blobstorage_pdisk_logreader_base.h" - +#include "blobstorage_pdisk_state.h" +#include "blobstorage_pdisk_logreader_base.h" + namespace NKikimr { namespace NPDisk { class TCompletionLogReadPart : public TCompletionAction { -private: +private: TIntrusivePtr<TLogReaderBase> Reader; ui64 Offset; - + public: TCompletionLogReadPart(const TIntrusivePtr<TLogReaderBase> &reader, ui64 offset) @@ -30,46 +30,46 @@ public: } }; -class TPDisk; +class TPDisk; class TLogReader : public TLogReaderBase { static constexpr ui32 BufferSizeSectors = 105; - struct TSectorData; - class TDoubleBuffer; - - enum class ELogReaderState { - PrepareToRead, - NewLogChunk, - ScheduleForwardReads, - ProcessAlreadyReadSectors, - TryProceedNextChunk, - }; - - bool IsInitial; + struct TSectorData; + class TDoubleBuffer; + + enum class ELogReaderState { + 
PrepareToRead, + NewLogChunk, + ScheduleForwardReads, + ProcessAlreadyReadSectors, + TryProceedNextChunk, + }; + + bool IsInitial; TPDisk * const PDisk; TActorSystem * const ActorSystem; const TActorId ReplyTo; - TOwner Owner; - TLogPosition OwnerLogStartPosition; - TLogPosition Position; + TOwner Owner; + TLogPosition OwnerLogStartPosition; + TLogPosition Position; ui64 SizeLimit; - THolder<TEvReadLogResult> Result; + THolder<TEvReadLogResult> Result; TLogChunkInfo *ChunkInfo; - THolder<TDoubleBuffer> Sector; + THolder<TDoubleBuffer> Sector; THolder<TMap<ui32, TChunkState>> ChunkOwnerMap; - ELogReaderState State; - std::atomic<bool> IsReplied; + ELogReaderState State; + std::atomic<bool> IsReplied; - TLogPosition LastGoodToWriteLogPosition; + TLogPosition LastGoodToWriteLogPosition; ui64 MaxNonce; ui64 LastNonce; ui64 LastDataNonce; - bool OnEndOfSplice; - TPDiskStreamCypher Cypher; + bool OnEndOfSplice; + TPDiskStreamCypher Cypher; ui32 OffsetInSector; bool SetLastGoodToWritePosition; ui32 ChunkIdx; @@ -96,41 +96,41 @@ class TLogReader : public TLogReaderBase { TVector<ui64> BadOffsets; TMutex ExecMutex; - ui32 ErasurePartCount; - bool ParseCommits; - + ui32 ErasurePartCount; + bool ParseCommits; + public: - TLogReader(bool isInitial, TPDisk *pDisk, TActorSystem * const actorSystem, const TActorId &replyTo, TOwner owner, - TLogPosition ownerLogStartPosition, EOwnerGroupType ownerGroupType, TLogPosition position, ui64 sizeLimit, - ui64 lastNonce, ui32 logEndChunkIdx, ui64 logEndSectorIdx, TReqId reqId, - TVector<TChunkIdx> &&chunksToRead, ui64 firstLsnToKeep, ui64 firstNonceToKeep, TVDiskID ownerVDiskId); - - virtual ~TLogReader(); - - void Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) override; - -private: - TString SelfInfo(); - bool PrepareToRead(); - void ScheduleForward(ui64 offset); - ui64 BufferIdxFromOffset(ui64 offset); - void UpdateLastGoodToWritePosition(); - void LogRawData(ui8* data, ui64 size, TString info); - void 
ProcessLogPageTerminator(ui8 *data, ui32 sectorPayloadSize); + TLogReader(bool isInitial, TPDisk *pDisk, TActorSystem * const actorSystem, const TActorId &replyTo, TOwner owner, + TLogPosition ownerLogStartPosition, EOwnerGroupType ownerGroupType, TLogPosition position, ui64 sizeLimit, + ui64 lastNonce, ui32 logEndChunkIdx, ui64 logEndSectorIdx, TReqId reqId, + TVector<TChunkIdx> &&chunksToRead, ui64 firstLsnToKeep, ui64 firstNonceToKeep, TVDiskID ownerVDiskId); + + virtual ~TLogReader(); + + void Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) override; + +private: + TString SelfInfo(); + bool PrepareToRead(); + void ScheduleForward(ui64 offset); + ui64 BufferIdxFromOffset(ui64 offset); + void UpdateLastGoodToWritePosition(); + void LogRawData(ui8* data, ui64 size, TString info); + void ProcessLogPageTerminator(ui8 *data, ui32 sectorPayloadSize); void ProcessLogPageNonceJump2(ui8 *data, const ui64 previousNonce, const ui64 previousDataNonce); void ProcessLogPageNonceJump1(ui8 *data, const ui64 previousNonce); - bool ProcessSectorSet(TSectorData *sector); + bool ProcessSectorSet(TSectorData *sector); void ReplyOk(); - void ReplyOkInTheMiddle(); - void ReplyError(); - void Reply(); - bool GetIsReplied() const override; - bool ProcessNextChunkReference(TSectorData& sector); - void UpdateNewChunkInfo(ui32 currChunk, const TMaybe<ui32> prevChunkIdx); - void SwitchToChunk(ui32 chunkIdx); - void ScheduleReadAsync(TSectorData §or, ui32 sectorsToRead); - bool RegisterBadOffsets(TVector<ui64> &badOffsets); - void ReleaseUsedBadOffsets(); + void ReplyOkInTheMiddle(); + void ReplyError(); + void Reply(); + bool GetIsReplied() const override; + bool ProcessNextChunkReference(TSectorData& sector); + void UpdateNewChunkInfo(ui32 currChunk, const TMaybe<ui32> prevChunkIdx); + void SwitchToChunk(ui32 chunkIdx); + void ScheduleReadAsync(TSectorData §or, ui32 sectorsToRead); + bool RegisterBadOffsets(TVector<ui64> &badOffsets); + void 
ReleaseUsedBadOffsets(); }; } // NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader_base.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader_base.h index 5a031e0841..f1903220ef 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader_base.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_logreader_base.h @@ -1,14 +1,14 @@ -#pragma once - -#include "defs.h" - -namespace NKikimr::NPDisk { - -class TLogReaderBase : public TThrRefBase { -public: - virtual void Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) = 0; - virtual ~TLogReaderBase() {} - virtual bool GetIsReplied() const = 0; -}; - -} // namespace NKikimr::NPDisk +#pragma once + +#include "defs.h" + +namespace NKikimr::NPDisk { + +class TLogReaderBase : public TThrRefBase { +public: + virtual void Exec(ui64 offsetRead, TVector<ui64> &badOffsets, TActorSystem *actorSystem) = 0; + virtual ~TLogReaderBase() {} + virtual bool GetIsReplied() const = 0; +}; + +} // namespace NKikimr::NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.cpp index b11e367c58..a514e261ab 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.cpp @@ -5,7 +5,7 @@ namespace NKikimr { TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui32 pDiskId, - TPDiskConfig *cfg) + TPDiskConfig *cfg) : Counters(counters) , PDiskId(pDiskId) , ChunksGroup(Counters->GetSubgroup("subsystem", "chunks")) @@ -22,9 +22,9 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter LogChunks = ChunksGroup->GetCounter("LogChunks"); UncommitedDataChunks = ChunksGroup->GetCounter("UncommitedDataChunks"); CommitedDataChunks = ChunksGroup->GetCounter("CommitedDataChunks"); - LockedChunks = ChunksGroup->GetCounter("LockedChunks"); - QuarantineChunks = ChunksGroup->GetCounter("QuarantineChunks"); - 
QuarantineOwners = ChunksGroup->GetCounter("QuarantineOwners"); + LockedChunks = ChunksGroup->GetCounter("LockedChunks"); + QuarantineChunks = ChunksGroup->GetCounter("QuarantineChunks"); + QuarantineOwners = ChunksGroup->GetCounter("QuarantineOwners"); // stats subgroup StatsGroup = (cfg && cfg->PDiskCategory.IsSolidState()) @@ -34,31 +34,31 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter FreeSpacePerMile = StatsGroup->GetCounter("FreeSpacePerMile"); UsedSpacePerMile = StatsGroup->GetCounter("UsedSpacePerMile"); - SplicedLogChunks = StatsGroup->GetCounter("SplicedLogChunks", true); + SplicedLogChunks = StatsGroup->GetCounter("SplicedLogChunks", true); + + TotalSpaceBytes = StatsGroup->GetCounter("TotalSpaceBytes"); + FreeSpaceBytes = StatsGroup->GetCounter("FreeSpaceBytes"); + UsedSpaceBytes = StatsGroup->GetCounter("UsedSpaceBytes"); + SectorMapAllocatedBytes = StatsGroup->GetCounter("SectorMapAllocatedBytes"); - TotalSpaceBytes = StatsGroup->GetCounter("TotalSpaceBytes"); - FreeSpaceBytes = StatsGroup->GetCounter("FreeSpaceBytes"); - UsedSpaceBytes = StatsGroup->GetCounter("UsedSpaceBytes"); - SectorMapAllocatedBytes = StatsGroup->GetCounter("SectorMapAllocatedBytes"); - // states subgroup PDiskState = StateGroup->GetCounter("PDiskState"); - PDiskBriefState = StateGroup->GetCounter("PDiskBriefState"); - PDiskDetailedState = StateGroup->GetCounter("PDiskDetailedState"); - AtLeastOneVDiskNotLogged = StateGroup->GetCounter("AtLeastOneVDiskNotLogged"); - TooMuchLogChunks = StateGroup->GetCounter("TooMuchLogChunks"); - SerialNumberMismatched = StateGroup->GetCounter("SerialNumberMismatched"); + PDiskBriefState = StateGroup->GetCounter("PDiskBriefState"); + PDiskDetailedState = StateGroup->GetCounter("PDiskDetailedState"); + AtLeastOneVDiskNotLogged = StateGroup->GetCounter("AtLeastOneVDiskNotLogged"); + TooMuchLogChunks = StateGroup->GetCounter("TooMuchLogChunks"); + SerialNumberMismatched = 
StateGroup->GetCounter("SerialNumberMismatched"); L6. Initialize(StateGroup, "L6"); - L7. Initialize(StateGroup, "L7"); - IdleLight.Initialize(StateGroup, "DeviceBusyPeriods", "DeviceIdleTimeMsPerSec", "DeviceBusyTimeMsPerSec"); - - OwnerIdsIssued = StateGroup->GetCounter("OwnerIdsIssued"); - LastOwnerId = StateGroup->GetCounter("LastOwnerId"); - PendingYardInits = StateGroup->GetCounter("PendingYardInits"); - - SeqnoL6 = 0; - LastDoneOperationTimestamp = 0; - + L7. Initialize(StateGroup, "L7"); + IdleLight.Initialize(StateGroup, "DeviceBusyPeriods", "DeviceIdleTimeMsPerSec", "DeviceBusyTimeMsPerSec"); + + OwnerIdsIssued = StateGroup->GetCounter("OwnerIdsIssued"); + LastOwnerId = StateGroup->GetCounter("LastOwnerId"); + PendingYardInits = StateGroup->GetCounter("PendingYardInits"); + + SeqnoL6 = 0; + LastDoneOperationTimestamp = 0; + // device subgroup DeviceBytesRead = DeviceGroup->GetCounter("DeviceBytesRead", true); DeviceBytesWritten = DeviceGroup->GetCounter("DeviceBytesWritten", true); @@ -69,33 +69,33 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter DeviceInFlightReads = DeviceGroup->GetCounter("DeviceInFlightReads"); DeviceInFlightWrites = DeviceGroup->GetCounter("DeviceInFlightWrites"); DeviceTakeoffs = DeviceGroup->GetCounter("DeviceTakeoffs"); - DeviceLandings = DeviceGroup->GetCounter("DeviceLandings"); + DeviceLandings = DeviceGroup->GetCounter("DeviceLandings"); DeviceHaltDetected = DeviceGroup->GetCounter("DeviceHaltDetected"); DeviceExpectedSeeks = DeviceGroup->GetCounter("DeviceExpectedSeeks", true); DeviceReadCacheHits = DeviceGroup->GetCounter("DeviceReadCacheHits", true); DeviceReadCacheMisses = DeviceGroup->GetCounter("DeviceReadCacheMisses", true); DeviceWriteCacheIsValid = DeviceGroup->GetCounter("DeviceWriteCacheIsValid"); DeviceWriteCacheIsEnabled = DeviceGroup->GetCounter("DeviceWriteCacheIsEnabled"); - DeviceOperationPoolTotalAllocations = DeviceGroup->GetCounter("DeviceOperationPoolTotalAllocations"); - 
DeviceOperationPoolFreeObjectsMin = DeviceGroup->GetCounter("DeviceOperationPoolFreeObjectsMin"); - DeviceBufferPoolFailedAllocations = DeviceGroup->GetCounter("DeviceBufferPoolFailedAllocations"); - DeviceErasureSectorRestorations = DeviceGroup->GetCounter("DeviceErasureSectorRestorations"); + DeviceOperationPoolTotalAllocations = DeviceGroup->GetCounter("DeviceOperationPoolTotalAllocations"); + DeviceOperationPoolFreeObjectsMin = DeviceGroup->GetCounter("DeviceOperationPoolFreeObjectsMin"); + DeviceBufferPoolFailedAllocations = DeviceGroup->GetCounter("DeviceBufferPoolFailedAllocations"); + DeviceErasureSectorRestorations = DeviceGroup->GetCounter("DeviceErasureSectorRestorations"); DeviceEstimatedCostNs = DeviceGroup->GetCounter("DeviceEstimatedCostNs", true); DeviceActualCostNs = DeviceGroup->GetCounter("DeviceActualCostNs", true); DeviceOverestimationRatio = DeviceGroup->GetCounter("DeviceOverestimationRatio"); DeviceNonperformanceMs = DeviceGroup->GetCounter("DeviceNonperformanceMs"); DeviceInterruptedSystemCalls = DeviceGroup->GetCounter("DeviceInterruptedSystemCalls", true); - DeviceSubmitThreadBusyTimeNs = DeviceGroup->GetCounter("DeviceSubmitThreadBusyTimeNs", true); - DeviceCompletionThreadBusyTimeNs = DeviceGroup->GetCounter("DeviceCompletionThreadBusyTimeNs", true); - DeviceIoErrors = DeviceGroup->GetCounter("DeviceIoErrors", true); + DeviceSubmitThreadBusyTimeNs = DeviceGroup->GetCounter("DeviceSubmitThreadBusyTimeNs", true); + DeviceCompletionThreadBusyTimeNs = DeviceGroup->GetCounter("DeviceCompletionThreadBusyTimeNs", true); + DeviceIoErrors = DeviceGroup->GetCounter("DeviceIoErrors", true); + + UpdateDurationTracker.SetCounter(DeviceGroup->GetCounter("PDiskThreadBusyTimeNs", true)); - UpdateDurationTracker.SetCounter(DeviceGroup->GetCounter("PDiskThreadBusyTimeNs", true)); - // queue subgroup QueueRequests = QueueGroup->GetCounter("QueueRequests", true); QueueBytes = QueueGroup->GetCounter("QueueBytes", true); - auto deviceType = cfg ? 
cfg->PDiskCategory.Type() : TPDiskCategory::DEVICE_TYPE_UNKNOWN; + auto deviceType = cfg ? cfg->PDiskCategory.Type() : TPDiskCategory::DEVICE_TYPE_UNKNOWN; // scheduler subgroup ForsetiCbsNotFound = SchedulerGroup->GetCounter("ForsetiCbsNotFound"); @@ -106,11 +106,11 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter percentiles.push_back(0.99f); percentiles.push_back(1.00f); - UpdateDurationTracker.UpdateCycleTime.Initialize(counters, "subsystem", "updateCycle", "Time in millisec", percentiles); + UpdateDurationTracker.UpdateCycleTime.Initialize(counters, "subsystem", "updateCycle", "Time in millisec", percentiles); - DeviceReadDuration.Initialize(counters, "deviceReadDuration", deviceType); - DeviceWriteDuration.Initialize(counters, "deviceWriteDuration", deviceType); - DeviceTrimDuration.Initialize(counters, "deviceTrimDuration", deviceType); + DeviceReadDuration.Initialize(counters, "deviceReadDuration", deviceType); + DeviceWriteDuration.Initialize(counters, "deviceWriteDuration", deviceType); + DeviceTrimDuration.Initialize(counters, "deviceTrimDuration", deviceType); LogQueueTime.Initialize(counters, "subsystem", "logQueueTime", "Time in millisec", percentiles); GetQueueSyncLog.Initialize(counters, "subsystem", "getQueueSyncLog", "Time in millisec", percentiles); @@ -146,19 +146,19 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter WriteHullHugeSizeBytes.Initialize(counters, "subsystem", "writeHullHugeSize", "Size in bytes", percentiles); WriteHullCompSizeBytes.Initialize(counters, "subsystem", "writeHullCompSize", "Size in bytes", percentiles); - LogResponseTime.Initialize(counters, "logresponse", deviceType); - GetResponseSyncLog.Initialize(counters, "getResponseSyncLog", deviceType); + LogResponseTime.Initialize(counters, "logresponse", deviceType); + GetResponseSyncLog.Initialize(counters, "getResponseSyncLog", deviceType); - GetResponseHullComp.Initialize(counters, "getResponseHullComp", 
deviceType); - GetResponseHullOnlineRt.Initialize(counters, "getResponseHullOnlineRt", deviceType); - GetResponseHullOnlineOther.Initialize(counters, "getResponseHullOnlineOther", deviceType); - GetResponseHullLoad.Initialize(counters, "getResponseHullLoad", deviceType); - GetResponseHullLow.Initialize(counters, "getResponseHullLow", deviceType); + GetResponseHullComp.Initialize(counters, "getResponseHullComp", deviceType); + GetResponseHullOnlineRt.Initialize(counters, "getResponseHullOnlineRt", deviceType); + GetResponseHullOnlineOther.Initialize(counters, "getResponseHullOnlineOther", deviceType); + GetResponseHullLoad.Initialize(counters, "getResponseHullLoad", deviceType); + GetResponseHullLow.Initialize(counters, "getResponseHullLow", deviceType); - WriteResponseSyncLog.Initialize(counters, "writeResponseSyncLog", deviceType); - WriteResponseHullFresh.Initialize(counters, "writeResponseHullFresh", deviceType); - WriteResponseHullHuge.Initialize(counters, "writeResponseHullHuge", deviceType); - WriteResponseHullComp.Initialize(counters, "writeResponseHullComp", deviceType); + WriteResponseSyncLog.Initialize(counters, "writeResponseSyncLog", deviceType); + WriteResponseHullFresh.Initialize(counters, "writeResponseHullFresh", deviceType); + WriteResponseHullHuge.Initialize(counters, "writeResponseHullHuge", deviceType); + WriteResponseHullComp.Initialize(counters, "writeResponseHullComp", deviceType); // bandwidth BandwidthPLogPayload = BandwidthGroup->GetCounter("Bandwidth/PDisk/Log/Payload", true); @@ -185,10 +185,10 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter // pdisk (interface) YardInit.Setup(PDiskGroup, "YardInit"); - CheckSpace.Setup(PDiskGroup, "YardCheckSpace"); + CheckSpace.Setup(PDiskGroup, "YardCheckSpace"); YardConfigureScheduler.Setup(PDiskGroup, "YardConfigureScheduler"); - ChunkReserve.Setup(PDiskGroup, "YardChunkReserve"); - Harakiri.Setup(PDiskGroup, "YardHarakiri"); + ChunkReserve.Setup(PDiskGroup, 
"YardChunkReserve"); + Harakiri.Setup(PDiskGroup, "YardHarakiri"); YardSlay.Setup(PDiskGroup, "YardSlay"); YardControl.Setup(PDiskGroup, "YardControl"); @@ -209,13 +209,13 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counter WriteLog.Setup(PDiskGroup, "WriteLog"); WriteHugeLog.Setup(PDiskGroup, "WriteHugeLog"); - LogRead.Setup(PDiskGroup, "ReadLog"); - - PDiskThreadCPU = PDiskGroup->GetCounter("PDiskThreadCPU", true); - SubmitThreadCPU = PDiskGroup->GetCounter("SubmitThreadCPU", true); - GetThreadCPU = PDiskGroup->GetCounter("GetThreadCPU", true); - TrimThreadCPU = PDiskGroup->GetCounter("TrimThreadCPU", true); - CompletionThreadCPU = PDiskGroup->GetCounter("CompletionThreadCPU", true); + LogRead.Setup(PDiskGroup, "ReadLog"); + + PDiskThreadCPU = PDiskGroup->GetCounter("PDiskThreadCPU", true); + SubmitThreadCPU = PDiskGroup->GetCounter("SubmitThreadCPU", true); + GetThreadCPU = PDiskGroup->GetCounter("GetThreadCPU", true); + TrimThreadCPU = PDiskGroup->GetCounter("TrimThreadCPU", true); + CompletionThreadCPU = PDiskGroup->GetCounter("CompletionThreadCPU", true); } NMonitoring::TDynamicCounters::TCounterPtr TPDiskMon::GetBusyPeriod(const TString& owner, const TString& queue) { @@ -262,7 +262,7 @@ void TPDiskMon::IncrementQueueTime(ui8 priorityClass, size_t timeMs) { } } -void TPDiskMon::IncrementResponseTime(ui8 priorityClass, double timeMs, size_t sizeBytes) { +void TPDiskMon::IncrementResponseTime(ui8 priorityClass, double timeMs, size_t sizeBytes) { switch (priorityClass) { case NPriRead::SyncLog: GetResponseSyncLog.Increment(timeMs); @@ -314,7 +314,7 @@ void TPDiskMon::IncrementResponseTime(ui8 priorityClass, double timeMs, size_t s void TPDiskMon::UpdatePercentileTrackers() { - UpdateDurationTracker.UpdateCycleTime.Update(); + UpdateDurationTracker.UpdateCycleTime.Update(); LogOperationSizeBytes.Update(); @@ -352,22 +352,22 @@ void TPDiskMon::UpdatePercentileTrackers() { } void TPDiskMon::UpdateLights() { - if 
(HPSecondsFloat(std::abs(HPNow() - AtomicGet(LastDoneOperationTimestamp))) > 15.0) { - auto seqnoL6 = AtomicGetAndIncrement(SeqnoL6); - L6.Set(false, seqnoL6); - } - + if (HPSecondsFloat(std::abs(HPNow() - AtomicGet(LastDoneOperationTimestamp))) > 15.0) { + auto seqnoL6 = AtomicGetAndIncrement(SeqnoL6); + L6.Set(false, seqnoL6); + } + L6. Update(); - L7. Update(); - IdleLight.Update(); -} + L7. Update(); + IdleLight.Update(); +} -bool TPDiskMon::UpdateDeviceHaltCounters() { - NHPTimer::STime hpNow = HPNow(); - if (*DeviceTakeoffs != *DeviceLandings) { +bool TPDiskMon::UpdateDeviceHaltCounters() { + NHPTimer::STime hpNow = HPNow(); + if (*DeviceTakeoffs != *DeviceLandings) { // Halt? if (*DeviceTakeoffs == LastHaltDeviceTakeoffs && - *DeviceLandings == LastHaltDeviceLandings && + *DeviceLandings == LastHaltDeviceLandings && LastHaltDeviceLandings != LastHaltDeviceTakeoffs) { // Halt! if (hpNow > LastHaltTimestamp) { @@ -375,19 +375,19 @@ bool TPDiskMon::UpdateDeviceHaltCounters() { if (haltDuration > 7.5) { *DeviceHaltDetected = 1; } - if (haltDuration >= 60.0) { - return true; - } + if (haltDuration >= 60.0) { + return true; + } } else { LastHaltTimestamp = hpNow; } } else { LastHaltDeviceTakeoffs = *DeviceTakeoffs; - LastHaltDeviceLandings = *DeviceLandings; + LastHaltDeviceLandings = *DeviceLandings; LastHaltTimestamp = hpNow; } } - return false; + return false; } void TPDiskMon::UpdateStats() { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.h index e321f9602d..38a0f73b7c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.h @@ -1,19 +1,19 @@ #pragma once - + #include <ydb/core/blobstorage/base/common_latency_hist_bounds.h> #include <ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h> #include <ydb/core/mon/mon.h> #include <ydb/core/protos/node_whiteboard.pb.h> - -#include <library/cpp/bucket_quoter/bucket_quoter.h> 
-#include <library/cpp/containers/stack_vector/stack_vec.h> -#include <library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h> - +#include <library/cpp/bucket_quoter/bucket_quoter.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h> + + namespace NKikimr { -struct TPDiskConfig; - +struct TPDiskConfig; + inline NHPTimer::STime HPNow() { NHPTimer::STime ret; GetTimeFast(&ret); @@ -28,28 +28,28 @@ inline double HPSecondsFloat(i64 cycles) { } } -inline double HPMilliSecondsFloat(i64 cycles) { +inline double HPMilliSecondsFloat(i64 cycles) { if (cycles > 0) { - return double(cycles) * 1000.0 / NHPTimer::GetClockRate(); + return double(cycles) * 1000.0 / NHPTimer::GetClockRate(); } else { return 0; } } -inline ui64 HPMilliSeconds(i64 cycles) { - return (ui64)HPMilliSecondsFloat(cycles); -} - -inline ui64 HPMicroSecondsFloat(i64 cycles) { +inline ui64 HPMilliSeconds(i64 cycles) { + return (ui64)HPMilliSecondsFloat(cycles); +} + +inline ui64 HPMicroSecondsFloat(i64 cycles) { if (cycles > 0) { - return double(cycles) * 1000000.0 / NHPTimer::GetClockRate(); + return double(cycles) * 1000000.0 / NHPTimer::GetClockRate(); } else { return 0; } } inline ui64 HPMicroSeconds(i64 cycles) { - return (ui64)HPMicroSecondsFloat(cycles); + return (ui64)HPMicroSecondsFloat(cycles); } inline ui64 HPNanoSeconds(i64 cycles) { @@ -96,15 +96,15 @@ public: AdvancedTill = Now(); } - void Initialize(TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, const TString& countName, - const TString& redMsName,const TString& greenMsName) { - Count = counters->GetCounter(countName, true); - RedMs = counters->GetCounter(redMsName, true); - GreenMs = counters->GetCounter(greenMsName, true); - UpdateThreshold = HPCyclesMs(100); - AdvancedTill = Now(); - } - + void Initialize(TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, const TString& countName, + const TString& redMsName,const TString& greenMsName) { 
+ Count = counters->GetCounter(countName, true); + RedMs = counters->GetCounter(redMsName, true); + GreenMs = counters->GetCounter(greenMsName, true); + UpdateThreshold = HPCyclesMs(100); + AdvancedTill = Now(); + } + ui64 GetCount() const { return *Count; } @@ -119,16 +119,16 @@ public: protected: void Modify(bool state, bool prevState) { if (state && !prevState) { // Switched to ON state - if (State) { - *State = true; - } + if (State) { + *State = true; + } (*Count)++; return; } if (!state && prevState) { // Switched to OFF state - if (State) { - *State = false; - } + if (State) { + *State = false; + } return; } } @@ -304,176 +304,176 @@ public: } }; -class THistogram { -private: - NMonitoring::THistogramPtr Histo; - -public: - void Initialize(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - const TString &name, TPDiskCategory::EDeviceType deviceType) { - TString histName = name + "Ms"; - // Histogram backets in milliseconds - auto h = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(deviceType)); - Histo = counters->GetNamedHistogram("sensor", histName, std::move(h)); - } - - void Increment(double timeMs) { - if (Histo) { - Histo->Collect(timeMs); - } - } -}; - +class THistogram { +private: + NMonitoring::THistogramPtr Histo; + +public: + void Initialize(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, + const TString &name, TPDiskCategory::EDeviceType deviceType) { + TString histName = name + "Ms"; + // Histogram backets in milliseconds + auto h = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(deviceType)); + Histo = counters->GetNamedHistogram("sensor", histName, std::move(h)); + } + + void Increment(double timeMs) { + if (Histo) { + Histo->Collect(timeMs); + } + } +}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // PDisk monitoring counters 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct TPDiskMon { struct TPDisk { - enum EBriefState { - Booting, - OK, - Error, - }; - - enum EDetailedState { - EverythingIsOk, - BootingFormatRead, - BootingSysLogRead, - BootingCommonLogRead, - BootingFormatMagicChecking, - BootingDeviceFormattingAndTrimming, - ErrorInitialFormatRead, + enum EBriefState { + Booting, + OK, + Error, + }; + + enum EDetailedState { + EverythingIsOk, + BootingFormatRead, + BootingSysLogRead, + BootingCommonLogRead, + BootingFormatMagicChecking, + BootingDeviceFormattingAndTrimming, + ErrorInitialFormatRead, ErrorInitialFormatReadDueToGuid, - ErrorInitialFormatReadIncompleteFormat, - ErrorDiskCannotBeFormated, - ErrorPDiskCannotBeInitialised, - ErrorInitialSysLogRead, - ErrorInitialSysLogParse, - ErrorInitialCommonLogRead, - ErrorInitialCommonLogParse, - ErrorCommonLoggerInit, - ErrorOpenNonexistentFile, - ErrorOpenFileWithoutPermissions, - ErrorOpenFileUnknown, + ErrorInitialFormatReadIncompleteFormat, + ErrorDiskCannotBeFormated, + ErrorPDiskCannotBeInitialised, + ErrorInitialSysLogRead, + ErrorInitialSysLogParse, + ErrorInitialCommonLogRead, + ErrorInitialCommonLogParse, + ErrorCommonLoggerInit, + ErrorOpenNonexistentFile, + ErrorOpenFileWithoutPermissions, + ErrorOpenFileUnknown, ErrorCalculatingChunkQuotas, - ErrorDeviceIoError, - ErrorNoDeviceWithSuchSerial, - ErrorDeviceSerialMismatch, - ErrorFake, - }; - - static TString StateToStr(i64 val) { + ErrorDeviceIoError, + ErrorNoDeviceWithSuchSerial, + ErrorDeviceSerialMismatch, + ErrorFake, + }; + + static TString StateToStr(i64 val) { return NKikimrBlobStorage::TPDiskState::E_Name(static_cast<NKikimrBlobStorage::TPDiskState::E>(val)); } - - static const char *BriefStateToStr(i64 val) { - switch (val) { - case Booting: return "Booting"; - case OK: return "OK"; - case Error: return "Error"; - default: return "Unknown"; - } - } - - static const char 
*DetailedStateToStr(i64 val) { - switch (val) { - case EverythingIsOk: return "EverythingIsOk"; - case BootingFormatRead: return "BootingSysLogRead"; - case BootingSysLogRead: return "BootingSysLogRead"; - case BootingCommonLogRead: return "BootingCommonLogRead"; - case BootingFormatMagicChecking: return "BootingFormatMagicChecking"; - case BootingDeviceFormattingAndTrimming: return "BootingDeviceFormattingAndTrimming"; - case ErrorInitialFormatRead: return "ErrorInitialFormatRead"; + + static const char *BriefStateToStr(i64 val) { + switch (val) { + case Booting: return "Booting"; + case OK: return "OK"; + case Error: return "Error"; + default: return "Unknown"; + } + } + + static const char *DetailedStateToStr(i64 val) { + switch (val) { + case EverythingIsOk: return "EverythingIsOk"; + case BootingFormatRead: return "BootingSysLogRead"; + case BootingSysLogRead: return "BootingSysLogRead"; + case BootingCommonLogRead: return "BootingCommonLogRead"; + case BootingFormatMagicChecking: return "BootingFormatMagicChecking"; + case BootingDeviceFormattingAndTrimming: return "BootingDeviceFormattingAndTrimming"; + case ErrorInitialFormatRead: return "ErrorInitialFormatRead"; case ErrorInitialFormatReadDueToGuid: return "ErrorInitialFormatReadDueToGuid"; - case ErrorInitialFormatReadIncompleteFormat: return "ErrorInitialFormatReadIncompleteFormat"; - case ErrorDiskCannotBeFormated: return "ErrorDiskCannotBeFormated"; - case ErrorPDiskCannotBeInitialised: return "ErrorPDiskCannotBeInitialised"; - case ErrorInitialSysLogRead: return "ErrorInitialSysLogRead"; - case ErrorInitialSysLogParse: return "ErrorInitialSysLogParse"; - case ErrorInitialCommonLogRead: return "ErrorInitialCommonLogRead"; - case ErrorInitialCommonLogParse: return "ErrorInitialCommonLogParse"; - case ErrorCommonLoggerInit: return "ErrorCommonLoggerInit"; - case ErrorOpenNonexistentFile: return "ErrorOpenNonexistentFile"; - case ErrorOpenFileWithoutPermissions: return "ErrorOpenFileWithoutPermissions"; - 
case ErrorOpenFileUnknown: return "ErrorOpenFileUnknown"; + case ErrorInitialFormatReadIncompleteFormat: return "ErrorInitialFormatReadIncompleteFormat"; + case ErrorDiskCannotBeFormated: return "ErrorDiskCannotBeFormated"; + case ErrorPDiskCannotBeInitialised: return "ErrorPDiskCannotBeInitialised"; + case ErrorInitialSysLogRead: return "ErrorInitialSysLogRead"; + case ErrorInitialSysLogParse: return "ErrorInitialSysLogParse"; + case ErrorInitialCommonLogRead: return "ErrorInitialCommonLogRead"; + case ErrorInitialCommonLogParse: return "ErrorInitialCommonLogParse"; + case ErrorCommonLoggerInit: return "ErrorCommonLoggerInit"; + case ErrorOpenNonexistentFile: return "ErrorOpenNonexistentFile"; + case ErrorOpenFileWithoutPermissions: return "ErrorOpenFileWithoutPermissions"; + case ErrorOpenFileUnknown: return "ErrorOpenFileUnknown"; case ErrorCalculatingChunkQuotas: return "ErrorCalculatingChunkQuotas"; - case ErrorDeviceIoError: return "ErrorDeviceIoError"; - case ErrorNoDeviceWithSuchSerial: return "ErrorNoDeviceWithSuchSerial"; - case ErrorDeviceSerialMismatch: return "ErrorDeviceSerialMismatch"; - case ErrorFake: return "ErrorFake"; - default: return "Unknown"; - } - } + case ErrorDeviceIoError: return "ErrorDeviceIoError"; + case ErrorNoDeviceWithSuchSerial: return "ErrorNoDeviceWithSuchSerial"; + case ErrorDeviceSerialMismatch: return "ErrorDeviceSerialMismatch"; + case ErrorFake: return "ErrorFake"; + default: return "Unknown"; + } + } }; - class TUpdateDurationTracker { + class TUpdateDurationTracker { bool IsLwProbeEnabled = false; NHPTimer::STime BeginUpdateAt = 0; NHPTimer::STime SchedulingStartAt = 0; NHPTimer::STime ProcessingStartAt = 0; NHPTimer::STime WaitingStartAt = 0; - - NMonitoring::TDynamicCounters::TCounterPtr PDiskThreadBusyTimeNs; - - public: + + NMonitoring::TDynamicCounters::TCounterPtr PDiskThreadBusyTimeNs; + + public: NMonitoring::TPercentileTrackerLg<5, 4, 15> UpdateCycleTime; - - public: - TUpdateDurationTracker() - : 
BeginUpdateAt(HPNow()) - {} - - void SetCounter(const NMonitoring::TDynamicCounters::TCounterPtr& pDiskThreadBusyTimeNs) { - PDiskThreadBusyTimeNs = pDiskThreadBusyTimeNs; - } - - void UpdateStarted() { - // BeginUpdateAt is set on the end of previous update cycle - IsLwProbeEnabled = GLOBAL_LWPROBE_ENABLED(BLOBSTORAGE_PROVIDER, PDiskUpdateCycleDetails); - } - - void SchedulingStart() { - if (IsLwProbeEnabled) { - SchedulingStartAt = HPNow(); - } - } - - void ProcessingStart() { - if (IsLwProbeEnabled) { - ProcessingStartAt = HPNow(); - } - } - - void WaitingStart(bool isNothingToDo) { - const auto now = HPNow(); - if (PDiskThreadBusyTimeNs) { - *PDiskThreadBusyTimeNs += HPNanoSeconds(now - BeginUpdateAt); - } - if (IsLwProbeEnabled || !isNothingToDo) { - WaitingStartAt = now; - if (!isNothingToDo) { - ui64 durationMs = HPMilliSeconds(WaitingStartAt - BeginUpdateAt); - UpdateCycleTime.Increment(durationMs); - } - } - } - - void UpdateEnded() { - NHPTimer::STime updateEndedAt = HPNow(); - if (IsLwProbeEnabled) { - float entireUpdateMs = HPMilliSecondsFloat(updateEndedAt - BeginUpdateAt); - float inputQueueMs = HPMilliSecondsFloat(SchedulingStartAt - BeginUpdateAt); - float schedulingMs = HPMilliSecondsFloat(ProcessingStartAt - SchedulingStartAt); - float processingMs = HPMilliSecondsFloat(WaitingStartAt - ProcessingStartAt); - float waitingMs = HPMilliSecondsFloat(updateEndedAt - WaitingStartAt); - GLOBAL_LWPROBE(BLOBSTORAGE_PROVIDER, PDiskUpdateCycleDetails, entireUpdateMs, inputQueueMs, - schedulingMs, processingMs, waitingMs); - } - BeginUpdateAt = updateEndedAt; - } - }; - + + public: + TUpdateDurationTracker() + : BeginUpdateAt(HPNow()) + {} + + void SetCounter(const NMonitoring::TDynamicCounters::TCounterPtr& pDiskThreadBusyTimeNs) { + PDiskThreadBusyTimeNs = pDiskThreadBusyTimeNs; + } + + void UpdateStarted() { + // BeginUpdateAt is set on the end of previous update cycle + IsLwProbeEnabled = GLOBAL_LWPROBE_ENABLED(BLOBSTORAGE_PROVIDER, 
PDiskUpdateCycleDetails); + } + + void SchedulingStart() { + if (IsLwProbeEnabled) { + SchedulingStartAt = HPNow(); + } + } + + void ProcessingStart() { + if (IsLwProbeEnabled) { + ProcessingStartAt = HPNow(); + } + } + + void WaitingStart(bool isNothingToDo) { + const auto now = HPNow(); + if (PDiskThreadBusyTimeNs) { + *PDiskThreadBusyTimeNs += HPNanoSeconds(now - BeginUpdateAt); + } + if (IsLwProbeEnabled || !isNothingToDo) { + WaitingStartAt = now; + if (!isNothingToDo) { + ui64 durationMs = HPMilliSeconds(WaitingStartAt - BeginUpdateAt); + UpdateCycleTime.Increment(durationMs); + } + } + } + + void UpdateEnded() { + NHPTimer::STime updateEndedAt = HPNow(); + if (IsLwProbeEnabled) { + float entireUpdateMs = HPMilliSecondsFloat(updateEndedAt - BeginUpdateAt); + float inputQueueMs = HPMilliSecondsFloat(SchedulingStartAt - BeginUpdateAt); + float schedulingMs = HPMilliSecondsFloat(ProcessingStartAt - SchedulingStartAt); + float processingMs = HPMilliSecondsFloat(WaitingStartAt - ProcessingStartAt); + float waitingMs = HPMilliSecondsFloat(updateEndedAt - WaitingStartAt); + GLOBAL_LWPROBE(BLOBSTORAGE_PROVIDER, PDiskUpdateCycleDetails, entireUpdateMs, inputQueueMs, + schedulingMs, processingMs, waitingMs); + } + BeginUpdateAt = updateEndedAt; + } + }; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; ui32 PDiskId; @@ -484,39 +484,39 @@ struct TPDiskMon { NMonitoring::TDynamicCounters::TCounterPtr LogChunks; NMonitoring::TDynamicCounters::TCounterPtr UncommitedDataChunks; NMonitoring::TDynamicCounters::TCounterPtr CommitedDataChunks; - NMonitoring::TDynamicCounters::TCounterPtr LockedChunks; - NMonitoring::TDynamicCounters::TCounterPtr QuarantineChunks; - NMonitoring::TDynamicCounters::TCounterPtr QuarantineOwners; + NMonitoring::TDynamicCounters::TCounterPtr LockedChunks; + NMonitoring::TDynamicCounters::TCounterPtr QuarantineChunks; + NMonitoring::TDynamicCounters::TCounterPtr QuarantineOwners; // statistics subgroup 
TIntrusivePtr<NMonitoring::TDynamicCounters> StatsGroup; NMonitoring::TDynamicCounters::TCounterPtr FreeSpacePerMile; NMonitoring::TDynamicCounters::TCounterPtr UsedSpacePerMile; - NMonitoring::TDynamicCounters::TCounterPtr SplicedLogChunks; + NMonitoring::TDynamicCounters::TCounterPtr SplicedLogChunks; + + NMonitoring::TDynamicCounters::TCounterPtr TotalSpaceBytes; + NMonitoring::TDynamicCounters::TCounterPtr FreeSpaceBytes; + NMonitoring::TDynamicCounters::TCounterPtr UsedSpaceBytes; + NMonitoring::TDynamicCounters::TCounterPtr SectorMapAllocatedBytes; - NMonitoring::TDynamicCounters::TCounterPtr TotalSpaceBytes; - NMonitoring::TDynamicCounters::TCounterPtr FreeSpaceBytes; - NMonitoring::TDynamicCounters::TCounterPtr UsedSpaceBytes; - NMonitoring::TDynamicCounters::TCounterPtr SectorMapAllocatedBytes; - // states subgroup TIntrusivePtr<NMonitoring::TDynamicCounters> StateGroup; NMonitoring::TDynamicCounters::TCounterPtr PDiskState; - NMonitoring::TDynamicCounters::TCounterPtr PDiskBriefState; - NMonitoring::TDynamicCounters::TCounterPtr PDiskDetailedState; - NMonitoring::TDynamicCounters::TCounterPtr AtLeastOneVDiskNotLogged; - NMonitoring::TDynamicCounters::TCounterPtr TooMuchLogChunks; - NMonitoring::TDynamicCounters::TCounterPtr SerialNumberMismatched; + NMonitoring::TDynamicCounters::TCounterPtr PDiskBriefState; + NMonitoring::TDynamicCounters::TCounterPtr PDiskDetailedState; + NMonitoring::TDynamicCounters::TCounterPtr AtLeastOneVDiskNotLogged; + NMonitoring::TDynamicCounters::TCounterPtr TooMuchLogChunks; + NMonitoring::TDynamicCounters::TCounterPtr SerialNumberMismatched; TLight L6; - TLight L7; - TLight IdleLight; - NMonitoring::TDynamicCounters::TCounterPtr OwnerIdsIssued; - NMonitoring::TDynamicCounters::TCounterPtr LastOwnerId; - NMonitoring::TDynamicCounters::TCounterPtr PendingYardInits; - - TAtomic SeqnoL6; - TAtomic LastDoneOperationTimestamp; - + TLight L7; + TLight IdleLight; + NMonitoring::TDynamicCounters::TCounterPtr OwnerIdsIssued; + 
NMonitoring::TDynamicCounters::TCounterPtr LastOwnerId; + NMonitoring::TDynamicCounters::TCounterPtr PendingYardInits; + + TAtomic SeqnoL6; + TAtomic LastDoneOperationTimestamp; + // device subgroup TIntrusivePtr<NMonitoring::TDynamicCounters> DeviceGroup; NMonitoring::TDynamicCounters::TCounterPtr DeviceBytesRead; @@ -528,25 +528,25 @@ struct TPDiskMon { NMonitoring::TDynamicCounters::TCounterPtr DeviceInFlightReads; NMonitoring::TDynamicCounters::TCounterPtr DeviceInFlightWrites; NMonitoring::TDynamicCounters::TCounterPtr DeviceTakeoffs; - NMonitoring::TDynamicCounters::TCounterPtr DeviceLandings; + NMonitoring::TDynamicCounters::TCounterPtr DeviceLandings; NMonitoring::TDynamicCounters::TCounterPtr DeviceHaltDetected; NMonitoring::TDynamicCounters::TCounterPtr DeviceExpectedSeeks; NMonitoring::TDynamicCounters::TCounterPtr DeviceReadCacheHits; NMonitoring::TDynamicCounters::TCounterPtr DeviceReadCacheMisses; NMonitoring::TDynamicCounters::TCounterPtr DeviceWriteCacheIsValid; NMonitoring::TDynamicCounters::TCounterPtr DeviceWriteCacheIsEnabled; - NMonitoring::TDynamicCounters::TCounterPtr DeviceOperationPoolTotalAllocations; - NMonitoring::TDynamicCounters::TCounterPtr DeviceOperationPoolFreeObjectsMin; - NMonitoring::TDynamicCounters::TCounterPtr DeviceBufferPoolFailedAllocations; - NMonitoring::TDynamicCounters::TCounterPtr DeviceErasureSectorRestorations; + NMonitoring::TDynamicCounters::TCounterPtr DeviceOperationPoolTotalAllocations; + NMonitoring::TDynamicCounters::TCounterPtr DeviceOperationPoolFreeObjectsMin; + NMonitoring::TDynamicCounters::TCounterPtr DeviceBufferPoolFailedAllocations; + NMonitoring::TDynamicCounters::TCounterPtr DeviceErasureSectorRestorations; NMonitoring::TDynamicCounters::TCounterPtr DeviceEstimatedCostNs; NMonitoring::TDynamicCounters::TCounterPtr DeviceActualCostNs; NMonitoring::TDynamicCounters::TCounterPtr DeviceOverestimationRatio; NMonitoring::TDynamicCounters::TCounterPtr DeviceNonperformanceMs; 
NMonitoring::TDynamicCounters::TCounterPtr DeviceInterruptedSystemCalls; - NMonitoring::TDynamicCounters::TCounterPtr DeviceSubmitThreadBusyTimeNs; - NMonitoring::TDynamicCounters::TCounterPtr DeviceCompletionThreadBusyTimeNs; - NMonitoring::TDynamicCounters::TCounterPtr DeviceIoErrors; + NMonitoring::TDynamicCounters::TCounterPtr DeviceSubmitThreadBusyTimeNs; + NMonitoring::TDynamicCounters::TCounterPtr DeviceCompletionThreadBusyTimeNs; + NMonitoring::TDynamicCounters::TCounterPtr DeviceIoErrors; // queue subgroup TIntrusivePtr<NMonitoring::TDynamicCounters> QueueGroup; @@ -554,29 +554,29 @@ struct TPDiskMon { NMonitoring::TDynamicCounters::TCounterPtr QueueBytes; // Update cycle time - TUpdateDurationTracker UpdateDurationTracker; + TUpdateDurationTracker UpdateDurationTracker; // Device times - THistogram DeviceReadDuration; - THistogram DeviceWriteDuration; - THistogram DeviceTrimDuration; + THistogram DeviceReadDuration; + THistogram DeviceWriteDuration; + THistogram DeviceTrimDuration; - // <BASE_BITS, EXP_BITS, FRAME_COUNT> - using TDurationTracker = NMonitoring::TPercentileTrackerLg<5, 4, 15>; + // <BASE_BITS, EXP_BITS, FRAME_COUNT> + using TDurationTracker = NMonitoring::TPercentileTrackerLg<5, 4, 15>; // log queue duration - TDurationTracker LogQueueTime; + TDurationTracker LogQueueTime; // get queue duration - TDurationTracker GetQueueSyncLog; - TDurationTracker GetQueueHullComp; - TDurationTracker GetQueueHullOnlineRt; - TDurationTracker GetQueueHullOnlineOther; - TDurationTracker GetQueueHullLoad; + TDurationTracker GetQueueSyncLog; + TDurationTracker GetQueueHullComp; + TDurationTracker GetQueueHullOnlineRt; + TDurationTracker GetQueueHullOnlineOther; + TDurationTracker GetQueueHullLoad; TDurationTracker GetQueueHullLow; // write queue duration - TDurationTracker WriteQueueSyncLog; - TDurationTracker WriteQueueHullFresh; - TDurationTracker WriteQueueHullHuge; - TDurationTracker WriteQueueHullComp; + TDurationTracker WriteQueueSyncLog; + 
TDurationTracker WriteQueueHullFresh; + TDurationTracker WriteQueueHullHuge; + TDurationTracker WriteQueueHullComp; // incoming flow burstiness TBurstmeter SensitiveBurst; @@ -593,34 +593,34 @@ struct TPDiskMon { // log cumulative size bytes // <BASE_BITS, EXP_BITS, FRAME_COUNT> using TSizeTracker = NMonitoring::TPercentileTrackerLg<5, 4, 15>; - TSizeTracker LogOperationSizeBytes; - TSizeTracker GetSyncLogSizeBytes; + TSizeTracker LogOperationSizeBytes; + TSizeTracker GetSyncLogSizeBytes; - TSizeTracker GetHullCompSizeBytes; - TSizeTracker GetHullOnlineRtSizeBytes; - TSizeTracker GetHullOnlineOtherSizeBytes; - TSizeTracker GetHullLoadSizeBytes; + TSizeTracker GetHullCompSizeBytes; + TSizeTracker GetHullOnlineRtSizeBytes; + TSizeTracker GetHullOnlineOtherSizeBytes; + TSizeTracker GetHullLoadSizeBytes; TSizeTracker GetHullLowSizeBytes; - TSizeTracker WriteSyncLogSizeBytes; - TSizeTracker WriteHullFreshSizeBytes; - TSizeTracker WriteHullHugeSizeBytes; - TSizeTracker WriteHullCompSizeBytes; + TSizeTracker WriteSyncLogSizeBytes; + TSizeTracker WriteHullFreshSizeBytes; + TSizeTracker WriteHullHugeSizeBytes; + TSizeTracker WriteHullCompSizeBytes; // log response time - THistogram LogResponseTime; + THistogram LogResponseTime; // get response time - THistogram GetResponseSyncLog; - THistogram GetResponseHullComp; - THistogram GetResponseHullOnlineRt; - THistogram GetResponseHullOnlineOther; - THistogram GetResponseHullLoad; - THistogram GetResponseHullLow; + THistogram GetResponseSyncLog; + THistogram GetResponseHullComp; + THistogram GetResponseHullOnlineRt; + THistogram GetResponseHullOnlineOther; + THistogram GetResponseHullLoad; + THistogram GetResponseHullLow; // write response time - THistogram WriteResponseSyncLog; - THistogram WriteResponseHullFresh; - THistogram WriteResponseHullHuge; - THistogram WriteResponseHullComp; + THistogram WriteResponseSyncLog; + THistogram WriteResponseHullFresh; + THistogram WriteResponseHullHuge; + THistogram WriteResponseHullComp; // 
scheduler subgroup TIntrusivePtr<NMonitoring::TDynamicCounters> SchedulerGroup; @@ -703,10 +703,10 @@ struct TPDiskMon { // yard subgroup TIntrusivePtr<NMonitoring::TDynamicCounters> PDiskGroup; TReqCounters YardInit; - TReqCounters CheckSpace; + TReqCounters CheckSpace; TReqCounters YardConfigureScheduler; - TReqCounters ChunkReserve; - TReqCounters Harakiri; + TReqCounters ChunkReserve; + TReqCounters Harakiri; TReqCounters YardSlay; TReqCounters YardControl; @@ -727,7 +727,7 @@ struct TPDiskMon { TIoCounters WriteLog; TReqCounters WriteHugeLog; - TIoCounters LogRead; + TIoCounters LogRead; // Halter @@ -735,22 +735,22 @@ struct TPDiskMon { i64 LastHaltDeviceLandings = 0; NHPTimer::STime LastHaltTimestamp = 0; - // System counters - for tracking usage of CPU, memory etc. - TIntrusivePtr<NMonitoring::TDynamicCounters> SystemGroup; - NMonitoring::TDynamicCounters::TCounterPtr PDiskThreadCPU; - NMonitoring::TDynamicCounters::TCounterPtr SubmitThreadCPU; - NMonitoring::TDynamicCounters::TCounterPtr GetThreadCPU; - NMonitoring::TDynamicCounters::TCounterPtr TrimThreadCPU; - NMonitoring::TDynamicCounters::TCounterPtr CompletionThreadCPU; - - TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui32 pdiskId, TPDiskConfig *cfg); + // System counters - for tracking usage of CPU, memory etc. 
+ TIntrusivePtr<NMonitoring::TDynamicCounters> SystemGroup; + NMonitoring::TDynamicCounters::TCounterPtr PDiskThreadCPU; + NMonitoring::TDynamicCounters::TCounterPtr SubmitThreadCPU; + NMonitoring::TDynamicCounters::TCounterPtr GetThreadCPU; + NMonitoring::TDynamicCounters::TCounterPtr TrimThreadCPU; + NMonitoring::TDynamicCounters::TCounterPtr CompletionThreadCPU; + + TPDiskMon(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui32 pdiskId, TPDiskConfig *cfg); NMonitoring::TDynamicCounters::TCounterPtr GetBusyPeriod(const TString& owner, const TString& queue); void IncrementQueueTime(ui8 priorityClass, size_t timeMs); - void IncrementResponseTime(ui8 priorityClass, double timeMs, size_t sizeBytes); + void IncrementResponseTime(ui8 priorityClass, double timeMs, size_t sizeBytes); void UpdatePercentileTrackers(); void UpdateLights(); - bool UpdateDeviceHaltCounters(); + bool UpdateDeviceHaltCounters(); void UpdateStats(); TIoCounters *GetWriteCounter(ui8 priority); TIoCounters *GetReadCounter(ui8 priority); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.cpp index 547b7c898d..e762b4e703 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.cpp @@ -9,7 +9,7 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// // TPDiskParams //////////////////////////////////////////////////////////////////////////// - TPDiskParams::TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize, + TPDiskParams::TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize, ui64 seekTimeUs, ui64 readSpeedBps, ui64 writeSpeedBps, ui64 readBlockSize, ui64 writeBlockSize, ui64 bulkWriteBlockSize) : Owner(owner) @@ -55,7 +55,7 @@ namespace NKikimr { TString TPDiskParams::ToString() const { TStringStream str; - str << "{TPDiskParams 
ownerId# " << Owner; + str << "{TPDiskParams ownerId# " << Owner; str << " ownerRound# " << OwnerRound; str << " ChunkSize# " << ChunkSize; str << " AppendBlockSize# " << AppendBlockSize; @@ -84,7 +84,7 @@ namespace NKikimr { TABLEBODY() { TABLER() { TABLED() {str << "Owner";} - TABLED() {str << Owner;} + TABLED() {str << Owner;} } TABLER() { TABLED() {str << "ChunkSize";} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.h index 58777bb889..7e5017d879 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_params.h @@ -1,6 +1,6 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_defs.h" +#include "blobstorage_pdisk_defs.h" #include <util/stream/output.h> namespace NKikimr { @@ -10,7 +10,7 @@ namespace NKikimr { // PDisk related constants, obtained from PDisk during initialization //////////////////////////////////////////////////////////////////////////// struct TPDiskParams : public TThrRefBase { - const NPDisk::TOwner Owner; + const NPDisk::TOwner Owner; const ui64 OwnerRound; const ui64 ChunkSize; const ui32 AppendBlockSize; @@ -31,7 +31,7 @@ namespace NKikimr { static ui64 CalculatePrefetchSizeBytes(ui64 seekTimeUs, ui64 readSpeedBps); static ui64 CalculateGlueRequestDistanceBytes(ui64 seekTimeUs, ui64 readSpeedBps); - TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize, + TPDiskParams(NPDisk::TOwner owner, ui64 ownerRound, ui32 chunkSize, ui32 appendBlockSize, ui64 seekTimeUs, ui64 readSpeedBps, ui64 writeSpeedBps, ui64 readBlockSize, ui64 writeBlockSize, ui64 bulkWriteBlockSize); void OutputHtml(IOutputStream &str) const; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_quota_record.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_quota_record.h index 994f9e5e06..ef728e3213 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_quota_record.h +++ 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_quota_record.h @@ -22,8 +22,8 @@ namespace NPDisk { // class TQuotaRecord { - friend class TPerOwnerQuotaTracker; - + friend class TPerOwnerQuotaTracker; + TAtomic HardLimit = 0; TAtomic Free = 0; @@ -32,16 +32,16 @@ class TQuotaRecord { #undef DEFINE_DISK_SPACE_COLOR TString Name; - std::optional<TVDiskID> VDiskId; + std::optional<TVDiskID> VDiskId; public: - void SetName(const TString& name) { + void SetName(const TString& name) { Name = name; } - void SetVDiskId(const TVDiskID& v) { - VDiskId = v; - } - + void SetVDiskId(const TVDiskID& v) { + VDiskId = v; + } + i64 GetUsed() const { return AtomicGet(HardLimit) - AtomicGet(Free); } @@ -54,22 +54,22 @@ public: return AtomicGet(Free); } - TString Print() const { - TStringStream str; - Print(str); - return str.Str(); - } - - void Print(IOutputStream &str) const { - str << "\nName# \"" << Name << "\""; - if (VDiskId) { - str << " VDiskId# " << *VDiskId; - } - str << "\n"; + TString Print() const { + TStringStream str; + Print(str); + return str.Str(); + } + + void Print(IOutputStream &str) const { + str << "\nName# \"" << Name << "\""; + if (VDiskId) { + str << " VDiskId# " << *VDiskId; + } + str << "\n"; str << " HardLimit# " << HardLimit; - str << " Free# " << Free; - str << " Used# " << GetUsed(); - str << " CurrentColor# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(EstimateSpaceColor(0)) << "\n"; + str << " Free# " << Free; + str << " Used# " << GetUsed(); + str << " CurrentColor# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(EstimateSpaceColor(0)) << "\n"; #define PRINT_DISK_SPACE_COLOR(NAME) str << " " #NAME "# " << NAME; DISK_SPACE_COLORS(PRINT_DISK_SPACE_COLOR) #undef PRINT_DISK_SPACE_COLOR @@ -78,7 +78,7 @@ public: // Called only from the main trhead // Returns number of chunks released (negative for chunks acquired) i64 ForceHardLimit(i64 hardLimit, const TColorLimits &limits) { - i64 oldHardLimit = AtomicGet(HardLimit); + i64 oldHardLimit = 
AtomicGet(HardLimit); i64 increment = hardLimit - oldHardLimit; AtomicAdd(HardLimit, increment); AtomicAdd(Free, increment); @@ -94,21 +94,21 @@ public: return -increment; } - bool ForceAllocate(i64 count) { - return AtomicSub(Free, count) > AtomicGet(Black); - } - + bool ForceAllocate(i64 count) { + return AtomicSub(Free, count) > AtomicGet(Black); + } + // Called only from the main thread bool TryAllocate(i64 count, TString &outErrorReason) { Y_VERIFY(count > 0); - if (AtomicSub(Free, count) > AtomicGet(Black)) { + if (AtomicSub(Free, count) > AtomicGet(Black)) { return true; } - AtomicAdd(Free, count); + AtomicAdd(Free, count); outErrorReason = (TStringBuilder() << "Allocation of count# " << count - << " chunks falls into the black zone, free# " << AtomicGet(Free) + << " chunks falls into the black zone, free# " << AtomicGet(Free) << " black# " << AtomicGet(Black) - << " hardLimit# " << AtomicGet(HardLimit) + << " hardLimit# " << AtomicGet(HardLimit) << " Name# \"" << Name << "\"" << " Marker# BPQ10"); return false; @@ -116,43 +116,43 @@ public: bool InitialAllocate(i64 count) { Y_VERIFY(count >= 0); - if (AtomicSub(Free, count) >= 0) { + if (AtomicSub(Free, count) >= 0) { return true; - } else { - AtomicAdd(Free, count); - return false; + } else { + AtomicAdd(Free, count); + return false; } } void Release(i64 count) { Y_VERIFY(count > 0); - TAtomicBase newFree = AtomicAdd(Free, count); - Y_VERIFY_S(newFree <= AtomicGet(HardLimit), Print()); + TAtomicBase newFree = AtomicAdd(Free, count); + Y_VERIFY_S(newFree <= AtomicGet(HardLimit), Print()); } // Called from any thread // TODO(cthulhu): Profile and consider caching - NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(i64 count) const { - using TColor = NKikimrBlobStorage::TPDiskSpaceColor; - const i64 newFree = AtomicGet(Free) - count; - - if (newFree > AtomicGet(Cyan)) { - return TColor::GREEN; + NKikimrBlobStorage::TPDiskSpaceColor::E EstimateSpaceColor(i64 count) const { + using TColor = 
NKikimrBlobStorage::TPDiskSpaceColor; + const i64 newFree = AtomicGet(Free) - count; + + if (newFree > AtomicGet(Cyan)) { + return TColor::GREEN; } else if (newFree > AtomicGet(LightYellow)) { return TColor::CYAN; - } else if (newFree > AtomicGet(Yellow)) { + } else if (newFree > AtomicGet(Yellow)) { return TColor::LIGHT_YELLOW; - } else if (newFree > AtomicGet(LightOrange)) { - return TColor::YELLOW; - } else if (newFree > AtomicGet(Orange)) { - return TColor::LIGHT_ORANGE; - } else if (newFree > AtomicGet(Red)) { - return TColor::ORANGE; - } else if (newFree > AtomicGet(Black)) { - return TColor::RED; - } else { - return TColor::BLACK; - } + } else if (newFree > AtomicGet(LightOrange)) { + return TColor::YELLOW; + } else if (newFree > AtomicGet(Orange)) { + return TColor::LIGHT_ORANGE; + } else if (newFree > AtomicGet(Red)) { + return TColor::ORANGE; + } else if (newFree > AtomicGet(Black)) { + return TColor::RED; + } else { + return TColor::BLACK; + } } }; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_req_creator.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_req_creator.h index 368039d6fc..af5402a4f8 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_req_creator.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_req_creator.h @@ -1,253 +1,253 @@ -#pragma once - -#include "defs.h" - +#pragma once + +#include "defs.h" + #include <ydb/core/blobstorage/base/wilson_events.h> - -#include "blobstorage_pdisk.h" -#include "blobstorage_pdisk_gate.h" -#include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_requestimpl.h" - + +#include "blobstorage_pdisk.h" +#include "blobstorage_pdisk_gate.h" +#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_requestimpl.h" + #include <util/system/type_name.h> - -namespace NKikimr::NPDisk { - -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - -class TReqCreator { -private: - // PDisk info - const ui32 PDiskId; - TActorSystem *ActorSystem; - TPDiskMon *Mon; - TDriveModel *Model; - TAtomic 
*EstimatedLogChunkIdx; - -public: - // Self variables - TAtomic LastReqId; - -private: - void Classify(TRequestBase* request) { - request->EstimateCost(*Model); - request->TotalCost = request->Cost; - switch (request->GetType()) { - case ERequestType::RequestLogRead: - request->GateId = GateLog; - request->IsSensitive = true; - return; - case ERequestType::RequestLogReadContinue: - request->GateId = GateLog; - request->IsSensitive = true; - return; - case ERequestType::RequestLogSectorRestore: - request->GateId = GateLog; - request->IsSensitive = true; - return; - case ERequestType::RequestLogReadResultProcess: - request->GateId = GateLog; - request->IsSensitive = true; - return; - case ERequestType::RequestLogWrite: - request->GateId = GateLog; - request->IsSensitive = true; - return; - case ERequestType::RequestChunkRead: - request->IsFast = (request->PriorityClass == NPriRead::HullOnlineOther); - request->IsSensitive = (request->PriorityClass == NPriRead::HullOnlineRt); - switch (request->PriorityClass) { - case NPriRead::HullComp: - request->GateId = GateComp; - break; - case NPriRead::HullOnlineRt: - request->GateId = GateFastRead; - break; - case NPriRead::HullOnlineOther: - request->GateId = GateOtherRead; - break; - case NPriRead::HullLoad: - request->GateId = GateLoad; - break; - case NPriRead::SyncLog: - request->GateId = GateSyncLog; - break; + +namespace NKikimr::NPDisk { + +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + +class TReqCreator { +private: + // PDisk info + const ui32 PDiskId; + TActorSystem *ActorSystem; + TPDiskMon *Mon; + TDriveModel *Model; + TAtomic *EstimatedLogChunkIdx; + +public: + // Self variables + TAtomic LastReqId; + +private: + void Classify(TRequestBase* request) { + request->EstimateCost(*Model); + request->TotalCost = request->Cost; + switch (request->GetType()) { + case ERequestType::RequestLogRead: + request->GateId = GateLog; + request->IsSensitive = true; + return; + case ERequestType::RequestLogReadContinue: + request->GateId = 
GateLog; + request->IsSensitive = true; + return; + case ERequestType::RequestLogSectorRestore: + request->GateId = GateLog; + request->IsSensitive = true; + return; + case ERequestType::RequestLogReadResultProcess: + request->GateId = GateLog; + request->IsSensitive = true; + return; + case ERequestType::RequestLogWrite: + request->GateId = GateLog; + request->IsSensitive = true; + return; + case ERequestType::RequestChunkRead: + request->IsFast = (request->PriorityClass == NPriRead::HullOnlineOther); + request->IsSensitive = (request->PriorityClass == NPriRead::HullOnlineRt); + switch (request->PriorityClass) { + case NPriRead::HullComp: + request->GateId = GateComp; + break; + case NPriRead::HullOnlineRt: + request->GateId = GateFastRead; + break; + case NPriRead::HullOnlineOther: + request->GateId = GateOtherRead; + break; + case NPriRead::HullLoad: + request->GateId = GateLoad; + break; + case NPriRead::SyncLog: + request->GateId = GateSyncLog; + break; case NPriRead::HullLow: request->GateId = GateLow; break; - default: - request->GateId = GateOtherRead; - break; - } - return; - case ERequestType::RequestChunkWrite: - request->IsFast = (request->PriorityClass == NPriWrite::HullHugeAsyncBlob || - request->PriorityClass == NPriWrite::HullHugeUserData); - switch (request->PriorityClass) { - case NPriWrite::HullFresh: - request->GateId = GateFresh; - break; - case NPriWrite::HullComp: - request->GateId = GateComp; - break; - case NPriWrite::HullHugeAsyncBlob: - case NPriWrite::HullHugeUserData: - request->GateId = GateHuge; - break; - case NPriWrite::SyncLog: - request->GateId = GateSyncLog; - break; - default: - request->GateId = GateHuge; - break; - } - request->IsSensitive = false; - return; - case ERequestType::RequestChunkTrim: - request->GateId = GateTrim; - request->IsSensitive = false; - return; - default: // FastOperationsQueue - request->GateId = GateFastOperation; - request->IsSensitive = false; - return; - } - } - - template <class TRequest> - 
TRequest* NewRequest(TRequest* request, double* burstMs = nullptr) { - // Note that call to Classify() is thread-safe (thanks to the fact that queues are not created dynamically) - Classify(request); - CountRequest(*request); - LWTRACK(PDiskNewRequest, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), - double(request->Cost) / 1000000.0, request->IsSensitive, request->IsFast, - request->Owner, request->PriorityClass, (ui32)request->GetType()); - double tmpBurstMs = 0; - if (request->GateId != GateFastOperation && request->GateId != GateTrim) { - if (request->IsSensitive) { - tmpBurstMs = Mon->SensitiveBurst.Increment(request->Cost); - } else { - tmpBurstMs = Mon->BestEffortBurst.Increment(request->Cost); - } - LWTRACK(PDiskBurst, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), - request->IsSensitive, double(request->Cost) / 1000000.0, tmpBurstMs); - } - if (burstMs) { - *burstMs = tmpBurstMs; - } - return request; - } - -#define CASE_COUNT_REQUEST(name) \ - case ERequestType::Request##name: Mon->name.CountRequest(); break; - - - template<typename T> - void CountRequest(const T& req) { - switch (req.GetType()) { - CASE_COUNT_REQUEST(YardInit); - CASE_COUNT_REQUEST(CheckSpace); - CASE_COUNT_REQUEST(Harakiri); - CASE_COUNT_REQUEST(YardSlay); - CASE_COUNT_REQUEST(ChunkReserve); - CASE_COUNT_REQUEST(YardControl); - CASE_COUNT_REQUEST(LogRead); - default: break; - } - } -#undef CASE_COUNT_REQUEST - - template<typename TEv> - static TString ToString(const TEv &ev) { - return ev.ToString(); - } - - template<typename TEv> - static TString ToString(const TAutoPtr<NActors::TEventHandle<TEv>> &ev) { - Y_VERIFY(ev && ev->Get()); - return ev->Get()->ToString(); - } - -public: - TReqCreator(ui32 pDiskId, TPDiskMon *mon, TDriveModel *model, TAtomic *estimatedChunkIdx) - : PDiskId(pDiskId) + default: + request->GateId = GateOtherRead; + break; + } + return; + case ERequestType::RequestChunkWrite: + request->IsFast 
= (request->PriorityClass == NPriWrite::HullHugeAsyncBlob || + request->PriorityClass == NPriWrite::HullHugeUserData); + switch (request->PriorityClass) { + case NPriWrite::HullFresh: + request->GateId = GateFresh; + break; + case NPriWrite::HullComp: + request->GateId = GateComp; + break; + case NPriWrite::HullHugeAsyncBlob: + case NPriWrite::HullHugeUserData: + request->GateId = GateHuge; + break; + case NPriWrite::SyncLog: + request->GateId = GateSyncLog; + break; + default: + request->GateId = GateHuge; + break; + } + request->IsSensitive = false; + return; + case ERequestType::RequestChunkTrim: + request->GateId = GateTrim; + request->IsSensitive = false; + return; + default: // FastOperationsQueue + request->GateId = GateFastOperation; + request->IsSensitive = false; + return; + } + } + + template <class TRequest> + TRequest* NewRequest(TRequest* request, double* burstMs = nullptr) { + // Note that call to Classify() is thread-safe (thanks to the fact that queues are not created dynamically) + Classify(request); + CountRequest(*request); + LWTRACK(PDiskNewRequest, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), + double(request->Cost) / 1000000.0, request->IsSensitive, request->IsFast, + request->Owner, request->PriorityClass, (ui32)request->GetType()); + double tmpBurstMs = 0; + if (request->GateId != GateFastOperation && request->GateId != GateTrim) { + if (request->IsSensitive) { + tmpBurstMs = Mon->SensitiveBurst.Increment(request->Cost); + } else { + tmpBurstMs = Mon->BestEffortBurst.Increment(request->Cost); + } + LWTRACK(PDiskBurst, request->Orbit, PDiskId, request->ReqId.Id, HPSecondsFloat(request->CreationTime), + request->IsSensitive, double(request->Cost) / 1000000.0, tmpBurstMs); + } + if (burstMs) { + *burstMs = tmpBurstMs; + } + return request; + } + +#define CASE_COUNT_REQUEST(name) \ + case ERequestType::Request##name: Mon->name.CountRequest(); break; + + + template<typename T> + void CountRequest(const T& 
req) { + switch (req.GetType()) { + CASE_COUNT_REQUEST(YardInit); + CASE_COUNT_REQUEST(CheckSpace); + CASE_COUNT_REQUEST(Harakiri); + CASE_COUNT_REQUEST(YardSlay); + CASE_COUNT_REQUEST(ChunkReserve); + CASE_COUNT_REQUEST(YardControl); + CASE_COUNT_REQUEST(LogRead); + default: break; + } + } +#undef CASE_COUNT_REQUEST + + template<typename TEv> + static TString ToString(const TEv &ev) { + return ev.ToString(); + } + + template<typename TEv> + static TString ToString(const TAutoPtr<NActors::TEventHandle<TEv>> &ev) { + Y_VERIFY(ev && ev->Get()); + return ev->Get()->ToString(); + } + +public: + TReqCreator(ui32 pDiskId, TPDiskMon *mon, TDriveModel *model, TAtomic *estimatedChunkIdx) + : PDiskId(pDiskId) , ActorSystem(nullptr) - , Mon(mon) - , Model(model) - , EstimatedLogChunkIdx(estimatedChunkIdx) - , LastReqId(ui64(PDiskId) * 10000000ull) - {} - - void SetActorSystem(TActorSystem *actorSystem) { - ActorSystem = actorSystem; - } - - template<typename TReq, typename TEv> - [[nodiscard]] TReq* CreateFromEv(TEv &&ev, const TActorId &sender, double *burstMs = nullptr) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " ev# " - << ToString(ev) << " Sender# " << sender.LocalId() << " ReqId# " << AtomicGet(LastReqId)); - auto req = MakeHolder<TReq>(std::forward<TEv>(ev), sender, AtomicIncrement(LastReqId)); - NewRequest(req.Get(), burstMs); - return req.Release(); - } - - template<typename TReq, typename... TArgs> - [[nodiscard]] TReq* CreateFromArgs(TArgs&&... 
args) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " create req# " - << TypeName<TReq>() << " ReqId# " << AtomicGet(LastReqId)); - auto req = MakeHolder<TReq>(std::forward<TArgs>(args)..., AtomicIncrement(LastReqId)); - NewRequest(req.Get(), nullptr); - return req.Release(); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // TODO: Make all functions in style - [[nodiscard]] TChunkTrim* CreateChunkTrim(ui32 chunkIdx, ui32 offset, ui64 size) { - Mon->Trim.CountRequest(size); - return CreateFromArgs<TChunkTrim>(chunkIdx, offset, size); - } - + , Mon(mon) + , Model(model) + , EstimatedLogChunkIdx(estimatedChunkIdx) + , LastReqId(ui64(PDiskId) * 10000000ull) + {} + + void SetActorSystem(TActorSystem *actorSystem) { + ActorSystem = actorSystem; + } + + template<typename TReq, typename TEv> + [[nodiscard]] TReq* CreateFromEv(TEv &&ev, const TActorId &sender, double *burstMs = nullptr) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " ev# " + << ToString(ev) << " Sender# " << sender.LocalId() << " ReqId# " << AtomicGet(LastReqId)); + auto req = MakeHolder<TReq>(std::forward<TEv>(ev), sender, AtomicIncrement(LastReqId)); + NewRequest(req.Get(), burstMs); + return req.Release(); + } + + template<typename TReq, typename... TArgs> + [[nodiscard]] TReq* CreateFromArgs(TArgs&&... 
args) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " create req# " + << TypeName<TReq>() << " ReqId# " << AtomicGet(LastReqId)); + auto req = MakeHolder<TReq>(std::forward<TArgs>(args)..., AtomicIncrement(LastReqId)); + NewRequest(req.Get(), nullptr); + return req.Release(); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // TODO: Make all functions in style + [[nodiscard]] TChunkTrim* CreateChunkTrim(ui32 chunkIdx, ui32 offset, ui64 size) { + Mon->Trim.CountRequest(size); + return CreateFromArgs<TChunkTrim>(chunkIdx, offset, size); + } + [[nodiscard]] TLogWrite* CreateLogWrite(NPDisk::TEvLog &ev, const TActorId &sender, double& burstMs, NWilson::TTraceId traceId) { - TReqId reqId(TReqId::LogWrite, AtomicIncrement(LastReqId)); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, - (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); - Mon->QueueRequests->Inc(); - *Mon->QueueBytes += ev.Data.size(); - Mon->WriteLog.CountRequest(ev.Data.size()); - if (ev.Data.size() > (1 << 20)) { - Mon->WriteHugeLog.CountRequest(); - } - // WILSON_TRACE(*ActorSystem, &traceId, EvLogReceived); // TODO - return NewRequest(new TLogWrite(ev, sender, AtomicGet(*EstimatedLogChunkIdx), reqId, std::move(traceId)), &burstMs); - } - + TReqId reqId(TReqId::LogWrite, AtomicIncrement(LastReqId)); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, + (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); + Mon->QueueRequests->Inc(); + *Mon->QueueBytes += ev.Data.size(); + Mon->WriteLog.CountRequest(ev.Data.size()); + if (ev.Data.size() > (1 << 20)) { + Mon->WriteHugeLog.CountRequest(); + } + // WILSON_TRACE(*ActorSystem, &traceId, EvLogReceived); // TODO + return NewRequest(new TLogWrite(ev, sender, 
AtomicGet(*EstimatedLogChunkIdx), reqId, std::move(traceId)), &burstMs); + } + [[nodiscard]] TChunkRead* CreateChunkRead(const NPDisk::TEvChunkRead &ev, const TActorId &sender, double& burstMs, - NWilson::TTraceId traceId) { - TReqId reqId(TReqId::ChunkRead, AtomicIncrement(LastReqId)); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, - (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); - Mon->QueueRequests->Inc(); - *Mon->QueueBytes += ev.Size; - Mon->GetReadCounter(ev.PriorityClass)->CountRequest(ev.Size); - WILSON_TRACE(*ActorSystem, &traceId, EvChunkReadReceived, ChunkIdx = ev.ChunkIdx, Offset = ev.Offset, Size = ev.Size); - auto read = new TChunkRead(ev, sender, reqId, std::move(traceId)); - read->SelfPointer = read; - return NewRequest(read, &burstMs); - } - + NWilson::TTraceId traceId) { + TReqId reqId(TReqId::ChunkRead, AtomicIncrement(LastReqId)); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, + (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); + Mon->QueueRequests->Inc(); + *Mon->QueueBytes += ev.Size; + Mon->GetReadCounter(ev.PriorityClass)->CountRequest(ev.Size); + WILSON_TRACE(*ActorSystem, &traceId, EvChunkReadReceived, ChunkIdx = ev.ChunkIdx, Offset = ev.Offset, Size = ev.Size); + auto read = new TChunkRead(ev, sender, reqId, std::move(traceId)); + read->SelfPointer = read; + return NewRequest(read, &burstMs); + } + [[nodiscard]] TChunkWrite* CreateChunkWrite(const NPDisk::TEvChunkWrite &ev, const TActorId &sender, double& burstMs, - NWilson::TTraceId traceId) { - TReqId reqId(TReqId::ChunkWrite, AtomicIncrement(LastReqId)); - LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, - (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); - Mon->QueueRequests->Inc(); - ui32 size = 
ev.PartsPtr ? ev.PartsPtr->ByteSize() : 0; - ev.Validate(); - *Mon->QueueBytes += size; - Mon->GetWriteCounter(ev.PriorityClass)->CountRequest(size); - WILSON_TRACE(*ActorSystem, &traceId, EvChunkWriteReceived, ChunkIdx = ev.ChunkIdx, Offset = ev.Offset, Size = size); - return NewRequest(new TChunkWrite(ev, sender, reqId, std::move(traceId)), &burstMs); - } -}; - -} // namespace NKikimr::NPDisk { + NWilson::TTraceId traceId) { + TReqId reqId(TReqId::ChunkWrite, AtomicIncrement(LastReqId)); + LOG_DEBUG(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " %s Sender# %" PRIu64 " ReqId# %" PRIu64, + (ui32)PDiskId, ev.ToString().c_str(), (ui64)sender.LocalId(), (ui64)reqId.Id); + Mon->QueueRequests->Inc(); + ui32 size = ev.PartsPtr ? ev.PartsPtr->ByteSize() : 0; + ev.Validate(); + *Mon->QueueBytes += size; + Mon->GetWriteCounter(ev.PriorityClass)->CountRequest(size); + WILSON_TRACE(*ActorSystem, &traceId, EvChunkWriteReceived, ChunkIdx = ev.ChunkIdx, Offset = ev.Offset, Size = size); + return NewRequest(new TChunkWrite(ev, sender, reqId, std::move(traceId)), &burstMs); + } +}; + +} // namespace NKikimr::NPDisk { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_request_id.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_request_id.h index 39a65aaf21..c0824b6002 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_request_id.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_request_id.h @@ -1,5 +1,5 @@ #pragma once - + #include <util/stream/output.h> class IOutputStream; @@ -81,17 +81,17 @@ struct TReqId { TrimAllUntrimmedChunks = 62, TryTrimChunk = 63, ReleaseChunks = 64, - StopDevice = 65, + StopDevice = 65, }; // 56 bit idx, 8 bit source ui64 Id; - explicit TReqId(ui64 idx) - : Id(ui64(EReqSource::Invalid) | (idx << 8)) - {} - + explicit TReqId(ui64 idx) + : Id(ui64(EReqSource::Invalid) | (idx << 8)) + {} + explicit TReqId(EReqSource source, ui64 idx) : Id(ui64(source) | (idx << 8)) {} @@ -109,42 +109,42 @@ struct TReqId { } }; -enum class 
ERequestType { - RequestLogRead, - RequestLogReadContinue, - RequestLogReadResultProcess, - RequestLogSectorRestore, - RequestLogWrite, - RequestChunkRead, - RequestChunkReadPiece, - RequestChunkWrite, - RequestChunkWritePiece, - RequestChunkTrim, - RequestYardInit, - RequestCheckSpace, - RequestHarakiri, - RequestYardSlay, - RequestChunkReserve, - RequestChunksLock, - RequestChunksUnlock, - RequestYardControl, - RequestAskForCutLog, - RequestConfigureScheduler, - RequestWhiteboartReport, - RequestHttpInfo, - RequestUndelivered, - RequestNop, - RequestCommitLogChunks, - RequestLogCommitDone, - RequestTryTrimChunk, - RequestReleaseChunks, - RequestStopDevice, -}; - -inline IOutputStream& operator <<(IOutputStream& out, const TReqId& reqId) { - return out << reqId.Id; -} - - +enum class ERequestType { + RequestLogRead, + RequestLogReadContinue, + RequestLogReadResultProcess, + RequestLogSectorRestore, + RequestLogWrite, + RequestChunkRead, + RequestChunkReadPiece, + RequestChunkWrite, + RequestChunkWritePiece, + RequestChunkTrim, + RequestYardInit, + RequestCheckSpace, + RequestHarakiri, + RequestYardSlay, + RequestChunkReserve, + RequestChunksLock, + RequestChunksUnlock, + RequestYardControl, + RequestAskForCutLog, + RequestConfigureScheduler, + RequestWhiteboartReport, + RequestHttpInfo, + RequestUndelivered, + RequestNop, + RequestCommitLogChunks, + RequestLogCommitDone, + RequestTryTrimChunk, + RequestReleaseChunks, + RequestStopDevice, +}; + +inline IOutputStream& operator <<(IOutputStream& out, const TReqId& reqId) { + return out << reqId.Id; +} + + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.cpp index 4678706f84..27f26b1f4d 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.cpp @@ -23,70 +23,70 @@ void TRequestBase::AbortDelete(TRequestBase* request, TActorSystem* 
actorSystem) break; } default: - request->Abort(actorSystem); - delete request; + request->Abort(actorSystem); + delete request; break; } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TChunkWrite -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -TAtomic TChunkWrite::LastIndex = 0; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TChunkWrite +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +TAtomic TChunkWrite::LastIndex = 0; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // TChunkRead -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -TAtomic TChunkRead::LastIndex = 0; - -void TChunkRead::Abort(TActorSystem* actorSystem) { - if (FinalCompletion) { - FinalCompletion->PartDeleted(actorSystem); - } else { - Y_VERIFY(!IsReplied); - TStringStream error; - error << "ReqId# " << ReqId << " ChunkRead is deleted because of PDisk stoppage"; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +TAtomic TChunkRead::LastIndex = 0; + +void TChunkRead::Abort(TActorSystem* actorSystem) { + if (FinalCompletion) { + FinalCompletion->PartDeleted(actorSystem); + } else { + Y_VERIFY(!IsReplied); + TStringStream error; + error << "ReqId# " << ReqId << " ChunkRead is deleted because of PDisk stoppage"; THolder<NPDisk::TEvChunkReadResult> result = MakeHolder <NPDisk::TEvChunkReadResult>(NKikimrProto::ERROR, - ChunkIdx, Offset, Cookie, - NKikimrBlobStorage::StatusIsValid, error.Str()); - actorSystem->Send(Sender, result.Release()); - IsReplied = true; - } + ChunkIdx, 
Offset, Cookie, + NKikimrBlobStorage::StatusIsValid, error.Str()); + actorSystem->Send(Sender, result.Release()); + IsReplied = true; + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // TChunkReadPiece -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// TChunkReadPiece::TChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, ui64 pieceSizeLimit, - bool isTheLastPiece) - : TRequestBase(read->Sender, read->ReqId, read->Owner, read->OwnerRound, read->PriorityClass) - , ChunkRead(read) - , PieceCurrentSector(pieceCurrentSector) - , PieceSizeLimit(pieceSizeLimit) - , IsTheLastPiece(isTheLastPiece) -{ - Y_VERIFY(ChunkRead->FinalCompletion); - if (!IsTheLastPiece) { - ChunkRead->FinalCompletion->AddPart(); - } -} - -void TChunkReadPiece::Abort(TActorSystem* actorSystem) { - ChunkRead->FinalCompletion->PartDeleted(actorSystem); + bool isTheLastPiece) + : TRequestBase(read->Sender, read->ReqId, read->Owner, read->OwnerRound, read->PriorityClass) + , ChunkRead(read) + , PieceCurrentSector(pieceCurrentSector) + , PieceSizeLimit(pieceSizeLimit) + , IsTheLastPiece(isTheLastPiece) +{ + Y_VERIFY(ChunkRead->FinalCompletion); + if (!IsTheLastPiece) { + ChunkRead->FinalCompletion->AddPart(); + } +} + +void TChunkReadPiece::Abort(TActorSystem* actorSystem) { + ChunkRead->FinalCompletion->PartDeleted(actorSystem); } -void TChunkReadPiece::OnSuccessfulDestroy(TActorSystem* actorSystem) { - if (!IsTheLastPiece) { - ChunkRead->FinalCompletion->PartReadComplete(actorSystem); - } -} - - +void TChunkReadPiece::OnSuccessfulDestroy(TActorSystem* actorSystem) { + if (!IsTheLastPiece) { + ChunkRead->FinalCompletion->PartReadComplete(actorSystem); + } +} + + } // NPDisk } // NKikimr diff --git 
a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.h index c3a8769ee2..61c0996e08 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_requestimpl.h @@ -4,15 +4,15 @@ #include <ydb/library/pdisk_io/buffers.h> #include "blobstorage_pdisk_data.h" #include "blobstorage_pdisk_drivemodel.h" -#include "blobstorage_pdisk_internal_interface.h" +#include "blobstorage_pdisk_internal_interface.h" #include "blobstorage_pdisk_mon.h" #include "blobstorage_pdisk_request_id.h" - + #include <ydb/core/blobstorage/base/vdisk_priorities.h> #include <ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h> #include <ydb/core/blobstorage/crypto/secured_block.h> #include <ydb/library/schlab/schine/job_kind.h> - + #include <util/generic/utility.h> #include <util/generic/ptr.h> @@ -46,7 +46,7 @@ public: // Scheduling NHPTimer::STime Deadline = 0; // Deadline from request input to rt-scheduler ui64 Cost = 0; // Remaining cost in nanoseconds - NSchLab::EJobKind JobKind = NSchLab::EJobKind::JobKindRequest; + NSchLab::EJobKind JobKind = NSchLab::EJobKind::JobKindRequest; // Monitoring const NHPTimer::STime CreationTime; @@ -73,19 +73,19 @@ public: OwnerGroupType = (isStaticGroupOwner ? 
EOwnerGroupType::Static : EOwnerGroupType::Dynamic); } - virtual void Abort(TActorSystem* /*actorSystem*/) { - } - - virtual ~TRequestBase() = default; + virtual void Abort(TActorSystem* /*actorSystem*/) { + } + + virtual ~TRequestBase() = default; - virtual ERequestType GetType() const = 0; + virtual ERequestType GetType() const = 0; - virtual void EstimateCost(const TDriveModel &drive) { - Cost = drive.SeekTimeNs(); - } + virtual void EstimateCost(const TDriveModel &drive) { + Cost = drive.SeekTimeNs(); + } - double LifeDurationMs(NHPTimer::STime now) { - return HPMilliSecondsFloat(now - CreationTime); + double LifeDurationMs(NHPTimer::STime now) { + return HPMilliSecondsFloat(now - CreationTime); } ui64 GetCost() const { @@ -124,7 +124,7 @@ public: ui32 SlotId; TYardInit(const NPDisk::TEvYardInit &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::YardInit, reqIdx), 0, ev.OwnerRound, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::YardInit, reqIdx), 0, ev.OwnerRound, NPriInternal::Other) , VDisk(ev.VDisk) , PDiskGuid(ev.PDiskGuid) , CutLogId(ev.CutLogID) @@ -132,15 +132,15 @@ public: , SlotId(ev.SlotId) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestYardInit; } - - TVDiskID VDiskIdWOGeneration() const { - TVDiskID v = VDisk; - v.GroupGeneration = -1; - return v; - } + + TVDiskID VDiskIdWOGeneration() const { + TVDiskID v = VDisk; + v.GroupGeneration = -1; + return v; + } }; // @@ -149,12 +149,12 @@ public: class TCheckSpace : public TRequestBase { public: TCheckSpace(const NPDisk::TEvCheckSpace &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::CheckSpace, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::CheckSpace, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) { Y_UNUSED(ev); } - ERequestType GetType() const override { + ERequestType GetType() const 
override { return ERequestType::RequestCheckSpace; } }; @@ -164,87 +164,87 @@ public: // class TLogRead : public TRequestBase { public: - TLogPosition Position; + TLogPosition Position; ui64 SizeLimit; TLogRead(const NPDisk::TEvReadLog &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::LogRead, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::LogRead) + : TRequestBase(sender, TReqId(TReqId::LogRead, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::LogRead) , Position(ev.Position) , SizeLimit(ev.SizeLimit) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestLogRead; } }; // -// TLogReadContinue -// -class TLogReadContinue : public TRequestBase { -public: - void *Data; - ui32 Size; - ui64 Offset; - TCompletionAction *CompletionAction; +// TLogReadContinue +// +class TLogReadContinue : public TRequestBase { +public: + void *Data; + ui32 Size; + ui64 Offset; + TCompletionAction *CompletionAction; TReqId ReqId; - + TLogReadContinue(const NPDisk::TEvReadLogContinue &ev, const TActorId &sender, TAtomicBase /*reqIdx*/) - : TRequestBase(sender, ev.ReqId, 0, 0, NPriInternal::LogRead) - , Data(ev.Data) - , Size(ev.Size) - , Offset(ev.Offset) - , CompletionAction(ev.CompletionAction) - , ReqId(ev.ReqId) - {} - - ERequestType GetType() const override { - return ERequestType::RequestLogReadContinue; - } -}; - -// -// TLogReadResultProcess -// -class TLogReadResultProcess : public TRequestBase { -public: - NPDisk::TEvReadLogResult::TPtr ReadLogResult; - + : TRequestBase(sender, ev.ReqId, 0, 0, NPriInternal::LogRead) + , Data(ev.Data) + , Size(ev.Size) + , Offset(ev.Offset) + , CompletionAction(ev.CompletionAction) + , ReqId(ev.ReqId) + {} + + ERequestType GetType() const override { + return ERequestType::RequestLogReadContinue; + } +}; + +// +// TLogReadResultProcess +// +class TLogReadResultProcess : public TRequestBase { +public: + NPDisk::TEvReadLogResult::TPtr ReadLogResult; + 
TLogReadResultProcess(NPDisk::TEvReadLogResult::TPtr &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::LogReadResultProcess, reqIdx), 0, 0, NPriInternal::LogRead) - - , ReadLogResult(std::move(ev)) - {} - - ERequestType GetType() const override { - return ERequestType::RequestLogReadResultProcess; - } -}; - -// -// TLogSectorRestore -// -class TLogSectorRestore : public TRequestBase { -public: - void *Data; - ui32 Size; - ui64 Offset; - TCompletionAction *CompletionAction; - + : TRequestBase(sender, TReqId(TReqId::LogReadResultProcess, reqIdx), 0, 0, NPriInternal::LogRead) + + , ReadLogResult(std::move(ev)) + {} + + ERequestType GetType() const override { + return ERequestType::RequestLogReadResultProcess; + } +}; + +// +// TLogSectorRestore +// +class TLogSectorRestore : public TRequestBase { +public: + void *Data; + ui32 Size; + ui64 Offset; + TCompletionAction *CompletionAction; + TLogSectorRestore(const NPDisk::TEvLogSectorRestore &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::LogSectorRestore, reqIdx), 0, 0, NPriInternal::LogRead) - , Data(ev.Data) - , Size(ev.Size) - , Offset(ev.Offset) - , CompletionAction(ev.CompletionAction) - {} - - ERequestType GetType() const override { - return ERequestType::RequestLogSectorRestore; - } -}; - -// + : TRequestBase(sender, TReqId(TReqId::LogSectorRestore, reqIdx), 0, 0, NPriInternal::LogRead) + , Data(ev.Data) + , Size(ev.Size) + , Offset(ev.Offset) + , CompletionAction(ev.CompletionAction) + {} + + ERequestType GetType() const override { + return ERequestType::RequestLogSectorRestore; + } +}; + +// // TLogWrite // class TLogWrite : public TRequestBase { @@ -253,7 +253,7 @@ public: TLogWrite *BatchTail; // Valid only for the head of the batch using TCallback = NPDisk::TEvLog::TCallback; - TLogSignature Signature; + TLogSignature Signature; ui32 EstimatedChunkIdx; TString Data; ui64 LsnSegmentStart; // Additional data, for sanity checks only. 
@@ -262,7 +262,7 @@ public: TCallback LogCallback; NPDisk::TCommitRecord CommitRecord; THolder<NPDisk::TEvLogResult> Result; - std::function<void()> OnDestroy; + std::function<void()> OnDestroy; TLogWrite(NPDisk::TEvLog &ev, const TActorId &sender, ui32 estimatedChunkIdx, TReqId reqId, NWilson::TTraceId traceId) : TRequestBase(sender, reqId, ev.Owner, ev.OwnerRound, NPriInternal::LogWrite, std::move(traceId)) @@ -280,19 +280,19 @@ public: } virtual ~TLogWrite() { - if (OnDestroy) { - OnDestroy(); - } + if (OnDestroy) { + OnDestroy(); + } delete NextInBatch; } - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestLogWrite; } void EstimateCost(const TDriveModel &drive) override { ui64 totalBytes = (Data.size() + sizeof(TLogRecordHeader)) * (LogErasureDataParts + 1) / LogErasureDataParts; - Cost = drive.TimeForSizeNs(totalBytes, EstimatedChunkIdx, TDriveModel::OP_TYPE_WRITE); + Cost = drive.TimeForSizeNs(totalBytes, EstimatedChunkIdx, TDriveModel::OP_TYPE_WRITE); } void AddToBatch(TLogWrite *req) { @@ -307,20 +307,20 @@ public: NextInBatch = nullptr; return next; } - - void SetOnDestroy(std::function<void()> onDestroy) { - OnDestroy = std::move(onDestroy); - } + + void SetOnDestroy(std::function<void()> onDestroy) { + OnDestroy = std::move(onDestroy); + } }; -class TCompletionChunkRead; +class TCompletionChunkRead; // // TChunkRead // class TChunkRead : public TRequestBase { protected: static TAtomic LastIndex; - static constexpr ui64 ReferenceCanary = 890461871990457885ull; + static constexpr ui64 ReferenceCanary = 890461871990457885ull; public: ui32 ChunkIdx; ui64 Offset; @@ -329,7 +329,7 @@ public: ui64 CurrentSector = 0; ui64 RemainingSize; - TCompletionChunkRead *FinalCompletion = nullptr; + TCompletionChunkRead *FinalCompletion = nullptr; TAtomicBase Index; bool IsReplied = false; @@ -337,13 +337,13 @@ public: ui64 FirstSector = 0; ui64 LastSector = 0; - // Request is placed in scheduler's queues as raw 
pointer. To avoid deletion - // in such situation request will take owning to self when pushed to - // scheduler and drop owning when poped from scheduler - TIntrusivePtr<TChunkRead> SelfPointer; - - const ui64 DoubleFreeCanary; - + // Request is placed in scheduler's queues as raw pointer. To avoid deletion + // in such situation request will take owning to self when pushed to + // scheduler and drop owning when poped from scheduler + TIntrusivePtr<TChunkRead> SelfPointer; + + const ui64 DoubleFreeCanary; + TChunkRead(const NPDisk::TEvChunkRead &ev, const TActorId &sender, TReqId reqId, NWilson::TTraceId traceId) : TRequestBase(sender, reqId, ev.Owner, ev.OwnerRound, ev.PriorityClass, std::move(traceId)) , ChunkIdx(ev.ChunkIdx) @@ -352,40 +352,40 @@ public: , Cookie(ev.Cookie) , RemainingSize(ev.Size) , SlackSize(Max<ui32>()) - , DoubleFreeCanary(ReferenceCanary) + , DoubleFreeCanary(ReferenceCanary) { Index = AtomicIncrement(LastIndex); } virtual ~TChunkRead() { - Y_VERIFY(DoubleFreeCanary == ReferenceCanary, "DoubleFreeCanary in TChunkRead is dead"); - // Set DoubleFreeCanary to 0 and make sure compiler will not eliminate that action - SecureWipeBuffer((ui8*)&DoubleFreeCanary, sizeof(DoubleFreeCanary)); - Y_VERIFY(!SelfPointer); + Y_VERIFY(DoubleFreeCanary == ReferenceCanary, "DoubleFreeCanary in TChunkRead is dead"); + // Set DoubleFreeCanary to 0 and make sure compiler will not eliminate that action + SecureWipeBuffer((ui8*)&DoubleFreeCanary, sizeof(DoubleFreeCanary)); + Y_VERIFY(!SelfPointer); Y_VERIFY(IsReplied, "Unreplied read request, chunkIdx# %" PRIu32 " Offset# %" PRIu32 " Size# %" PRIu32 " CurrentSector# %" PRIu32 " RemainingSize# %" PRIu32, (ui32)ChunkIdx, (ui32)Offset, (ui32)Size, (ui32)CurrentSector, (ui32)RemainingSize); } - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestChunkRead; } - void Abort(TActorSystem* actorSystem) override; + void Abort(TActorSystem* actorSystem) override; void 
EstimateCost(const TDriveModel &drive) override { - Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)RemainingSize, ChunkIdx, TDriveModel::OP_TYPE_READ); + Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)RemainingSize, ChunkIdx, TDriveModel::OP_TYPE_READ); } bool TryStealSlack(ui64& slackNs, const TDriveModel &drive, ui64 appendBlockSize, bool adhesion) override { Y_UNUSED(appendBlockSize); Y_UNUSED(adhesion); // Calculate how many bytes can we read within given slack (with single seek) - SlackSize = (ui32)drive.SizeForTimeNs(slackNs > drive.SeekTimeNs()? slackNs - drive.SeekTimeNs(): 0, - ChunkIdx, TDriveModel::OP_TYPE_READ); + SlackSize = (ui32)drive.SizeForTimeNs(slackNs > drive.SeekTimeNs()? slackNs - drive.SeekTimeNs(): 0, + ChunkIdx, TDriveModel::OP_TYPE_READ); if (SlackSize > 0) { // TODO[serxa]: actually there is some lower bound, // because we are not reading less than some number of bytes SlackSize = Min(SlackSize, RemainingSize); - ui64 costNs = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)SlackSize, ChunkIdx, TDriveModel::OP_TYPE_READ); + ui64 costNs = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)SlackSize, ChunkIdx, TDriveModel::OP_TYPE_READ); slackNs -= costNs; return true; } else { @@ -402,26 +402,26 @@ public: TIntrusivePtr<TChunkRead> ChunkRead; ui64 PieceCurrentSector; ui64 PieceSizeLimit; - bool IsTheLastPiece; + bool IsTheLastPiece; + + // Request is placed in scheduler's queues as raw pointer. To avoid deletion + // in such situation request will take owning to self when pushed to + // scheduler and drop owning when poped from scheduler + TIntrusivePtr<TChunkReadPiece> SelfPointer; - // Request is placed in scheduler's queues as raw pointer. 
To avoid deletion - // in such situation request will take owning to self when pushed to - // scheduler and drop owning when poped from scheduler - TIntrusivePtr<TChunkReadPiece> SelfPointer; - TChunkReadPiece(TIntrusivePtr<TChunkRead> &read, ui64 pieceCurrentSector, ui64 pieceSizeLimit, bool isTheLastPiece); virtual ~TChunkReadPiece() { - Y_VERIFY(!SelfPointer); + Y_VERIFY(!SelfPointer); } - void OnSuccessfulDestroy(TActorSystem* actorSystem); - - ERequestType GetType() const override { + void OnSuccessfulDestroy(TActorSystem* actorSystem); + + ERequestType GetType() const override { return ERequestType::RequestChunkReadPiece; } - void Abort(TActorSystem* actorSystem) override; + void Abort(TActorSystem* actorSystem) override; void EstimateCost(const TDriveModel &drive) override { Cost = drive.SeekTimeNs() + @@ -455,8 +455,8 @@ public: ui32 SlackSize; ui32 BytesWritten = 0; - THolder<NPDisk::TCompletionAction> Completion; - + THolder<NPDisk::TCompletionAction> Completion; + TChunkWrite(const NPDisk::TEvChunkWrite &ev, const TActorId &sender, TReqId reqId, NWilson::TTraceId traceId) : TRequestBase(sender, reqId, ev.Owner, ev.OwnerRound, ev.PriorityClass, std::move(traceId)) , ChunkIdx(ev.ChunkIdx) @@ -477,12 +477,12 @@ public: SlackSize = Max<ui32>(); } - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestChunkWrite; } void EstimateCost(const TDriveModel &drive) override { - Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)UnenqueuedSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE); + Cost = drive.SeekTimeNs() + drive.TimeForSizeNs((ui64)UnenqueuedSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE); } bool IsFinalIteration() { @@ -496,14 +496,14 @@ public: bool TryStealSlack(ui64& slackNs, const TDriveModel &drive, ui64 appendBlockSize, bool adhesion) override { // Calculate how many bytes can we write within given slack (with single seek) // TODO[serxa]: use write speed? but there is no write speed in drive model! 
- SlackSize = (ui32)drive.SizeForTimeNs(slackNs > drive.SeekTimeNs()? slackNs - drive.SeekTimeNs(): 0, - ChunkIdx, TDriveModel::OP_TYPE_WRITE); + SlackSize = (ui32)drive.SizeForTimeNs(slackNs > drive.SeekTimeNs()? slackNs - drive.SeekTimeNs(): 0, + ChunkIdx, TDriveModel::OP_TYPE_WRITE); // actually there is some lower bound, because we are not writing less than appendBlockSize bytes if (SlackSize >= appendBlockSize) { SlackSize = Min( SlackSize / appendBlockSize * appendBlockSize, (UnenqueuedSize + appendBlockSize - 1) / appendBlockSize * appendBlockSize); - ui64 costNs = (adhesion? 0: drive.SeekTimeNs()) + drive.TimeForSizeNs((ui64)SlackSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE); + ui64 costNs = (adhesion? 0: drive.SeekTimeNs()) + drive.TimeForSizeNs((ui64)SlackSize, ChunkIdx, TDriveModel::OP_TYPE_WRITE); slackNs -= costNs; return true; } else { @@ -528,7 +528,7 @@ public: , PieceSize(pieceSize) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestChunkWritePiece; } @@ -547,14 +547,14 @@ public: ui32 Offset; ui64 Size; - TChunkTrim(ui32 chunkIdx, ui32 offset, ui64 size, TAtomicBase reqIdx) - : TRequestBase(TActorId(), TReqId(TReqId::ChunkTrim, reqIdx), OwnerUnallocated, TOwnerRound(0), NPriInternal::Trim) + TChunkTrim(ui32 chunkIdx, ui32 offset, ui64 size, TAtomicBase reqIdx) + : TRequestBase(TActorId(), TReqId(TReqId::ChunkTrim, reqIdx), OwnerUnallocated, TOwnerRound(0), NPriInternal::Trim) , ChunkIdx(chunkIdx) , Offset(offset) , Size(size) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestChunkTrim; } @@ -580,10 +580,10 @@ public: class THarakiri : public TRequestBase { public: THarakiri(const NPDisk::TEvHarakiri &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::Harakiri, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::Harakiri, reqIdx), ev.Owner, 
ev.OwnerRound, NPriInternal::Other) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestHarakiri; } }; @@ -598,58 +598,58 @@ public: ui32 PDiskId; ui32 VSlotId; TSlay(const NPDisk::TEvSlay &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::Slay, reqIdx), OwnerUnallocated, ev.SlayOwnerRound, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::Slay, reqIdx), OwnerUnallocated, ev.SlayOwnerRound, NPriInternal::Other) , VDiskId(ev.VDiskId) , SlayOwnerRound(ev.SlayOwnerRound) , PDiskId(ev.PDiskId) , VSlotId(ev.VSlotId) {} - ERequestType GetType() const override { - return ERequestType::RequestYardSlay; + ERequestType GetType() const override { + return ERequestType::RequestYardSlay; } }; -// -// TChunkLock -// -class TChunksLock : public TRequestBase { -public: - bool LockByRange; - ui32 Begin; - ui32 End; - ui32 Count; +// +// TChunkLock +// +class TChunksLock : public TRequestBase { +public: + bool LockByRange; + ui32 Begin; + ui32 End; + ui32 Count; TChunksLock(const NPDisk::TEvChunksLock &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::ChunksLock, reqIdx), 0, 0, NPriInternal::Other) - , LockByRange(ev.LockByRange) - , Begin(ev.Begin) - , End(ev.End) - , Count(ev.Count) - {} - - ERequestType GetType() const override { - return ERequestType::RequestChunksLock; - } -}; - -// -// TChunksUnlock -// -class TChunksUnlock : public TRequestBase { -public: + : TRequestBase(sender, TReqId(TReqId::ChunksLock, reqIdx), 0, 0, NPriInternal::Other) + , LockByRange(ev.LockByRange) + , Begin(ev.Begin) + , End(ev.End) + , Count(ev.Count) + {} + + ERequestType GetType() const override { + return ERequestType::RequestChunksLock; + } +}; + +// +// TChunksUnlock +// +class TChunksUnlock : public TRequestBase { +public: TChunksUnlock(const NPDisk::TEvChunksUnlock &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, 
TReqId(TReqId::ChunksUnlock, reqIdx), 0, 0, NPriInternal::Other) - { - Y_UNUSED(ev); - } - - ERequestType GetType() const override { - return ERequestType::RequestChunksUnlock; - } -}; - -// + : TRequestBase(sender, TReqId(TReqId::ChunksUnlock, reqIdx), 0, 0, NPriInternal::Other) + { + Y_UNUSED(ev); + } + + ERequestType GetType() const override { + return ERequestType::RequestChunksUnlock; + } +}; + +// // TChunkReserve // class TChunkReserve : public TRequestBase { @@ -657,92 +657,92 @@ public: ui32 SizeChunks; TChunkReserve(const NPDisk::TEvChunkReserve &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::ChunkReserve, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::ChunkReserve, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) , SizeChunks(ev.SizeChunks) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestChunkReserve; } }; // -// TWhiteboardReport -// -class TWhiteboardReport : public TRequestBase { -public: - TAutoPtr<TEvWhiteboardReportResult> Response; - - TWhiteboardReport(const TActorId &sender, TEvWhiteboardReportResult *response, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::WhiteboardReport, reqIdx), 0u, 0u, NPriInternal::Other) - , Response(response) - {} - - ERequestType GetType() const override { - return ERequestType::RequestWhiteboartReport; - } -}; - -// -// THttpInfo -// -class THttpInfo : public TRequestBase { -public: +// TWhiteboardReport +// +class TWhiteboardReport : public TRequestBase { +public: + TAutoPtr<TEvWhiteboardReportResult> Response; + + TWhiteboardReport(const TActorId &sender, TEvWhiteboardReportResult *response, TAtomicBase reqIdx) + : TRequestBase(sender, TReqId(TReqId::WhiteboardReport, reqIdx), 0u, 0u, NPriInternal::Other) + , Response(response) + {} + + ERequestType GetType() const override { + return ERequestType::RequestWhiteboartReport; + } +}; + +// +// 
THttpInfo +// +class THttpInfo : public TRequestBase { +public: const TActorId EndCustomer; - TStringStream OutputString; - TString DeviceFlagStr; - TString RealtimeFlagStr; - TString FairSchedulerStr; - TString ErrorStr; + TStringStream OutputString; + TString DeviceFlagStr; + TString RealtimeFlagStr; + TString FairSchedulerStr; + TString ErrorStr; bool DoGetSchedule; - - THttpInfo(const TActorId &sender, const TActorId &endCustomer, TStringStream outputString, + + THttpInfo(const TActorId &sender, const TActorId &endCustomer, TStringStream outputString, TString deviceFlagStr, TString realtimeFlagStr, TString fairSchedulerStr, TString errorStr, - bool doGetSchedule, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::HttpInfo, reqIdx), 0u, 0u, NPriInternal::Other) - , EndCustomer(endCustomer) - , OutputString(outputString) - , DeviceFlagStr(deviceFlagStr) - , RealtimeFlagStr(realtimeFlagStr) - , FairSchedulerStr(fairSchedulerStr) - , ErrorStr(errorStr) + bool doGetSchedule, TAtomicBase reqIdx) + : TRequestBase(sender, TReqId(TReqId::HttpInfo, reqIdx), 0u, 0u, NPriInternal::Other) + , EndCustomer(endCustomer) + , OutputString(outputString) + , DeviceFlagStr(deviceFlagStr) + , RealtimeFlagStr(realtimeFlagStr) + , FairSchedulerStr(fairSchedulerStr) + , ErrorStr(errorStr) , DoGetSchedule(doGetSchedule) - {} - - ERequestType GetType() const override { - return ERequestType::RequestHttpInfo; - } - - void Abort(TActorSystem* actorSystem) override { - TEvHttpInfoResult *reportResult = new TEvHttpInfoResult(EndCustomer); - if (DoGetSchedule) { - reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes("", 0, NMon::IEvHttpInfoRes::EContentType::Custom); - actorSystem->Send(Sender, reportResult); - } else { - reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(""); - actorSystem->Send(Sender, reportResult); - } - } -}; - -// -// TUndelivered -// -class TUndelivered : public TRequestBase { -public: - TAutoPtr<TEvents::TEvUndelivered> Event; - + {} + + ERequestType 
GetType() const override { + return ERequestType::RequestHttpInfo; + } + + void Abort(TActorSystem* actorSystem) override { + TEvHttpInfoResult *reportResult = new TEvHttpInfoResult(EndCustomer); + if (DoGetSchedule) { + reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes("", 0, NMon::IEvHttpInfoRes::EContentType::Custom); + actorSystem->Send(Sender, reportResult); + } else { + reportResult->HttpInfoRes = new NMon::TEvHttpInfoRes(""); + actorSystem->Send(Sender, reportResult); + } + } +}; + +// +// TUndelivered +// +class TUndelivered : public TRequestBase { +public: + TAutoPtr<TEvents::TEvUndelivered> Event; + TUndelivered(TEvents::TEvUndelivered::TPtr ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::Undelivered, reqIdx), 0u, 0u, NPriInternal::Other) - , Event(ev->Release()) - {} - - ERequestType GetType() const override { - return ERequestType::RequestUndelivered; - } -}; - -// + : TRequestBase(sender, TReqId(TReqId::Undelivered, reqIdx), 0u, 0u, NPriInternal::Other) + , Event(ev->Release()) + {} + + ERequestType GetType() const override { + return ERequestType::RequestUndelivered; + } +}; + +// // TYardControl // class TYardControl : public TRequestBase { @@ -751,12 +751,12 @@ public: void *Cookie; TYardControl(const NPDisk::TEvYardControl &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::YardControl, reqIdx), 0, 0, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::YardControl, reqIdx), 0, 0, NPriInternal::Other) , Action(ev.Action) , Cookie(ev.Cookie) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestYardControl; } }; @@ -767,10 +767,10 @@ public: class TAskForCutLog : public TRequestBase { public: TAskForCutLog(const NPDisk::TEvAskForCutLog &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::AskForCutLog, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) + : 
TRequestBase(sender, TReqId(TReqId::AskForCutLog, reqIdx), ev.Owner, ev.OwnerRound, NPriInternal::Other) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestAskForCutLog; } }; @@ -783,13 +783,13 @@ public: TOwner OwnerId; TOwnerRound OwnerRound; - TPDiskSchedulerConfig SchedulerCfg; - + TPDiskSchedulerConfig SchedulerCfg; + TConfigureScheduler(const NPDisk::TEvConfigureScheduler &ev, const TActorId &sender, TAtomicBase reqIdx) - : TRequestBase(sender, TReqId(TReqId::ConfigureScheduler, reqIdx), 0, 0, NPriInternal::Other) + : TRequestBase(sender, TReqId(TReqId::ConfigureScheduler, reqIdx), 0, 0, NPriInternal::Other) , OwnerId(ev.Owner) , OwnerRound(ev.OwnerRound) - , SchedulerCfg(ev.SchedulerCfg) + , SchedulerCfg(ev.SchedulerCfg) {} TConfigureScheduler(TOwner ownerId, TOwnerRound ownerRound) @@ -798,7 +798,7 @@ public: , OwnerRound(ownerRound) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestConfigureScheduler; } }; @@ -810,13 +810,13 @@ public: class TCommitLogChunks : public TRequestBase { public: TVector<ui32> CommitedLogChunks; - - TCommitLogChunks(TVector<ui32>&& commitedLogChunks, TAtomicBase reqIdx) + + TCommitLogChunks(TVector<ui32>&& commitedLogChunks, TAtomicBase reqIdx) : TRequestBase(TActorId(), TReqId(TReqId::CommitLogChunks, reqIdx), OwnerSystem, 0, NPriInternal::Other) , CommitedLogChunks(std::move(commitedLogChunks)) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestCommitLogChunks; } }; @@ -826,27 +826,27 @@ public: // class TReleaseChunks : public TRequestBase { public: - TMaybe<TLogChunkInfo> GapStart; - TMaybe<TLogChunkInfo> GapEnd; - TVector<TChunkIdx> ChunksToRelease; - bool IsChunksFromLogSplice; - - TReleaseChunks(const TLogChunkInfo& gapStart, const TLogChunkInfo& gapEnd, TVector<TChunkIdx> chunksToRelease, - TAtomicBase reqIdx) + 
TMaybe<TLogChunkInfo> GapStart; + TMaybe<TLogChunkInfo> GapEnd; + TVector<TChunkIdx> ChunksToRelease; + bool IsChunksFromLogSplice; + + TReleaseChunks(const TLogChunkInfo& gapStart, const TLogChunkInfo& gapEnd, TVector<TChunkIdx> chunksToRelease, + TAtomicBase reqIdx) : TRequestBase(TActorId(), TReqId(TReqId::ReleaseChunks, reqIdx), OwnerSystem, 0, NPriInternal::Other) - , GapStart(gapStart) - , GapEnd(gapEnd) - , ChunksToRelease(std::move(chunksToRelease)) - , IsChunksFromLogSplice(true) + , GapStart(gapStart) + , GapEnd(gapEnd) + , ChunksToRelease(std::move(chunksToRelease)) + , IsChunksFromLogSplice(true) {} - TReleaseChunks(TVector<TChunkIdx> chunksToRelease, TAtomicBase reqIdx) + TReleaseChunks(TVector<TChunkIdx> chunksToRelease, TAtomicBase reqIdx) : TRequestBase(TActorId(), TReqId(TReqId::ReleaseChunks, reqIdx), OwnerSystem, 0, NPriInternal::Other) - , ChunksToRelease(std::move(chunksToRelease)) - , IsChunksFromLogSplice(false) + , ChunksToRelease(std::move(chunksToRelease)) + , IsChunksFromLogSplice(false) {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestReleaseChunks; } }; @@ -859,19 +859,19 @@ public: TOwner OwnerId; TOwnerRound OwnerRound; ui64 Lsn; - TVector<TChunkIdx> CommitedChunks; + TVector<TChunkIdx> CommitedChunks; TVector<TChunkIdx> DeletedChunks; - TLogCommitDone(const TLogWrite& reqLog, TAtomicBase reqIdx) - : TRequestBase({}, TReqId(TReqId::LogCommitDone, reqIdx), OwnerSystem, 0, NPriInternal::Other) - , OwnerId(reqLog.Owner) - , OwnerRound(reqLog.OwnerRound) - , Lsn(reqLog.Lsn) - , CommitedChunks(std::move(reqLog.CommitRecord.CommitChunks)) - , DeletedChunks(std::move(reqLog.CommitRecord.DeleteChunks)) - {} + TLogCommitDone(const TLogWrite& reqLog, TAtomicBase reqIdx) + : TRequestBase({}, TReqId(TReqId::LogCommitDone, reqIdx), OwnerSystem, 0, NPriInternal::Other) + , OwnerId(reqLog.Owner) + , OwnerRound(reqLog.OwnerRound) + , Lsn(reqLog.Lsn) + , 
CommitedChunks(std::move(reqLog.CommitRecord.CommitChunks)) + , DeletedChunks(std::move(reqLog.CommitRecord.DeleteChunks)) + {} - ERequestType GetType() const override { + ERequestType GetType() const override { return ERequestType::RequestLogCommitDone; } }; @@ -883,27 +883,27 @@ class TTryTrimChunk : public TRequestBase { public: ui64 TrimSize; - TTryTrimChunk(ui64 trimSize, TAtomicBase reqIdx) - : TRequestBase(TActorId(), TReqId(TReqId::TryTrimChunk, reqIdx), OwnerSystem, 0, NPriInternal::Other) + TTryTrimChunk(ui64 trimSize, TAtomicBase reqIdx) + : TRequestBase(TActorId(), TReqId(TReqId::TryTrimChunk, reqIdx), OwnerSystem, 0, NPriInternal::Other) , TrimSize(trimSize) - {} - - ERequestType GetType() const override { + {} + + ERequestType GetType() const override { return ERequestType::RequestTryTrimChunk; } }; -class TStopDevice : public TRequestBase { -public: - TStopDevice(TAtomicBase reqIdx) - : TRequestBase(TActorId(), TReqId(TReqId::StopDevice, reqIdx), OwnerSystem, 0, NPriInternal::Other) - {} - - ERequestType GetType() const override { - return ERequestType::RequestStopDevice; - } -}; - +class TStopDevice : public TRequestBase { +public: + TStopDevice(TAtomicBase reqIdx) + : TRequestBase(TActorId(), TReqId(TReqId::StopDevice, reqIdx), OwnerSystem, 0, NPriInternal::Other) + {} + + ERequestType GetType() const override { + return ERequestType::RequestStopDevice; + } +}; + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_restore_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_restore_ut.cpp index 9918030c2a..2e0cc9c9a7 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_restore_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_restore_ut.cpp @@ -25,7 +25,7 @@ Y_UNIT_TEST_SUITE(TYardTestRestore) { VERBOSE_COUT("TestRestore15 i=" << i); DestroySectors(&tc, dataAfter, dataSize, i, 15); - Run<TTestCheckLog>(&tc, 1, chunkSize, false, true); + Run<TTestCheckLog>(&tc, 1, chunkSize, false, true); //Can't use 
resutlts for the next test because we don't wait for the restoration before shutting down. } } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.cpp index 4a1d5619b4..11e1126134 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.cpp @@ -1,189 +1,189 @@ -#include "blobstorage_pdisk_crypto.h" -#include "blobstorage_pdisk_internal_interface.h" -#include "blobstorage_pdisk_sectorrestorator.h" - -namespace NKikimr { -namespace NPDisk { - -TSectorRestorator::TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, - const bool isErasureEncode, const TDiskFormat &format, +#include "blobstorage_pdisk_crypto.h" +#include "blobstorage_pdisk_internal_interface.h" +#include "blobstorage_pdisk_sectorrestorator.h" + +namespace NKikimr { +namespace NPDisk { + +TSectorRestorator::TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, + const bool isErasureEncode, const TDiskFormat &format, TActorSystem *actorSystem, const TActorId pDiskActorId, ui32 pDiskId, TPDiskMon *mon, - TBufferPool *bufferPool) - : IsTrippleCopy(isTrippleCopy) - , ErasureDataParts(erasureDataParts) - , LastGoodIdx((ui32)-1) - , LastBadIdx((ui32)-1) - , GoodSectorFlags(0) - , GoodSectorCount(0) - , RestoredSectorFlags(0) - , Format(format) - , ActorSystem(actorSystem) - , PDiskActorId(pDiskActorId) - , PDiskId(pDiskId) - , IsErasureEncode(isErasureEncode) - , Mon(mon) - , BufferPool(bufferPool) -{} - -TSectorRestorator::TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, - const bool isErasureEncode, const TDiskFormat &format) - : TSectorRestorator(isTrippleCopy, erasureDataParts, isErasureEncode, format, nullptr, {}, 0, nullptr, - nullptr) -{} - -void TSectorRestorator::Restore(ui8 *source, const ui64 offset, const ui64 magic, const ui64 lastNonce, - const bool 
useT1ha0Hash) { - ui32 sectorCount = IsErasureEncode ? (IsTrippleCopy ? ReplicationFactor : (ErasureDataParts + 1)) : 1; - ui64 maxNonce = 0; - TPDiskHashCalculator hasher(useT1ha0Hash); - for (ui32 i = 0; i < sectorCount; ++i) { - TDataSectorFooter *sectorFooter = (TDataSectorFooter*) - (source + (i + 1) * Format.SectorSize - sizeof(TDataSectorFooter)); - TParitySectorFooter *paritySectorFooter = (TParitySectorFooter*) - (source + (i + 1) * Format.SectorSize - sizeof(TParitySectorFooter)); - - ui64 sectorOffset = offset + (IsTrippleCopy ? 0 : (ui64)i * (ui64)Format.SectorSize); - ui8 *sectorData = source + i * Format.SectorSize; - bool isCrcOk = hasher.CheckSectorHash(sectorOffset, magic, sectorData, Format.SectorSize, sectorFooter->Hash); - if (!isCrcOk) { - if (ActorSystem) { - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " Bad hash." - << " IsErasureEncode# " << (ui32)IsErasureEncode - << " ErasureDataParts# " << (ui32)ErasureDataParts << " i# " << (ui32)i - << " readHash# " << (ui64)sectorFooter->Hash - << " calculatedOldHash# " << hasher.OldHashSector(sectorOffset, magic, sectorData, Format.SectorSize) - << " calculatedT1ha0NoAvxHash# " - << hasher.T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sectorData, Format.SectorSize) - << " sectorOffset# " << sectorOffset - << " chunkIdx# " << (sectorOffset / (ui64)Format.ChunkSize) - << " sectorIdx# " << ((sectorOffset % (ui64)Format.ChunkSize) / (ui64)Format.SectorSize)); - } - LastBadIdx = i; - } else if (IsTrippleCopy) { - ui64 nonce = sectorFooter->Nonce; - // One with the greatest Nonce is the correct sector - if (nonce > maxNonce) { - maxNonce = nonce; - LastGoodIdx = i; - GoodSectorFlags = (1u << i); - GoodSectorCount = 1; - } else if (nonce == maxNonce) { - LastGoodIdx = i; - GoodSectorFlags |= (1 << i); - ++GoodSectorCount; - } - } else { - ui64 sectorFooterNonce = i < ErasureDataParts ? 
sectorFooter->Nonce : paritySectorFooter->Nonce; - if (sectorFooterNonce <= lastNonce || sectorFooterNonce <= maxNonce) { - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Sector nonce reordering." - << " IsErasureEncode# " << (ui32)IsErasureEncode - << " ErasureDataParts# " << (ui32)ErasureDataParts - << " i# " << (ui32)i << " readNonce# " << (ui64)sectorFooterNonce - << " lastNonce# " << (ui64)lastNonce << " maxNonce# " << (ui64)maxNonce - << " sectorOffset# " << sectorOffset); - } - // Consider decreasing nonces to be a sign of write reordering, restore sectors - LastBadIdx = i; - } else { - maxNonce = sectorFooterNonce; - LastGoodIdx = i; - GoodSectorFlags |= (1 << i); - ++GoodSectorCount; - } - - } - } - - if (IsErasureEncode) { - if (!IsTrippleCopy && GoodSectorCount == ErasureDataParts) { - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Restoring sector. ErasureDataParts# " << (ui32)ErasureDataParts - << " LastBadIdx# " << (ui32)LastBadIdx - << " sectorOffset# " << (ui64)(offset + (ui64)LastBadIdx * (ui64)Format.SectorSize)); - } - for (ui32 i = 0; i < Format.SectorSize / sizeof(ui64) - 1; ++i) { - ui64 restored = 0; - for (ui32 a = 0; a < LastBadIdx; ++a) { - restored ^= ((ui64*)(source + a * Format.SectorSize))[i]; - } - for (ui32 a = LastBadIdx + 1; a <= ErasureDataParts ; ++a) { - restored ^= ((ui64*)(source + a * Format.SectorSize))[i]; - } - ((ui64*)(source + LastBadIdx * Format.SectorSize))[i] = restored; - } - ui8 *sectorData = source + LastBadIdx * Format.SectorSize; - ui64 sectorOffset = offset + (ui64(LastBadIdx) * ui64(Format.SectorSize)); - if (LastBadIdx == ErasureDataParts) { - // restoring parity sector - TParitySectorFooter *sectorFooter = (TParitySectorFooter*) - (sectorData + Format.SectorSize - sizeof(TParitySectorFooter)); - TDataSectorFooter *goodDataFooter = (TDataSectorFooter*) - (source + (ErasureDataParts) * 
Format.SectorSize - sizeof(TDataSectorFooter)); - sectorFooter->Nonce = goodDataFooter->Nonce + 1; - sectorFooter->Hash = hasher.HashSector(sectorOffset, magic, sectorData, Format.SectorSize); - } else { - // restoring data sector - TDataSectorFooter *sectorFooter = (TDataSectorFooter*) - (sectorData + Format.SectorSize - sizeof(TDataSectorFooter)); - // TODO: restore the correct Version value - sectorFooter->Version = PDISK_DATA_VERSION; - sectorFooter->Hash = hasher.HashSector(sectorOffset, magic, sectorData, Format.SectorSize); - // Increment here because we don't want to count initialy not written parts - *Mon->DeviceErasureSectorRestorations += 1; - } - GoodSectorFlags |= (1 << LastBadIdx); - ++GoodSectorCount; - RestoredSectorFlags |= (1 << LastBadIdx); - WriteSector(sectorData, sectorOffset); - } else if (IsTrippleCopy && GoodSectorCount > 0 && GoodSectorCount < ReplicationFactor) { - ui32 lastGoodSector = 0; - for (i32 i = ReplicationFactor - 1; i >= 0; --i) { - if (GoodSectorFlags & (1 << i)) { - lastGoodSector = i; - break; - } - } - ui32 mask = (1 << lastGoodSector) - 1; - if ((GoodSectorFlags & mask) != mask) { - *Mon->DeviceErasureSectorRestorations += 1; - } - for (ui32 i = 0; i < ReplicationFactor; ++i) { - if (!(GoodSectorFlags & (1 << i))) { - ui8 *badSector = source + i * Format.SectorSize; - ui64 sectorOffset = offset + (ui64)(i * Format.SectorSize); - ui8 *goodSector = source + LastGoodIdx * Format.SectorSize; - - if (ActorSystem) { - LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId - << " Restoring trippleCopy sector i# " << (ui32)i - << " GoodSectorCount# " << (ui32)GoodSectorCount - << " ReplicationFactor# " << (ui32)ReplicationFactor - << " sectorOffset# " << (ui64)sectorOffset); - } - // Y_FAIL("RESTORE"); - memcpy(badSector, goodSector, size_t(Format.SectorSize)); - GoodSectorFlags |= (1 << i); - ++GoodSectorCount; - RestoredSectorFlags |= (1 << i); - WriteSector(badSector, sectorOffset); - } - } - } - } -} 
- -void TSectorRestorator::WriteSector(ui8 *sectorData, ui64 writeOffset) { - if (ActorSystem && BufferPool) { - TBuffer *buffer = BufferPool->Pop(); - Y_VERIFY(Format.SectorSize <= buffer->Size()); - memcpy(buffer->Data(), sectorData, (size_t)Format.SectorSize); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(buffer->Data(), Format.SectorSize); - ActorSystem->Send(PDiskActorId, new TEvLogSectorRestore(buffer->Data(), Format.SectorSize, writeOffset, buffer)); - } -} - -} // NPDisk -} // NKikimr + TBufferPool *bufferPool) + : IsTrippleCopy(isTrippleCopy) + , ErasureDataParts(erasureDataParts) + , LastGoodIdx((ui32)-1) + , LastBadIdx((ui32)-1) + , GoodSectorFlags(0) + , GoodSectorCount(0) + , RestoredSectorFlags(0) + , Format(format) + , ActorSystem(actorSystem) + , PDiskActorId(pDiskActorId) + , PDiskId(pDiskId) + , IsErasureEncode(isErasureEncode) + , Mon(mon) + , BufferPool(bufferPool) +{} + +TSectorRestorator::TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, + const bool isErasureEncode, const TDiskFormat &format) + : TSectorRestorator(isTrippleCopy, erasureDataParts, isErasureEncode, format, nullptr, {}, 0, nullptr, + nullptr) +{} + +void TSectorRestorator::Restore(ui8 *source, const ui64 offset, const ui64 magic, const ui64 lastNonce, + const bool useT1ha0Hash) { + ui32 sectorCount = IsErasureEncode ? (IsTrippleCopy ? ReplicationFactor : (ErasureDataParts + 1)) : 1; + ui64 maxNonce = 0; + TPDiskHashCalculator hasher(useT1ha0Hash); + for (ui32 i = 0; i < sectorCount; ++i) { + TDataSectorFooter *sectorFooter = (TDataSectorFooter*) + (source + (i + 1) * Format.SectorSize - sizeof(TDataSectorFooter)); + TParitySectorFooter *paritySectorFooter = (TParitySectorFooter*) + (source + (i + 1) * Format.SectorSize - sizeof(TParitySectorFooter)); + + ui64 sectorOffset = offset + (IsTrippleCopy ? 
0 : (ui64)i * (ui64)Format.SectorSize); + ui8 *sectorData = source + i * Format.SectorSize; + bool isCrcOk = hasher.CheckSectorHash(sectorOffset, magic, sectorData, Format.SectorSize, sectorFooter->Hash); + if (!isCrcOk) { + if (ActorSystem) { + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId << " Bad hash." + << " IsErasureEncode# " << (ui32)IsErasureEncode + << " ErasureDataParts# " << (ui32)ErasureDataParts << " i# " << (ui32)i + << " readHash# " << (ui64)sectorFooter->Hash + << " calculatedOldHash# " << hasher.OldHashSector(sectorOffset, magic, sectorData, Format.SectorSize) + << " calculatedT1ha0NoAvxHash# " + << hasher.T1ha0HashSector<TT1ha0NoAvxHasher>(sectorOffset, magic, sectorData, Format.SectorSize) + << " sectorOffset# " << sectorOffset + << " chunkIdx# " << (sectorOffset / (ui64)Format.ChunkSize) + << " sectorIdx# " << ((sectorOffset % (ui64)Format.ChunkSize) / (ui64)Format.SectorSize)); + } + LastBadIdx = i; + } else if (IsTrippleCopy) { + ui64 nonce = sectorFooter->Nonce; + // One with the greatest Nonce is the correct sector + if (nonce > maxNonce) { + maxNonce = nonce; + LastGoodIdx = i; + GoodSectorFlags = (1u << i); + GoodSectorCount = 1; + } else if (nonce == maxNonce) { + LastGoodIdx = i; + GoodSectorFlags |= (1 << i); + ++GoodSectorCount; + } + } else { + ui64 sectorFooterNonce = i < ErasureDataParts ? sectorFooter->Nonce : paritySectorFooter->Nonce; + if (sectorFooterNonce <= lastNonce || sectorFooterNonce <= maxNonce) { + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Sector nonce reordering." 
+ << " IsErasureEncode# " << (ui32)IsErasureEncode + << " ErasureDataParts# " << (ui32)ErasureDataParts + << " i# " << (ui32)i << " readNonce# " << (ui64)sectorFooterNonce + << " lastNonce# " << (ui64)lastNonce << " maxNonce# " << (ui64)maxNonce + << " sectorOffset# " << sectorOffset); + } + // Consider decreasing nonces to be a sign of write reordering, restore sectors + LastBadIdx = i; + } else { + maxNonce = sectorFooterNonce; + LastGoodIdx = i; + GoodSectorFlags |= (1 << i); + ++GoodSectorCount; + } + + } + } + + if (IsErasureEncode) { + if (!IsTrippleCopy && GoodSectorCount == ErasureDataParts) { + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Restoring sector. ErasureDataParts# " << (ui32)ErasureDataParts + << " LastBadIdx# " << (ui32)LastBadIdx + << " sectorOffset# " << (ui64)(offset + (ui64)LastBadIdx * (ui64)Format.SectorSize)); + } + for (ui32 i = 0; i < Format.SectorSize / sizeof(ui64) - 1; ++i) { + ui64 restored = 0; + for (ui32 a = 0; a < LastBadIdx; ++a) { + restored ^= ((ui64*)(source + a * Format.SectorSize))[i]; + } + for (ui32 a = LastBadIdx + 1; a <= ErasureDataParts ; ++a) { + restored ^= ((ui64*)(source + a * Format.SectorSize))[i]; + } + ((ui64*)(source + LastBadIdx * Format.SectorSize))[i] = restored; + } + ui8 *sectorData = source + LastBadIdx * Format.SectorSize; + ui64 sectorOffset = offset + (ui64(LastBadIdx) * ui64(Format.SectorSize)); + if (LastBadIdx == ErasureDataParts) { + // restoring parity sector + TParitySectorFooter *sectorFooter = (TParitySectorFooter*) + (sectorData + Format.SectorSize - sizeof(TParitySectorFooter)); + TDataSectorFooter *goodDataFooter = (TDataSectorFooter*) + (source + (ErasureDataParts) * Format.SectorSize - sizeof(TDataSectorFooter)); + sectorFooter->Nonce = goodDataFooter->Nonce + 1; + sectorFooter->Hash = hasher.HashSector(sectorOffset, magic, sectorData, Format.SectorSize); + } else { + // restoring data sector + TDataSectorFooter 
*sectorFooter = (TDataSectorFooter*) + (sectorData + Format.SectorSize - sizeof(TDataSectorFooter)); + // TODO: restore the correct Version value + sectorFooter->Version = PDISK_DATA_VERSION; + sectorFooter->Hash = hasher.HashSector(sectorOffset, magic, sectorData, Format.SectorSize); + // Increment here because we don't want to count initialy not written parts + *Mon->DeviceErasureSectorRestorations += 1; + } + GoodSectorFlags |= (1 << LastBadIdx); + ++GoodSectorCount; + RestoredSectorFlags |= (1 << LastBadIdx); + WriteSector(sectorData, sectorOffset); + } else if (IsTrippleCopy && GoodSectorCount > 0 && GoodSectorCount < ReplicationFactor) { + ui32 lastGoodSector = 0; + for (i32 i = ReplicationFactor - 1; i >= 0; --i) { + if (GoodSectorFlags & (1 << i)) { + lastGoodSector = i; + break; + } + } + ui32 mask = (1 << lastGoodSector) - 1; + if ((GoodSectorFlags & mask) != mask) { + *Mon->DeviceErasureSectorRestorations += 1; + } + for (ui32 i = 0; i < ReplicationFactor; ++i) { + if (!(GoodSectorFlags & (1 << i))) { + ui8 *badSector = source + i * Format.SectorSize; + ui64 sectorOffset = offset + (ui64)(i * Format.SectorSize); + ui8 *goodSector = source + LastGoodIdx * Format.SectorSize; + + if (ActorSystem) { + LOG_WARN_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId + << " Restoring trippleCopy sector i# " << (ui32)i + << " GoodSectorCount# " << (ui32)GoodSectorCount + << " ReplicationFactor# " << (ui32)ReplicationFactor + << " sectorOffset# " << (ui64)sectorOffset); + } + // Y_FAIL("RESTORE"); + memcpy(badSector, goodSector, size_t(Format.SectorSize)); + GoodSectorFlags |= (1 << i); + ++GoodSectorCount; + RestoredSectorFlags |= (1 << i); + WriteSector(badSector, sectorOffset); + } + } + } + } +} + +void TSectorRestorator::WriteSector(ui8 *sectorData, ui64 writeOffset) { + if (ActorSystem && BufferPool) { + TBuffer *buffer = BufferPool->Pop(); + Y_VERIFY(Format.SectorSize <= buffer->Size()); + memcpy(buffer->Data(), sectorData, 
(size_t)Format.SectorSize); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(buffer->Data(), Format.SectorSize); + ActorSystem->Send(PDiskActorId, new TEvLogSectorRestore(buffer->Data(), Format.SectorSize, writeOffset, buffer)); + } +} + +} // NPDisk +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.h index 6d21ce8873..e0936e8bc3 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_sectorrestorator.h @@ -1,43 +1,43 @@ -#pragma once - -#include "blobstorage_pdisk_blockdevice.h" +#pragma once + +#include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_data.h" -#include "blobstorage_pdisk_mon.h" -#include "defs.h" - -namespace NKikimr { -namespace NPDisk { - -struct TSectorRestorator { - const bool IsTrippleCopy; - const ui32 ErasureDataParts; - ui32 LastGoodIdx; - ui32 LastBadIdx; - ui32 GoodSectorFlags; - ui32 GoodSectorCount; - ui32 RestoredSectorFlags; - const TDiskFormat &Format; - TActorSystem *ActorSystem; +#include "blobstorage_pdisk_data.h" +#include "blobstorage_pdisk_mon.h" +#include "defs.h" + +namespace NKikimr { +namespace NPDisk { + +struct TSectorRestorator { + const bool IsTrippleCopy; + const ui32 ErasureDataParts; + ui32 LastGoodIdx; + ui32 LastBadIdx; + ui32 GoodSectorFlags; + ui32 GoodSectorCount; + ui32 RestoredSectorFlags; + const TDiskFormat &Format; + TActorSystem *ActorSystem; const TActorId PDiskActorId; - ui32 PDiskId; - bool IsErasureEncode; - TPDiskMon *Mon; - TBufferPool *BufferPool; - - TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, - const bool isErasureEncode, const TDiskFormat &format, + ui32 PDiskId; + bool IsErasureEncode; + TPDiskMon *Mon; + TBufferPool *BufferPool; + + TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, + const bool isErasureEncode, const 
TDiskFormat &format, TActorSystem *actorSystem, const TActorId pDiskActorId, ui32 pDiskId, TPDiskMon *mon, - TBufferPool *bufferPool); - - TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, - const bool isErasureEncode, const TDiskFormat &format); - - - void Restore(ui8 *source, const ui64 offset, const ui64 magic, const ui64 lastNonce, const bool useT1ha0Hash); - - void WriteSector(ui8 *sectorData, ui64 writeOffset); -}; - -} // NPDisk -} // NKikimr + TBufferPool *bufferPool); + + TSectorRestorator(const bool isTrippleCopy, const ui32 erasureDataParts, + const bool isErasureEncode, const TDiskFormat &format); + + + void Restore(ui8 *source, const ui64 offset, const ui64 magic, const ui64 lastNonce, const bool useT1ha0Hash); + + void WriteSector(ui8 *sectorData, ui64 writeOffset); +}; + +} // NPDisk +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_signature.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_signature.h index 0349d3f342..6f8e8fb20b 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_signature.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_signature.h @@ -1,97 +1,97 @@ -#pragma once - -#include "defs.h" - -namespace NKikimr { - -class TLogSignature { - static const ui8 ChunkCommitMask = 0x80; - - ui8 Signature; - -public: - enum E : ui8 { - First = 0, - SignatureLogoBlob = 1, // deprecated - SignatureBlock = 2, - SignatureGC = 3, - SignatureSyncLogIdx = 4, - SignatureHullLogoBlobsDB = 5, - SignatureHullBlocksDB = 6, - SignatureHullBarriersDB = 7, - SignatureHullCutLog = 8, - SignatureLocalSyncData = 9, - SignatureSyncerState = 10, - SignatureHandoffDelLogoBlob = 11, - SignatureHugeBlobAllocChunk = 12, - SignatureHugeBlobFreeChunk = 13, - SignatureHugeBlobEntryPoint = 14, - SignatureHugeLogoBlob = 15, - SignatureLogoBlobOpt = 16, // optimized LogoBlob record - SignaturePhantomBlobs = 17, - SignatureIncrHugeChunks = 18, - SignatureIncrHugeDeletes = 19, - SignatureAnubisOsirisPut = 20, - 
SignatureAddBulkSst = 21, +#pragma once + +#include "defs.h" + +namespace NKikimr { + +class TLogSignature { + static const ui8 ChunkCommitMask = 0x80; + + ui8 Signature; + +public: + enum E : ui8 { + First = 0, + SignatureLogoBlob = 1, // deprecated + SignatureBlock = 2, + SignatureGC = 3, + SignatureSyncLogIdx = 4, + SignatureHullLogoBlobsDB = 5, + SignatureHullBlocksDB = 6, + SignatureHullBarriersDB = 7, + SignatureHullCutLog = 8, + SignatureLocalSyncData = 9, + SignatureSyncerState = 10, + SignatureHandoffDelLogoBlob = 11, + SignatureHugeBlobAllocChunk = 12, + SignatureHugeBlobFreeChunk = 13, + SignatureHugeBlobEntryPoint = 14, + SignatureHugeLogoBlob = 15, + SignatureLogoBlobOpt = 16, // optimized LogoBlob record + SignaturePhantomBlobs = 17, + SignatureIncrHugeChunks = 18, + SignatureIncrHugeDeletes = 19, + SignatureAnubisOsirisPut = 20, + SignatureAddBulkSst = 21, SignatureScrub = 22, Max = 23 - }; - - TLogSignature(ui8 val = 0, bool hasCommit = false) - : Signature(val) - { - if (hasCommit) { - SetCommitRecord(); - } - } - - operator ui8() const { - return Signature; - } - - void SetCommitRecord() { - Signature |= ChunkCommitMask; - } - - bool HasCommitRecord() const { - return Signature & ChunkCommitMask; - } - - ui32 GetUnmasked() const { - return Signature & ~ChunkCommitMask; - } - - TString ToString() const { - switch(Signature) { - case First: return "First"; - case SignatureLogoBlob: return "LogoBlob"; - case SignatureBlock: return "Block"; - case SignatureGC: return "GC"; - case SignatureSyncLogIdx: return "SyncLogIdx"; - case SignatureHullLogoBlobsDB: return "HullLogoBlobsDB"; - case SignatureHullBlocksDB: return "HullBlocksDB"; - case SignatureHullBarriersDB: return "HullBarriersDB"; - case SignatureHullCutLog: return "HullCutLog"; - case SignatureLocalSyncData: return "LocalSyncData"; - case SignatureSyncerState: return "SyncerState"; - case SignatureHandoffDelLogoBlob: return "HandoffDelLogoBlob"; - case SignatureHugeBlobAllocChunk: return 
"HugeBlobAllocChunk"; - case SignatureHugeBlobFreeChunk: return "HugeBlobFreeChunk"; - case SignatureHugeBlobEntryPoint: return "HugeBlobEntryPoint"; - case SignatureHugeLogoBlob: return "SignatureHugeLogoBlob"; - case SignatureLogoBlobOpt: return "SignatureLogoBlobOpt"; - case SignaturePhantomBlobs: return "PhantomBlobs"; - case SignatureIncrHugeChunks: return "IncrHugeChunks"; - case SignatureIncrHugeDeletes: return "IncrHugeDeletes"; - case SignatureAnubisOsirisPut: return "SignatureAnubisOsirisPut"; - case SignatureAddBulkSst: return "SignatureAddBulkSst"; + }; + + TLogSignature(ui8 val = 0, bool hasCommit = false) + : Signature(val) + { + if (hasCommit) { + SetCommitRecord(); + } + } + + operator ui8() const { + return Signature; + } + + void SetCommitRecord() { + Signature |= ChunkCommitMask; + } + + bool HasCommitRecord() const { + return Signature & ChunkCommitMask; + } + + ui32 GetUnmasked() const { + return Signature & ~ChunkCommitMask; + } + + TString ToString() const { + switch(Signature) { + case First: return "First"; + case SignatureLogoBlob: return "LogoBlob"; + case SignatureBlock: return "Block"; + case SignatureGC: return "GC"; + case SignatureSyncLogIdx: return "SyncLogIdx"; + case SignatureHullLogoBlobsDB: return "HullLogoBlobsDB"; + case SignatureHullBlocksDB: return "HullBlocksDB"; + case SignatureHullBarriersDB: return "HullBarriersDB"; + case SignatureHullCutLog: return "HullCutLog"; + case SignatureLocalSyncData: return "LocalSyncData"; + case SignatureSyncerState: return "SyncerState"; + case SignatureHandoffDelLogoBlob: return "HandoffDelLogoBlob"; + case SignatureHugeBlobAllocChunk: return "HugeBlobAllocChunk"; + case SignatureHugeBlobFreeChunk: return "HugeBlobFreeChunk"; + case SignatureHugeBlobEntryPoint: return "HugeBlobEntryPoint"; + case SignatureHugeLogoBlob: return "SignatureHugeLogoBlob"; + case SignatureLogoBlobOpt: return "SignatureLogoBlobOpt"; + case SignaturePhantomBlobs: return "PhantomBlobs"; + case 
SignatureIncrHugeChunks: return "IncrHugeChunks"; + case SignatureIncrHugeDeletes: return "IncrHugeDeletes"; + case SignatureAnubisOsirisPut: return "SignatureAnubisOsirisPut"; + case SignatureAddBulkSst: return "SignatureAddBulkSst"; case SignatureScrub: return "SignatureScrub"; - case Max: return "Max"; - } - return TStringBuilder() << "Unknown(" << static_cast<ui32>(Signature) << "("; - } -}; - -static_assert(sizeof(TLogSignature) == 1, "for compatibility"); - -} + case Max: return "Max"; + } + return TStringBuilder() << "Unknown(" << static_cast<ui32>(Signature) << "("; + } +}; + +static_assert(sizeof(TLogSignature) == 1, "for compatibility"); + +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h index c2ef2d51a6..3791397b91 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h @@ -1,11 +1,11 @@ #pragma once #include "defs.h" - + #include "blobstorage_pdisk.h" -#include "blobstorage_pdisk_logreader_base.h" +#include "blobstorage_pdisk_logreader_base.h" #include <ydb/core/util/metrics.h> - + namespace NKikimr { namespace NPDisk { @@ -13,156 +13,156 @@ namespace NPDisk { // PDisk In-memory structures //////////////////////////////////////////////////////////////////////////// -enum class EInitPhase { - Uninitialized, - ReadingSysLog, - ReadingLog, - Initialized, -}; - -enum EOwner { - OwnerSystem = 0, // Chunk0, SysLog chunks and CommonLog + just common log tracking, mens "for dynamic" in requests - OwnerUnallocated = 1, // Unallocated chunks, Trim scheduling, Slay commands - OwnerBeginUser = 2, - OwnerEndUser = 241, - OwnerSystemLog = 251, // Not used to actually mark chunks, just for space tracking - OwnerSystemReserve = 252, // Not used to actually mark chunks, just for space tracking, means "for static" in requests - OwnerCommonStaticLog = 253, // Not used to actually mark chunks, just for space tracking - 
OwnerUnallocatedTrimmed = 254, // Because of forward compatibility may not be written to disk - OwnerLocked = 255, - OwnerCount = 256 -}; - -inline bool IsOwnerAllocated(TOwner owner) { - return owner != OwnerUnallocated && owner != OwnerUnallocatedTrimmed; -} - -inline bool IsOwnerUser(TOwner owner) { - return OwnerBeginUser <= owner && owner < OwnerEndUser; -} - -struct TOwnerInflight : TThrRefBase { - std::atomic<i64> ChunkWrites = 0; - std::atomic<i64> ChunkReads = 0; - std::atomic<i64> LogWrites = 0; -}; - -struct TOwnerData { - enum EVDiskStatus { - VDISK_STATUS_DEFAULT = 0, - VDISK_STATUS_HASNT_COME = 1, - VDISK_STATUS_SENT_INIT = 2, - VDISK_STATUS_READING_LOG = 3, - VDISK_STATUS_LOGGED = 4, - }; - TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; - TVDiskID VDiskId = TVDiskID::InvalidId; - EVDiskStatus Status = VDISK_STATUS_DEFAULT; - ui64 CurrentFirstLsnToKeep = 0; - ui64 LastWrittenCommitLsn = 0; - TActorId CutLogId; - TActorId WhiteboardProxyId; - ui64 LogRecordsInitiallyRead = 0; - ui64 LogRecordsConsequentlyRead = 0; - TOwnerRound OwnerRound = 0; - TInstant AskedToCutLogAt; - TInstant CutLogAt; - ui64 LastSeenLsn = 0; - bool HasAlreadyLoggedThisIncarnation = false; - bool HasReadTheWholeLog = false; - TLogPosition LogStartPosition{0, 0}; - NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond> ReadThroughput; - NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond> WriteThroughput; +enum class EInitPhase { + Uninitialized, + ReadingSysLog, + ReadingLog, + Initialized, +}; + +enum EOwner { + OwnerSystem = 0, // Chunk0, SysLog chunks and CommonLog + just common log tracking, mens "for dynamic" in requests + OwnerUnallocated = 1, // Unallocated chunks, Trim scheduling, Slay commands + OwnerBeginUser = 2, + OwnerEndUser = 241, + OwnerSystemLog = 251, // Not used to actually mark chunks, just for space tracking + OwnerSystemReserve = 252, // Not used to actually mark 
chunks, just for space tracking, means "for static" in requests + OwnerCommonStaticLog = 253, // Not used to actually mark chunks, just for space tracking + OwnerUnallocatedTrimmed = 254, // Because of forward compatibility may not be written to disk + OwnerLocked = 255, + OwnerCount = 256 +}; + +inline bool IsOwnerAllocated(TOwner owner) { + return owner != OwnerUnallocated && owner != OwnerUnallocatedTrimmed; +} + +inline bool IsOwnerUser(TOwner owner) { + return OwnerBeginUser <= owner && owner < OwnerEndUser; +} + +struct TOwnerInflight : TThrRefBase { + std::atomic<i64> ChunkWrites = 0; + std::atomic<i64> ChunkReads = 0; + std::atomic<i64> LogWrites = 0; +}; + +struct TOwnerData { + enum EVDiskStatus { + VDISK_STATUS_DEFAULT = 0, + VDISK_STATUS_HASNT_COME = 1, + VDISK_STATUS_SENT_INIT = 2, + VDISK_STATUS_READING_LOG = 3, + VDISK_STATUS_LOGGED = 4, + }; + TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; + TVDiskID VDiskId = TVDiskID::InvalidId; + EVDiskStatus Status = VDISK_STATUS_DEFAULT; + ui64 CurrentFirstLsnToKeep = 0; + ui64 LastWrittenCommitLsn = 0; + TActorId CutLogId; + TActorId WhiteboardProxyId; + ui64 LogRecordsInitiallyRead = 0; + ui64 LogRecordsConsequentlyRead = 0; + TOwnerRound OwnerRound = 0; + TInstant AskedToCutLogAt; + TInstant CutLogAt; + ui64 LastSeenLsn = 0; + bool HasAlreadyLoggedThisIncarnation = false; + bool HasReadTheWholeLog = false; + TLogPosition LogStartPosition{0, 0}; + NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond> ReadThroughput; + NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond> WriteThroughput; ui32 VDiskSlotId = 0; - - TIntrusivePtr<TLogReaderBase> LogReader; - TIntrusivePtr<TOwnerInflight> InFlight; - - TOwnerData() - : InFlight(new TOwnerInflight) - {} - - bool IsStaticGroupOwner() const { - if (VDiskId == TVDiskID::InvalidId) { - return false; - } - TGroupID groupId(VDiskId.GroupID); - bool isStatic = 
(groupId.ConfigurationType() == GroupConfigurationTypeStatic); - return isStatic; - } - - bool IsNextLsnOk(const ui64 lsn) const { - if (lsn == LastSeenLsn + 1) { - // The most common case, linear increment. - return true; - } - if (lsn <= LastSeenLsn) { - // Lsn reversal or duplication. - return false; - } - // Forward jump. - if (HasAlreadyLoggedThisIncarnation) { - return false; - } - return true; - } - - void SetLastSeenLsn(const ui64 lsn) { - LastSeenLsn = lsn; - if (!HasAlreadyLoggedThisIncarnation) { - HasAlreadyLoggedThisIncarnation = true; - } - } - - const char* GetStringStatus() const { - return RenderStatus(Status); - } - - static const char* RenderStatus(const EVDiskStatus status) { - switch(status) { - case VDISK_STATUS_DEFAULT: - return "Error in status, status is unknown"; - case VDISK_STATUS_HASNT_COME: - // VDisk is known, but didn't sent TEvYardInit yet - return "seen before, but not initialized yet"; - case VDISK_STATUS_SENT_INIT: - // VDisk sent TEvYardInit, but didn't sent TEvReadLog - return "alive, but didn't start to read log"; - case VDISK_STATUS_READING_LOG: - // VDisk sent TEvReadLog, but didn't sent TEvLog - return "reading log"; - case VDISK_STATUS_LOGGED: - // VDisk sent TEvLog at least once - return "logged"; - } - return "Unexpected enum value"; - } - - bool HaveRequestsInFlight() const { - return LogReader || InFlight->ChunkWrites || InFlight->ChunkReads || InFlight->LogWrites; - } -}; - + + TIntrusivePtr<TLogReaderBase> LogReader; + TIntrusivePtr<TOwnerInflight> InFlight; + + TOwnerData() + : InFlight(new TOwnerInflight) + {} + + bool IsStaticGroupOwner() const { + if (VDiskId == TVDiskID::InvalidId) { + return false; + } + TGroupID groupId(VDiskId.GroupID); + bool isStatic = (groupId.ConfigurationType() == GroupConfigurationTypeStatic); + return isStatic; + } + + bool IsNextLsnOk(const ui64 lsn) const { + if (lsn == LastSeenLsn + 1) { + // The most common case, linear increment. 
+ return true; + } + if (lsn <= LastSeenLsn) { + // Lsn reversal or duplication. + return false; + } + // Forward jump. + if (HasAlreadyLoggedThisIncarnation) { + return false; + } + return true; + } + + void SetLastSeenLsn(const ui64 lsn) { + LastSeenLsn = lsn; + if (!HasAlreadyLoggedThisIncarnation) { + HasAlreadyLoggedThisIncarnation = true; + } + } + + const char* GetStringStatus() const { + return RenderStatus(Status); + } + + static const char* RenderStatus(const EVDiskStatus status) { + switch(status) { + case VDISK_STATUS_DEFAULT: + return "Error in status, status is unknown"; + case VDISK_STATUS_HASNT_COME: + // VDisk is known, but didn't sent TEvYardInit yet + return "seen before, but not initialized yet"; + case VDISK_STATUS_SENT_INIT: + // VDisk sent TEvYardInit, but didn't sent TEvReadLog + return "alive, but didn't start to read log"; + case VDISK_STATUS_READING_LOG: + // VDisk sent TEvReadLog, but didn't sent TEvLog + return "reading log"; + case VDISK_STATUS_LOGGED: + // VDisk sent TEvLog at least once + return "logged"; + } + return "Unexpected enum value"; + } + + bool HaveRequestsInFlight() const { + return LogReader || InFlight->ChunkWrites || InFlight->ChunkReads || InFlight->LogWrites; + } +}; + struct TChunkState { - enum ECommitState : ui8 { - FREE = 0, - DATA_RESERVED_DELETE_IN_PROGRESS, - DATA_COMMITTED_DELETE_IN_PROGRESS, - DATA_RESERVED, - DATA_COMMITTED, - DATA_ON_QUARANTINE, - DATA_COMMITTED_ON_QUARANTINE, - LOG_RESERVED, - LOG_COMMITTED, - }; - + enum ECommitState : ui8 { + FREE = 0, + DATA_RESERVED_DELETE_IN_PROGRESS, + DATA_COMMITTED_DELETE_IN_PROGRESS, + DATA_RESERVED, + DATA_COMMITTED, + DATA_ON_QUARANTINE, + DATA_COMMITTED_ON_QUARANTINE, + LOG_RESERVED, + LOG_COMMITTED, + }; + ui64 Nonce; ui64 CurrentNonce; ui64 PreviousNonce; - std::atomic<i64> OperationsInProgress; + std::atomic<i64> OperationsInProgress; TOwner OwnerId; - ECommitState CommitState; - ui64 CommitsInProgress; + ECommitState CommitState; + ui64 CommitsInProgress; 
TChunkState() : Nonce(0) @@ -170,108 +170,108 @@ struct TChunkState { , PreviousNonce(0) , OperationsInProgress(0) , OwnerId(OwnerUnallocated) - , CommitState(FREE) - , CommitsInProgress(0) + , CommitState(FREE) + , CommitsInProgress(0) {} - - bool HasAnyOperationsInProgress() const { - return OperationsInProgress || CommitsInProgress; - } - -#ifdef OUT_VAR -#error "OUT_VAR already defined" -#endif -#define OUT_VAR(x) do { str << #x "# " << x << ", "; } while(false) - TString ToString() const { - TStringStream str; - str << "{ "; - OUT_VAR(Nonce); - OUT_VAR(CurrentNonce); - OUT_VAR(PreviousNonce); - OUT_VAR(OperationsInProgress.load()); - OUT_VAR(OwnerId); - OUT_VAR(CommitState); - OUT_VAR(CommitsInProgress); - str << "}"; - return str.Str(); - } -#undef OUT_VAR + + bool HasAnyOperationsInProgress() const { + return OperationsInProgress || CommitsInProgress; + } + +#ifdef OUT_VAR +#error "OUT_VAR already defined" +#endif +#define OUT_VAR(x) do { str << #x "# " << x << ", "; } while(false) + TString ToString() const { + TStringStream str; + str << "{ "; + OUT_VAR(Nonce); + OUT_VAR(CurrentNonce); + OUT_VAR(PreviousNonce); + OUT_VAR(OperationsInProgress.load()); + OUT_VAR(OwnerId); + OUT_VAR(CommitState); + OUT_VAR(CommitsInProgress); + str << "}"; + return str.Str(); + } +#undef OUT_VAR +}; + +struct TLogChunkInfo { + struct TLsnRange { + ui64 FirstLsn; + ui64 LastLsn; + bool IsPresent; + + TLsnRange() + : FirstLsn(0xffffffffffffffffull) + , LastLsn(0) + , IsPresent(false) + {} + }; + ui32 ChunkIdx; + ui64 PrevChunkLastNonce; + ui64 FirstNonce; + ui64 LastNonce; + TVector<TLsnRange> OwnerLsnRange; + + ui32 CurrentUserCount; + + bool IsEndOfSplice; + + TLogChunkInfo(ui32 chunkIdx, ui32 ownerCount) + : ChunkIdx(chunkIdx) + , PrevChunkLastNonce(0) + , FirstNonce(0) + , LastNonce(0) + , OwnerLsnRange(ownerCount) + , CurrentUserCount(0) + , IsEndOfSplice(false) + {} + + template <bool AllowNewOwners> + void RegisterLogSector(TOwner ownerId, ui64 ownerLsn) { + if (ownerId 
>= OwnerLsnRange.size()) { + if (AllowNewOwners) { + OwnerLsnRange.resize(ownerId + 1); + } else { + return; + } + } + TLogChunkInfo::TLsnRange &range = OwnerLsnRange[ownerId]; + range.FirstLsn = Min(range.FirstLsn, ownerLsn); + range.LastLsn = Max(range.LastLsn, ownerLsn); + if (!range.IsPresent) { + range.IsPresent = true; + ++CurrentUserCount; + } + } + + TString ToString() const { + TStringStream str; + str << "{ChunkIdx# " << ChunkIdx + << " PrevChunkLastNonce# " << PrevChunkLastNonce + << " FirstNonce# " << FirstNonce + << " LastNonce# " << LastNonce + << " CurrentUserCount# " << CurrentUserCount + << " OwnersLsnRange# {"; + + for (size_t i = 0; i < OwnerLsnRange.size(); ++i) { + const TLsnRange &range = OwnerLsnRange[i]; + if (range.IsPresent) { + str << "{ownerId# " << i << " first# " << range.FirstLsn << " last# " << range.LastLsn << "}"; + } + } + str << "}}"; + return str.Str(); + } }; -struct TLogChunkInfo { - struct TLsnRange { - ui64 FirstLsn; - ui64 LastLsn; - bool IsPresent; - - TLsnRange() - : FirstLsn(0xffffffffffffffffull) - , LastLsn(0) - , IsPresent(false) - {} - }; - ui32 ChunkIdx; - ui64 PrevChunkLastNonce; - ui64 FirstNonce; - ui64 LastNonce; - TVector<TLsnRange> OwnerLsnRange; - - ui32 CurrentUserCount; - - bool IsEndOfSplice; - - TLogChunkInfo(ui32 chunkIdx, ui32 ownerCount) - : ChunkIdx(chunkIdx) - , PrevChunkLastNonce(0) - , FirstNonce(0) - , LastNonce(0) - , OwnerLsnRange(ownerCount) - , CurrentUserCount(0) - , IsEndOfSplice(false) - {} - - template <bool AllowNewOwners> - void RegisterLogSector(TOwner ownerId, ui64 ownerLsn) { - if (ownerId >= OwnerLsnRange.size()) { - if (AllowNewOwners) { - OwnerLsnRange.resize(ownerId + 1); - } else { - return; - } - } - TLogChunkInfo::TLsnRange &range = OwnerLsnRange[ownerId]; - range.FirstLsn = Min(range.FirstLsn, ownerLsn); - range.LastLsn = Max(range.LastLsn, ownerLsn); - if (!range.IsPresent) { - range.IsPresent = true; - ++CurrentUserCount; - } - } - - TString ToString() const { - 
TStringStream str; - str << "{ChunkIdx# " << ChunkIdx - << " PrevChunkLastNonce# " << PrevChunkLastNonce - << " FirstNonce# " << FirstNonce - << " LastNonce# " << LastNonce - << " CurrentUserCount# " << CurrentUserCount - << " OwnersLsnRange# {"; - - for (size_t i = 0; i < OwnerLsnRange.size(); ++i) { - const TLsnRange &range = OwnerLsnRange[i]; - if (range.IsPresent) { - str << "{ownerId# " << i << " first# " << range.FirstLsn << " last# " << range.LastLsn << "}"; - } - } - str << "}}"; - return str.Str(); - } -}; - } // NPDisk } // NKikimr -template<> -inline void Out<NKikimr::NPDisk::TLogChunkInfo>(IOutputStream& os, const NKikimr::NPDisk::TLogChunkInfo& x) { - os << x.ToString(); -} +template<> +inline void Out<NKikimr::NPDisk::TLogChunkInfo>(IOutputStream& os, const NKikimr::NPDisk::TLogChunkInfo& x) { + os << x.ToString(); +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.cpp index 1fc5520691..06ef9e9f88 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.cpp @@ -1,10 +1,10 @@ -#include "blobstorage_pdisk_blockdevice.h" -#include "blobstorage_pdisk_completion_impl.h" +#include "blobstorage_pdisk_blockdevice.h" +#include "blobstorage_pdisk_completion_impl.h" #include "blobstorage_pdisk_data.h" #include "blobstorage_pdisk_drivemodel.h" -#include "blobstorage_pdisk_impl.h" -#include "blobstorage_pdisk_sectorrestorator.h" -#include "blobstorage_pdisk_syslogreader.h" +#include "blobstorage_pdisk_impl.h" +#include "blobstorage_pdisk_sectorrestorator.h" +#include "blobstorage_pdisk_syslogreader.h" namespace NKikimr { namespace NPDisk { @@ -35,42 +35,42 @@ public: // Public interface //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TSysLogReadCompletionPart : public TCompletionPart { - TAlignedData *CommonData; - ui32 
SizeToReadPart; - ui32 Offset; - TBuffer::TPtr DataPart; - -public: - TSysLogReadCompletionPart(TCumulativeCompletionHolder *cumulativeCompletionHolder, TAlignedData *data, - TBuffer *buffer, ui32 sizeToReadPart, ui32 offset) - : TCompletionPart(cumulativeCompletionHolder) - , CommonData(data) - , SizeToReadPart(sizeToReadPart) - , Offset(offset) - , DataPart(buffer) - {} - - void Exec(TActorSystem *actorSystem) override { - memcpy(CommonData->Get() + Offset, DataPart->Data(), SizeToReadPart); - DataPart.Reset(); - TCompletionPart::Exec(actorSystem); - } - - virtual ~TSysLogReadCompletionPart() { - } -}; - -TSysLogReader::TSysLogReader(TPDisk *pDisk, TActorSystem *const actorSystem, const TActorId &replyTo, TReqId reqId) +class TSysLogReadCompletionPart : public TCompletionPart { + TAlignedData *CommonData; + ui32 SizeToReadPart; + ui32 Offset; + TBuffer::TPtr DataPart; + +public: + TSysLogReadCompletionPart(TCumulativeCompletionHolder *cumulativeCompletionHolder, TAlignedData *data, + TBuffer *buffer, ui32 sizeToReadPart, ui32 offset) + : TCompletionPart(cumulativeCompletionHolder) + , CommonData(data) + , SizeToReadPart(sizeToReadPart) + , Offset(offset) + , DataPart(buffer) + {} + + void Exec(TActorSystem *actorSystem) override { + memcpy(CommonData->Get() + Offset, DataPart->Data(), SizeToReadPart); + DataPart.Reset(); + TCompletionPart::Exec(actorSystem); + } + + virtual ~TSysLogReadCompletionPart() { + } +}; + +TSysLogReader::TSysLogReader(TPDisk *pDisk, TActorSystem *const actorSystem, const TActorId &replyTo, TReqId reqId) : PDisk(pDisk) , ActorSystem(actorSystem) , ReplyTo(replyTo) , ReqId(reqId) - , Result(new TEvReadLogResult(NKikimrProto::ERROR, TLogPosition{0, 0}, TLogPosition::Invalid(), - true, 0, nullptr, 0)) - , Cypher(pDisk->Cfg->EnableSectorEncryption) - , SizeToRead(PDisk->Format.SysLogSectorCount * ReplicationFactor * PDisk->Format.SectorSize) - , Data(SizeToRead) + , Result(new TEvReadLogResult(NKikimrProto::ERROR, TLogPosition{0, 0}, 
TLogPosition::Invalid(), + true, 0, nullptr, 0)) + , Cypher(pDisk->Cfg->EnableSectorEncryption) + , SizeToRead(PDisk->Format.SysLogSectorCount * ReplicationFactor * PDisk->Format.SectorSize) + , Data(SizeToRead) { Cypher.SetKey(PDisk->Format.SysLogKey); AtomicIncrement(PDisk->InFlightLogRead); @@ -90,30 +90,30 @@ TSysLogReader::~TSysLogReader() { void TSysLogReader::Start() { TDiskFormat &format = PDisk->Format; - auto *finalCompletion = new TCompletionSysLogRead(this); - finalCompletion->CostNs = PDisk->DriveModel.TimeForSizeNs(SizeToRead, 0, TDriveModel::EOperationType::OP_TYPE_READ); - const ui32 bufferSize = PDisk->BufferPool->GetBufferSize(); - const ui32 partsToRead = (SizeToRead + bufferSize - 1) / bufferSize; + auto *finalCompletion = new TCompletionSysLogRead(this); + finalCompletion->CostNs = PDisk->DriveModel.TimeForSizeNs(SizeToRead, 0, TDriveModel::EOperationType::OP_TYPE_READ); + const ui32 bufferSize = PDisk->BufferPool->GetBufferSize(); + const ui32 partsToRead = (SizeToRead + bufferSize - 1) / bufferSize; Y_VERIFY(partsToRead > 0); - TVector<TCompletionAction *> completionParts; - TVector<TBuffer *> bufferParts; - completionParts.reserve(partsToRead); - bufferParts.reserve(partsToRead); - auto *cumulativeCompletion = new TCumulativeCompletionHolder(); - for (ui32 idx = 0; idx < partsToRead; ++idx) { - const ui32 offset = idx * bufferSize; - const ui32 sizeToReadPart = Min(bufferSize, SizeToRead - offset); - bufferParts.push_back(PDisk->BufferPool->Pop()); - completionParts.push_back(new TSysLogReadCompletionPart(cumulativeCompletion, &Data, - bufferParts[idx], sizeToReadPart, offset)); - } - cumulativeCompletion->SetCompletionAction(finalCompletion); - for (ui32 idx = 0; idx < partsToRead; ++idx) { - const ui32 offset = idx * bufferSize; - const ui32 sizeToReadPart = Min(bufferSize, SizeToRead - offset); - PDisk->BlockDevice->PreadAsync(bufferParts[idx]->Data(), sizeToReadPart, BeginSectorIdx * format.SectorSize + offset, - completionParts[idx], 
ReqId, {}); - } + TVector<TCompletionAction *> completionParts; + TVector<TBuffer *> bufferParts; + completionParts.reserve(partsToRead); + bufferParts.reserve(partsToRead); + auto *cumulativeCompletion = new TCumulativeCompletionHolder(); + for (ui32 idx = 0; idx < partsToRead; ++idx) { + const ui32 offset = idx * bufferSize; + const ui32 sizeToReadPart = Min(bufferSize, SizeToRead - offset); + bufferParts.push_back(PDisk->BufferPool->Pop()); + completionParts.push_back(new TSysLogReadCompletionPart(cumulativeCompletion, &Data, + bufferParts[idx], sizeToReadPart, offset)); + } + cumulativeCompletion->SetCompletionAction(finalCompletion); + for (ui32 idx = 0; idx < partsToRead; ++idx) { + const ui32 offset = idx * bufferSize; + const ui32 sizeToReadPart = Min(bufferSize, SizeToRead - offset); + PDisk->BlockDevice->PreadAsync(bufferParts[idx]->Data(), sizeToReadPart, BeginSectorIdx * format.SectorSize + offset, + completionParts[idx], ReqId, {}); + } } void TSysLogReader::Exec() { @@ -157,9 +157,9 @@ void TSysLogReader::RestoreSectorSets() { const ui64 magic = format.MagicSysLogChunk; const bool isErasureEncode = format.IsErasureEncodeSysLog(); - TSectorRestorator restorator(true, LogErasureDataParts, isErasureEncode, format, - PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); - restorator.Restore(sectorSetData, sectorIdx * format.SectorSize, magic, 0, PDisk->Cfg->UseT1ha0HashInFooter); + TSectorRestorator restorator(true, LogErasureDataParts, isErasureEncode, format, + PDisk->ActorSystem, PDisk->PDiskActor, PDisk->PDiskId, &PDisk->Mon, PDisk->BufferPool.Get()); + restorator.Restore(sectorSetData, sectorIdx * format.SectorSize, magic, 0, PDisk->Cfg->UseT1ha0HashInFooter); if (!restorator.GoodSectorFlags) { continue; @@ -221,7 +221,7 @@ void TSysLogReader::FindLoopOffset() { } bool hasAnotherStart = false; for (ui32 idx = loopOffset + 1; idx < SectorSetInfo.size(); ++idx) { - if (SectorSetInfo[idx].HasStart) { + if 
(SectorSetInfo[idx].HasStart) { hasAnotherStart = true; } } @@ -357,7 +357,7 @@ void TSysLogReader::FindMaxNonce() { void TSysLogReader::PrepareResult() { TSectorSetInfo &info = SectorSetInfo[BestRecordFirstOffset % SectorSetInfo.size()]; - TString payload(TString::Uninitialized(info.FullPayloadSize)); + TString payload(TString::Uninitialized(info.FullPayloadSize)); VerboseCheck(info.FullPayloadSize >= info.PayloadPartSize, "First payload part too large. Marker# BPS04"); if (IsReplied) { return; @@ -384,7 +384,7 @@ void TSysLogReader::PrepareResult() { } ui32 sectorIdx = JunkBeginOffset * ReplicationFactor + BeginSectorIdx; if (!IsReplied) { - Result->Results.push_back(TLogRecord(info.PayloadSignature, payload, info.PayloadLsn)); + Result->Results.push_back(TLogRecord(info.PayloadSignature, payload, info.PayloadLsn)); Result->NextPosition = PDisk->LogPosition(0, sectorIdx, 0); Result->Status = NKikimrProto::OK; } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.h index a6619baa65..909ff8aee6 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_syslogreader.h @@ -83,20 +83,20 @@ namespace NPDisk { // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -class TPDisk; - +class TPDisk; + class TSysLogReader : public TThrRefBase { TPDisk *const PDisk; TActorSystem *const ActorSystem; const TActorId ReplyTo; const TReqId ReqId; - THolder<TEvReadLogResult> Result; + THolder<TEvReadLogResult> Result; - TPDiskStreamCypher Cypher; + TPDiskStreamCypher Cypher; TVector<ui64> BadOffsets; - - ui32 SizeToRead; + + ui32 SizeToRead; TAlignedData Data; struct TSectorSetInfo { @@ -108,7 +108,7 @@ class TSysLogReader : public TThrRefBase { ui64 PayloadLsn = 0; ui32 FirstSectorIdx = 0; ui32 GoodSectorFlags = 0; - TLogSignature PayloadSignature = 0; + TLogSignature 
PayloadSignature = 0; bool IsIdeal = false; bool HasStart = false; bool HasMiddle = false; @@ -134,7 +134,7 @@ class TSysLogReader : public TThrRefBase { bool IsReplied = false; public: - TSysLogReader(TPDisk *pDisk, TActorSystem *const actorSystem, const TActorId &replyTo, TReqId reqId); + TSysLogReader(TPDisk *pDisk, TActorSystem *const actorSystem, const TActorId &replyTo, TReqId reqId); virtual ~TSysLogReader(); void Start(); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.cpp index 319047673e..152deec999 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.cpp @@ -1,19 +1,19 @@ #include "blobstorage_pdisk_impl.h" -#include "blobstorage_pdisk_blockdevice.h" +#include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> #include "blobstorage_pdisk_completion_impl.h" #include "blobstorage_pdisk_crypto.h" #include "blobstorage_pdisk_data.h" -#include "blobstorage_pdisk_actorsystem_creator.h" +#include "blobstorage_pdisk_actorsystem_creator.h" #include "blobstorage_pdisk_mon.h" #include "blobstorage_pdisk_requestimpl.h" #include "blobstorage_pdisk_state.h" #include "blobstorage_pdisk_thread.h" #include "blobstorage_pdisk_tools.h" #include "blobstorage_pdisk_util_countedqueueoneone.h" -#include "blobstorage_pdisk_writer.h" - +#include "blobstorage_pdisk_writer.h" + #include <ydb/core/blobstorage/base/wilson_events.h> #include <ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h> #include <ydb/core/node_whiteboard/node_whiteboard.h> @@ -21,11 +21,11 @@ #include <ydb/library/pdisk_io/aio.h> #include <library/cpp/actors/core/actor_bootstrapped.h> -#include <library/cpp/actors/core/executor_pool_io.h> -#include <library/cpp/actors/core/executor_pool_basic.h> +#include <library/cpp/actors/core/executor_pool_io.h> +#include <library/cpp/actors/core/executor_pool_basic.h> #include 
<library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/core/mon.h> -#include <library/cpp/actors/core/scheduler_basic.h> +#include <library/cpp/actors/core/scheduler_basic.h> #include <library/cpp/monlib/service/pages/templates.h> #include <util/generic/algorithm.h> @@ -52,45 +52,45 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); void FormatPDisk(TString path, ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, const ui64 &diskGuid, const NPDisk::TKey &chunkKey, const NPDisk::TKey &logKey, const NPDisk::TKey &sysLogKey, const NPDisk::TKey &mainKey, TString textMessage, const bool isErasureEncodeUserLog, bool trimEntireDevice, - TIntrusivePtr<NPDisk::TSectorMap> sectorMap) + TIntrusivePtr<NPDisk::TSectorMap> sectorMap) { - TActorSystemCreator creator; - + TActorSystemCreator creator; + bool isBlockDevice = false; - TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DEVICE_TYPE_ROT; - if (sectorMap) { - if (diskSizeBytes) { - sectorMap->ForceSize(diskSizeBytes); - } else { - if (sectorMap->DeviceSize == 0) { - ythrow yexception() << "Can't create in-memory fake disk map with 0 size, path# " << path.Quote(); - } - diskSizeBytes = sectorMap->DeviceSize; - } - } else { - if (path.StartsWith("PCIe:")) { - deviceType = TPDiskCategory::DEVICE_TYPE_NVME; - } - if (diskSizeBytes == 0) { - creator.GetActorSystem()->AppData<TAppData>()->IoContextFactory - ->DetectFileParameters(path, diskSizeBytes, isBlockDevice); - } + TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DEVICE_TYPE_ROT; + if (sectorMap) { + if (diskSizeBytes) { + sectorMap->ForceSize(diskSizeBytes); + } else { + if (sectorMap->DeviceSize == 0) { + ythrow yexception() << "Can't create in-memory fake disk map with 0 size, path# " << path.Quote(); + } + diskSizeBytes = sectorMap->DeviceSize; + } + } else { + if (path.StartsWith("PCIe:")) { + deviceType = TPDiskCategory::DEVICE_TYPE_NVME; + } + if (diskSizeBytes == 0) { + 
creator.GetActorSystem()->AppData<TAppData>()->IoContextFactory + ->DetectFileParameters(path, diskSizeBytes, isBlockDevice); + } } - Y_VERIFY_S(diskSizeBytes > 0 && diskSizeBytes / userAccessibleChunkSizeBytes > 200, - " diskSizeBytes# " << diskSizeBytes << - " userAccessibleChunkSizeBytes# " << userAccessibleChunkSizeBytes << - " bool(sectorMap)# " << bool(sectorMap) << - " sectorMap->DeviceSize# " << (sectorMap ? sectorMap->DeviceSize : 0) - ); - - TIntrusivePtr<TPDiskConfig> cfg(new TPDiskConfig(path, diskGuid, 0xffffffffull, - TPDiskCategory(deviceType, 0).GetRaw())); + Y_VERIFY_S(diskSizeBytes > 0 && diskSizeBytes / userAccessibleChunkSizeBytes > 200, + " diskSizeBytes# " << diskSizeBytes << + " userAccessibleChunkSizeBytes# " << userAccessibleChunkSizeBytes << + " bool(sectorMap)# " << bool(sectorMap) << + " sectorMap->DeviceSize# " << (sectorMap ? sectorMap->DeviceSize : 0) + ); + + TIntrusivePtr<TPDiskConfig> cfg(new TPDiskConfig(path, diskGuid, 0xffffffffull, + TPDiskCategory(deviceType, 0).GetRaw())); cfg->SectorMap = sectorMap; - // Disable encryption for SectorMap - cfg->EnableSectorEncryption = !cfg->SectorMap; + // Disable encryption for SectorMap + cfg->EnableSectorEncryption = !cfg->SectorMap; - if (!isBlockDevice && !cfg->UseSpdkNvmeDriver && !sectorMap) { - // path is a regular file + if (!isBlockDevice && !cfg->UseSpdkNvmeDriver && !sectorMap) { + // path is a regular file if (diskSizeBytes == 0) { ythrow yexception() << "Can't create file with 0 size, path# " << path; } @@ -102,34 +102,34 @@ void FormatPDisk(TString path, ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 us const TIntrusivePtr<NMonitoring::TDynamicCounters> counters(new NMonitoring::TDynamicCounters); - THolder<NPDisk::TPDisk> pDisk(new NPDisk::TPDisk(cfg, counters)); + THolder<NPDisk::TPDisk> pDisk(new NPDisk::TPDisk(cfg, counters)); + + pDisk->Initialize(creator.GetActorSystem(), TActorId()); - pDisk->Initialize(creator.GetActorSystem(), TActorId()); - if 
(!pDisk->BlockDevice->IsGood()) { - ythrow yexception() << "Device with path# " << path << " is not good, info# " << pDisk->BlockDevice->DebugInfo(); + ythrow yexception() << "Device with path# " << path << " is not good, info# " << pDisk->BlockDevice->DebugInfo(); } - pDisk->WriteDiskFormat(diskSizeBytes, sectorSizeBytes, userAccessibleChunkSizeBytes, diskGuid, + pDisk->WriteDiskFormat(diskSizeBytes, sectorSizeBytes, userAccessibleChunkSizeBytes, diskGuid, chunkKey, logKey, sysLogKey, mainKey, textMessage, isErasureEncodeUserLog, trimEntireDevice); } bool ReadPDiskFormatInfo(const TString &path, const NPDisk::TKey &mainKey, TPDiskInfo &outInfo, - const bool doLock, TIntrusivePtr<NPDisk::TSectorMap> sectorMap) { + const bool doLock, TIntrusivePtr<NPDisk::TSectorMap> sectorMap) { const TIntrusivePtr<NMonitoring::TDynamicCounters> counters(new NMonitoring::TDynamicCounters); - auto mon = std::make_unique<TPDiskMon>(counters, 0, nullptr); + auto mon = std::make_unique<TPDiskMon>(counters, 0, nullptr); bool useSdpkNvmeDriver = path.StartsWith("PCIe:"); - NPDisk::TDeviceMode::TFlags deviceFlags = 0; + NPDisk::TDeviceMode::TFlags deviceFlags = 0; if (useSdpkNvmeDriver) { - deviceFlags |= NPDisk::TDeviceMode::UseSpdk; + deviceFlags |= NPDisk::TDeviceMode::UseSpdk; } if (doLock) { - deviceFlags |= NPDisk::TDeviceMode::LockFile; + deviceFlags |= NPDisk::TDeviceMode::LockFile; } - TActorSystemCreator creator; - THolder<NPDisk::IBlockDevice> blockDevice( - NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, deviceFlags, sectorMap, creator.GetActorSystem())); + TActorSystemCreator creator; + THolder<NPDisk::IBlockDevice> blockDevice( + NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, deviceFlags, sectorMap, creator.GetActorSystem())); if (!blockDevice->IsGood()) { TStringStream str; str << "Can't lock file, make sure you have access rights, file exists and is not locked by another process."; @@ -147,7 +147,7 @@ bool ReadPDiskFormatInfo(const TString &path, const 
NPDisk::TKey &mainKey, TPDis blockDevice->PreadSync(formatRaw->Data(), formatSectorsSize, 0, NPDisk::TReqId(NPDisk::TReqId::ReadFormatInfo, 0), {}); - NPDisk::TPDiskStreamCypher cypher(true); // Format record is always encrypted + NPDisk::TPDiskStreamCypher cypher(true); // Format record is always encrypted cypher.SetKey(mainKey); bool isOk = false; alignas(16) NPDisk::TDiskFormat format; @@ -207,8 +207,8 @@ bool ReadPDiskFormatInfo(const TString &path, const NPDisk::TKey &mainKey, TPDis (sector + format.SectorSize - sizeof(NPDisk::TDataSectorFooter)); ui64 sectorOffset = sysLogOffset + (ui64)((idx / 3) * 3) * (ui64)format.SectorSize; - bool isCrcOk = NPDisk::TPDiskHashCalculator(KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING).CheckSectorHash( - sectorOffset, format.MagicSysLogChunk, sector, format.SectorSize, logFooter->Hash); + bool isCrcOk = NPDisk::TPDiskHashCalculator(KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING).CheckSectorHash( + sectorOffset, format.MagicSysLogChunk, sector, format.SectorSize, logFooter->Hash); outInfo.SectorInfo.push_back(TPDiskInfo::TSectorInfo(logFooter->Nonce, logFooter->Version, isCrcOk)); } @@ -221,13 +221,13 @@ bool ReadPDiskFormatInfo(const TString &path, const NPDisk::TKey &mainKey, TPDis return false; } -void ObliterateDisk(TString path) { - TFile f(path, OpenAlways | RdWr); - f.Flock(LOCK_EX | LOCK_NB); +void ObliterateDisk(TString path) { + TFile f(path, OpenAlways | RdWr); + f.Flock(LOCK_EX | LOCK_NB); + + TVector<ui8> zeros(NPDisk::FormatSectorSize * NPDisk::ReplicationFactor, 0); + f.Pwrite(zeros.data(), zeros.size(), 0); + f.Flush(); +} - TVector<ui8> zeros(NPDisk::FormatSectorSize * NPDisk::ReplicationFactor, 0); - f.Pwrite(zeros.data(), zeros.size(), 0); - f.Flush(); -} - } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h index 129cd2a0c7..89b8f6418e 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h +++ 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h @@ -37,17 +37,17 @@ struct TPDiskInfo { TVector<TSectorInfo> SectorInfo; }; -// Throws TFileError in case of errors -void ObliterateDisk(TString path); - +// Throws TFileError in case of errors +void ObliterateDisk(TString path); + void FormatPDisk(TString path, ui64 diskSizeBytes, ui32 sectorSizeBytes, ui32 userAccessibleChunkSizeBytes, const ui64 &diskGuid, const NPDisk::TKey &chunkKey, const NPDisk::TKey &logKey, const NPDisk::TKey &sysLogKey, const NPDisk::TKey &mainKey, TString textMessage, - const bool isErasureEncodeUserLog = false, const bool trimEntireDevice = false, + const bool isErasureEncodeUserLog = false, const bool trimEntireDevice = false, TIntrusivePtr<NPDisk::TSectorMap> sectorMap = nullptr); bool ReadPDiskFormatInfo(const TString &path, const NPDisk::TKey &mainKey, TPDiskInfo &outInfo, - const bool doLock = false, TIntrusivePtr<NPDisk::TSectorMap> sectorMap = nullptr); + const bool doLock = false, TIntrusivePtr<NPDisk::TSectorMap> sectorMap = nullptr); } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp index 3796a22ffd..b49b5bd206 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp @@ -1,24 +1,24 @@ -#include "blobstorage_pdisk_ut.h" - -#include "blobstorage_pdisk_abstract.h" -#include "blobstorage_pdisk_impl.h" - +#include "blobstorage_pdisk_ut.h" + +#include "blobstorage_pdisk_abstract.h" +#include "blobstorage_pdisk_impl.h" + #include <ydb/core/blobstorage/crypto/default.h> #include <ydb/core/testlib/actors/test_runtime.h> - + namespace NKikimr { -Y_UNIT_TEST_SUITE(TPDiskTest) { - Y_UNIT_TEST(TestAbstractPDiskInterface) { - TString path = "/tmp/asdqwe"; - TIntrusivePtr<TPDiskConfig> cfg = new TPDiskConfig(path, 12345, 0xffffffffull, - TPDiskCategory(TPDiskCategory::DEVICE_TYPE_ROT, 0).GetRaw()); - const 
TIntrusivePtr<NMonitoring::TDynamicCounters> counters(new NMonitoring::TDynamicCounters); - - THolder<NPDisk::IPDisk> pDisk = MakeHolder<NPDisk::TPDisk>(cfg, counters); - pDisk->Wakeup(); - } - +Y_UNIT_TEST_SUITE(TPDiskTest) { + Y_UNIT_TEST(TestAbstractPDiskInterface) { + TString path = "/tmp/asdqwe"; + TIntrusivePtr<TPDiskConfig> cfg = new TPDiskConfig(path, 12345, 0xffffffffull, + TPDiskCategory(TPDiskCategory::DEVICE_TYPE_ROT, 0).GetRaw()); + const TIntrusivePtr<NMonitoring::TDynamicCounters> counters(new NMonitoring::TDynamicCounters); + + THolder<NPDisk::IPDisk> pDisk = MakeHolder<NPDisk::TPDisk>(cfg, counters); + pDisk->Wakeup(); + } + Y_UNIT_TEST(TestThatEveryValueOfEStateEnumKeepsItIntegerValue) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Warning! @@ -39,987 +39,987 @@ Y_UNIT_TEST_SUITE(TPDiskTest) { UNIT_ASSERT(NKikimrBlobStorage::TPDiskState::ChunkQuotaError == 12); UNIT_ASSERT(NKikimrBlobStorage::TPDiskState::DeviceIoError == 13); } - -struct TActorTestContext { -private: - std::optional<TActorId> PDiskActor; - THolder<TTestActorRuntime> Runtime; - std::shared_ptr<NPDisk::IIoContextFactory> IoContext; - NPDisk::TPDisk *PDisk = nullptr; - -public: - TActorId Sender; + +struct TActorTestContext { +private: + std::optional<TActorId> PDiskActor; + THolder<TTestActorRuntime> Runtime; + std::shared_ptr<NPDisk::IIoContextFactory> IoContext; + NPDisk::TPDisk *PDisk = nullptr; + +public: + TActorId Sender; NPDisk::TKey MainKey = NPDisk::YdbDefaultPDiskSequence; - TTestContext TestCtx{false, /*use sector map*/ true}; - - TIntrusivePtr<TPDiskConfig> DefaultPDiskConfig(bool isBad) { - TString path; - EntropyPool().Read(&TestCtx.PDiskGuid, sizeof(TestCtx.PDiskGuid)); - ui64 formatGuid = TestCtx.PDiskGuid + static_cast<ui64>(isBad); - FormatPDiskForTest(path, formatGuid, MIN_CHUNK_SIZE, false, TestCtx.SectorMap); - - ui64 pDiskCategory = 0; - TIntrusivePtr<TPDiskConfig> pDiskConfig = new 
TPDiskConfig(path, TestCtx.PDiskGuid, 1, pDiskCategory); - pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; - pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; - pDiskConfig->ChunkSize = MIN_CHUNK_SIZE; - pDiskConfig->SectorMap = TestCtx.SectorMap; - pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; - return pDiskConfig; - } - - TActorTestContext(bool isBad) - : Runtime(new TTestActorRuntime(1, true)) - { - auto appData = MakeHolder<TAppData>(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); - IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); - appData->IoContextFactory = IoContext.get(); - - Runtime->SetLogBackend(IsLowVerbose ? CreateStderrBackend() : CreateNullBackend()); - Runtime->Initialize(TTestActorRuntime::TEgg{appData.Release(), nullptr, {}}); - Runtime->SetLogPriority(NKikimrServices::BS_PDISK, NLog::PRI_NOTICE); - Runtime->SetLogPriority(NKikimrServices::BS_PDISK_SYSLOG, NLog::PRI_NOTICE); - Runtime->SetLogPriority(NKikimrServices::BS_PDISK_TEST, NLog::PRI_DEBUG); - Sender = Runtime->AllocateEdgeActor(); - - TIntrusivePtr<TPDiskConfig> cfg = DefaultPDiskConfig(isBad); - UpdateConfigRecreatePDisk(cfg); - } - - TIntrusivePtr<TPDiskConfig> GetPDiskConfig() { - return GetPDisk()->Cfg; - } - - void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg) { - if (PDiskActor) { - TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), - NKikimrProto::OK); - PDisk = nullptr; - Runtime->Send(new IEventHandle(*PDiskActor, Sender, new TKikimrEvents::TEvPoisonPill)); - } - - auto mainCounters = TIntrusivePtr<NMonitoring::TDynamicCounters>(new NMonitoring::TDynamicCounters()); + TTestContext TestCtx{false, /*use sector map*/ true}; + + TIntrusivePtr<TPDiskConfig> DefaultPDiskConfig(bool isBad) { + TString path; + EntropyPool().Read(&TestCtx.PDiskGuid, sizeof(TestCtx.PDiskGuid)); + ui64 formatGuid = 
TestCtx.PDiskGuid + static_cast<ui64>(isBad); + FormatPDiskForTest(path, formatGuid, MIN_CHUNK_SIZE, false, TestCtx.SectorMap); + + ui64 pDiskCategory = 0; + TIntrusivePtr<TPDiskConfig> pDiskConfig = new TPDiskConfig(path, TestCtx.PDiskGuid, 1, pDiskCategory); + pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; + pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; + pDiskConfig->ChunkSize = MIN_CHUNK_SIZE; + pDiskConfig->SectorMap = TestCtx.SectorMap; + pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; + return pDiskConfig; + } + + TActorTestContext(bool isBad) + : Runtime(new TTestActorRuntime(1, true)) + { + auto appData = MakeHolder<TAppData>(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); + IoContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); + appData->IoContextFactory = IoContext.get(); + + Runtime->SetLogBackend(IsLowVerbose ? CreateStderrBackend() : CreateNullBackend()); + Runtime->Initialize(TTestActorRuntime::TEgg{appData.Release(), nullptr, {}}); + Runtime->SetLogPriority(NKikimrServices::BS_PDISK, NLog::PRI_NOTICE); + Runtime->SetLogPriority(NKikimrServices::BS_PDISK_SYSLOG, NLog::PRI_NOTICE); + Runtime->SetLogPriority(NKikimrServices::BS_PDISK_TEST, NLog::PRI_DEBUG); + Sender = Runtime->AllocateEdgeActor(); + + TIntrusivePtr<TPDiskConfig> cfg = DefaultPDiskConfig(isBad); + UpdateConfigRecreatePDisk(cfg); + } + + TIntrusivePtr<TPDiskConfig> GetPDiskConfig() { + return GetPDisk()->Cfg; + } + + void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg) { + if (PDiskActor) { + TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), + NKikimrProto::OK); + PDisk = nullptr; + Runtime->Send(new IEventHandle(*PDiskActor, Sender, new TKikimrEvents::TEvPoisonPill)); + } + + auto mainCounters = TIntrusivePtr<NMonitoring::TDynamicCounters>(new NMonitoring::TDynamicCounters()); IActor* pDiskActor = 
CreatePDisk(cfg.Get(), MainKey, mainCounters); - PDiskActor = Runtime->Register(pDiskActor); - } - - void Send(IEventBase* ev) { - Runtime->Send(new IEventHandle(*PDiskActor, Sender, ev)); - } - - NPDisk::TPDisk *GetPDisk() { - if (!PDisk) { - // To be sure that pdisk actor is in StateOnline - TestResponce<NPDisk::TEvYardControlResult>( + PDiskActor = Runtime->Register(pDiskActor); + } + + void Send(IEventBase* ev) { + Runtime->Send(new IEventHandle(*PDiskActor, Sender, ev)); + } + + NPDisk::TPDisk *GetPDisk() { + if (!PDisk) { + // To be sure that pdisk actor is in StateOnline + TestResponce<NPDisk::TEvYardControlResult>( new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStart, &MainKey), - NKikimrProto::OK); - - const auto evControlRes = TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::GetPDiskPointer, nullptr), - NKikimrProto::OK); - PDisk = reinterpret_cast<NPDisk::TPDisk*>(evControlRes->Cookie); - } - return PDisk; - } - - template<typename T> - auto SafeRunOnPDisk(T&& f) { - TGuard<TMutex> g(GetPDisk()->StateMutex); - return f(GetPDisk()); - } - - void RestartPDiskSync() { - TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), - NKikimrProto::OK); - PDisk = nullptr; - // wait initialization and update this->PDisk - GetPDisk(); - } - - template<typename TRes> - THolder<TRes> Recv() { - return Runtime->GrabEdgeEvent<TRes>(); - } - - template<typename TRes> - THolder<TRes> TestResponce(IEventBase* ev, NKikimrProto::EReplyStatus status) { - if (ev) { - Send(ev); - } - THolder<TRes> evRes = Recv<TRes>(); - UNIT_ASSERT_C(evRes->Status == status, evRes->ToString()); - UNIT_ASSERT(status == NKikimrProto::OK || !evRes->ErrorReason.empty()); - - // Test that all ToString methods don't VERIFY - Cnull << evRes->ToString(); - return evRes; - } -}; - - Y_UNIT_TEST(TestPDiskActorErrorState) { - TActorTestContext testCtx(true); - - const TVDiskID vDiskID(0, 
1, 0, 0, 0); - testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(1, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( - new NPDisk::TEvCheckSpace(1, 1), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvLogResult>( - new NPDisk::TEvLog(1, 1, 0, nullptr, TLsnSeg(1, 1), nullptr), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvReadLogResult>( - new NPDisk::TEvReadLog(1, 1, NPDisk::TLogPosition{0, 0}), - NKikimrProto::CORRUPTED); - - testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( - new NPDisk::TEvChunkWrite(1, 1, 1, 0, nullptr, nullptr, false, 1), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvChunkReadResult>( - new NPDisk::TEvChunkRead(1, 1, 17, 0, 4096, 1, nullptr), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvHarakiriResult>( - new NPDisk::TEvHarakiri(1, 1), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvSlayResult>( - new NPDisk::TEvSlay(vDiskID, 1, 1, 1), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( - new NPDisk::TEvChunkReserve(1, 1, 3), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::ActionPause, nullptr), - NKikimrProto::CORRUPTED); - - testCtx.Send(new NActors::TEvents::TEvPoisonPill()); - } - - Y_UNIT_TEST(TestPDiskActorPDiskStopStart) { - TActorTestContext testCtx(false); - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - testCtx.TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), - NKikimrProto::OK); - testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(3, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::CORRUPTED); - 
testCtx.TestResponce<NPDisk::TEvYardControlResult>( + NKikimrProto::OK); + + const auto evControlRes = TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::GetPDiskPointer, nullptr), + NKikimrProto::OK); + PDisk = reinterpret_cast<NPDisk::TPDisk*>(evControlRes->Cookie); + } + return PDisk; + } + + template<typename T> + auto SafeRunOnPDisk(T&& f) { + TGuard<TMutex> g(GetPDisk()->StateMutex); + return f(GetPDisk()); + } + + void RestartPDiskSync() { + TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), + NKikimrProto::OK); + PDisk = nullptr; + // wait initialization and update this->PDisk + GetPDisk(); + } + + template<typename TRes> + THolder<TRes> Recv() { + return Runtime->GrabEdgeEvent<TRes>(); + } + + template<typename TRes> + THolder<TRes> TestResponce(IEventBase* ev, NKikimrProto::EReplyStatus status) { + if (ev) { + Send(ev); + } + THolder<TRes> evRes = Recv<TRes>(); + UNIT_ASSERT_C(evRes->Status == status, evRes->ToString()); + UNIT_ASSERT(status == NKikimrProto::OK || !evRes->ErrorReason.empty()); + + // Test that all ToString methods don't VERIFY + Cnull << evRes->ToString(); + return evRes; + } +}; + + Y_UNIT_TEST(TestPDiskActorErrorState) { + TActorTestContext testCtx(true); + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(1, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( + new NPDisk::TEvCheckSpace(1, 1), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvLogResult>( + new NPDisk::TEvLog(1, 1, 0, nullptr, TLsnSeg(1, 1), nullptr), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvReadLogResult>( + new NPDisk::TEvReadLog(1, 1, NPDisk::TLogPosition{0, 0}), + NKikimrProto::CORRUPTED); + + testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( + new NPDisk::TEvChunkWrite(1, 1, 1, 0, nullptr, 
nullptr, false, 1), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvChunkReadResult>( + new NPDisk::TEvChunkRead(1, 1, 17, 0, 4096, 1, nullptr), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvHarakiriResult>( + new NPDisk::TEvHarakiri(1, 1), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvSlayResult>( + new NPDisk::TEvSlay(vDiskID, 1, 1, 1), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( + new NPDisk::TEvChunkReserve(1, 1, 3), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::ActionPause, nullptr), + NKikimrProto::CORRUPTED); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + Y_UNIT_TEST(TestPDiskActorPDiskStopStart) { + TActorTestContext testCtx(false); + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + testCtx.TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), + NKikimrProto::OK); + testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(3, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvYardControlResult>( new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStart, reinterpret_cast<void*>(&testCtx.MainKey)), - NKikimrProto::OK); - testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(3, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - testCtx.Send(new NActors::TEvents::TEvPoisonPill()); - } - - void TestChunkWriteReleaseRun() { - TActorTestContext testCtx(false); - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - const auto 
evReserveRes = testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( - new NPDisk::TEvChunkReserve(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 1), - NKikimrProto::OK); - UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); - - const ui32 reservedChunk = evReserveRes->ChunkIds.front(); - NPDisk::TCommitRecord commitRecord; - commitRecord.CommitChunks.push_back(reservedChunk); - testCtx.TestResponce<NPDisk::TEvLogResult>( - new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, commitRecord, - TString(), TLsnSeg(1, 1), nullptr), - NKikimrProto::OK); - - const auto evControlRes = testCtx.TestResponce<NPDisk::TEvYardControlResult>( - new NPDisk::TEvYardControl(NPDisk::TEvYardControl::GetPDiskPointer, nullptr), - NKikimrProto::OK); - auto *pDisk = reinterpret_cast<NPDisk::TPDisk*>(evControlRes->Cookie); + NKikimrProto::OK); + testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(3, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + void TestChunkWriteReleaseRun() { + TActorTestContext testCtx(false); + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + const auto evReserveRes = testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( + new NPDisk::TEvChunkReserve(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 1), + NKikimrProto::OK); + UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); + + const ui32 reservedChunk = evReserveRes->ChunkIds.front(); + NPDisk::TCommitRecord commitRecord; + commitRecord.CommitChunks.push_back(reservedChunk); + testCtx.TestResponce<NPDisk::TEvLogResult>( + new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, commitRecord, + TString(), TLsnSeg(1, 1), nullptr), + NKikimrProto::OK); + + const auto 
evControlRes = testCtx.TestResponce<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::GetPDiskPointer, nullptr), + NKikimrProto::OK); + auto *pDisk = reinterpret_cast<NPDisk::TPDisk*>(evControlRes->Cookie); pDisk->PDiskThread.StopSync(); - - { - NPDisk::TCommitRecord commitRecord; - commitRecord.DeleteChunks.push_back(reservedChunk); - NPDisk::TEvLog ev(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, commitRecord, - TString(), TLsnSeg(2, 2), nullptr); - NPDisk::TLogWrite *log = new NPDisk::TLogWrite(ev, testCtx.Sender, 0, {}, {}); - bool ok = pDisk->PreprocessRequest(log); - UNIT_ASSERT(ok); - pDisk->RouteRequest(log); - } - pDisk->ProcessLogWriteQueueAndCommits(); - - { - TString chunkWriteData = PrepareData(1024); - NPDisk::TEvChunkWrite ev(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, reservedChunk, - 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(chunkWriteData), nullptr, false, 0); - NPDisk::TChunkWrite *chunkWrite = new NPDisk::TChunkWrite(ev, testCtx.Sender, {}, {}); - bool ok = pDisk->PreprocessRequest(chunkWrite); - UNIT_ASSERT(!ok); - } - - pDisk->ProcessChunkWriteQueue(); - - testCtx.TestResponce<NPDisk::TEvLogResult>( - nullptr, - NKikimrProto::OK); - testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( - nullptr, - NKikimrProto::ERROR); - - testCtx.Send(new NActors::TEvents::TEvPoisonPill()); - } - - Y_UNIT_TEST(TestChunkWriteRelease) { - for (ui32 i = 0; i < 16; ++i) { - TestChunkWriteReleaseRun(); - } - } - - Y_UNIT_TEST(TestPDiskOwnerRecreation) { - TActorTestContext testCtx(false); - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - for (ui32 i = 2; i < 2000; ++i) { - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - testCtx.TestResponce<NPDisk::TEvSlayResult>( - new NPDisk::TEvSlay(vDiskID, evInitRes->PDiskParams->OwnerRound + 1, 0, 0), - NKikimrProto::OK); - } - } 
- - Y_UNIT_TEST(TestPDiskOwnerRecreationWithStableOwner) { - TActorTestContext testCtx(false); - - // Create "stable" owner, who will be alive during all test - ui32 i = 2; - const TVDiskID vDiskID_stable(0, 1, 0, 0, 0); - testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(i++, vDiskID_stable, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - const TVDiskID vDiskID(1, 1, 0, 0, 0); - for (; i < 2000; ++i) { - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - testCtx.TestResponce<NPDisk::TEvSlayResult>( - new NPDisk::TEvSlay(vDiskID, evInitRes->PDiskParams->OwnerRound + 1, 0, 0), - NKikimrProto::OK); - } - } - -struct TVDiskIDOwnerRound { - TVDiskID VDiskID; - ui64 OwnerRound; -}; - -void RecreateOwner(TActorTestContext& testCtx, TVDiskIDOwnerRound& vdisk) { - testCtx.TestResponce<NPDisk::TEvSlayResult>( - new NPDisk::TEvSlay(vdisk.VDiskID, vdisk.OwnerRound + 1, 0, 0), - NKikimrProto::OK); - - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(vdisk.OwnerRound + 1, vdisk.VDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - vdisk.OwnerRound = evInitRes->PDiskParams->OwnerRound; + + { + NPDisk::TCommitRecord commitRecord; + commitRecord.DeleteChunks.push_back(reservedChunk); + NPDisk::TEvLog ev(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, commitRecord, + TString(), TLsnSeg(2, 2), nullptr); + NPDisk::TLogWrite *log = new NPDisk::TLogWrite(ev, testCtx.Sender, 0, {}, {}); + bool ok = pDisk->PreprocessRequest(log); + UNIT_ASSERT(ok); + pDisk->RouteRequest(log); + } + pDisk->ProcessLogWriteQueueAndCommits(); + + { + TString chunkWriteData = PrepareData(1024); + NPDisk::TEvChunkWrite ev(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, reservedChunk, + 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(chunkWriteData), nullptr, false, 0); + 
NPDisk::TChunkWrite *chunkWrite = new NPDisk::TChunkWrite(ev, testCtx.Sender, {}, {}); + bool ok = pDisk->PreprocessRequest(chunkWrite); + UNIT_ASSERT(!ok); + } + + pDisk->ProcessChunkWriteQueue(); + + testCtx.TestResponce<NPDisk::TEvLogResult>( + nullptr, + NKikimrProto::OK); + testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( + nullptr, + NKikimrProto::ERROR); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + Y_UNIT_TEST(TestChunkWriteRelease) { + for (ui32 i = 0; i < 16; ++i) { + TestChunkWriteReleaseRun(); + } + } + + Y_UNIT_TEST(TestPDiskOwnerRecreation) { + TActorTestContext testCtx(false); + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + for (ui32 i = 2; i < 2000; ++i) { + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + testCtx.TestResponce<NPDisk::TEvSlayResult>( + new NPDisk::TEvSlay(vDiskID, evInitRes->PDiskParams->OwnerRound + 1, 0, 0), + NKikimrProto::OK); + } + } + + Y_UNIT_TEST(TestPDiskOwnerRecreationWithStableOwner) { + TActorTestContext testCtx(false); + + // Create "stable" owner, who will be alive during all test + ui32 i = 2; + const TVDiskID vDiskID_stable(0, 1, 0, 0, 0); + testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(i++, vDiskID_stable, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + const TVDiskID vDiskID(1, 1, 0, 0, 0); + for (; i < 2000; ++i) { + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + testCtx.TestResponce<NPDisk::TEvSlayResult>( + new NPDisk::TEvSlay(vDiskID, evInitRes->PDiskParams->OwnerRound + 1, 0, 0), + NKikimrProto::OK); + } + } + +struct TVDiskIDOwnerRound { + TVDiskID VDiskID; + ui64 OwnerRound; +}; + +void RecreateOwner(TActorTestContext& testCtx, TVDiskIDOwnerRound& vdisk) { + testCtx.TestResponce<NPDisk::TEvSlayResult>( + new 
NPDisk::TEvSlay(vdisk.VDiskID, vdisk.OwnerRound + 1, 0, 0), + NKikimrProto::OK); + + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(vdisk.OwnerRound + 1, vdisk.VDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + vdisk.OwnerRound = evInitRes->PDiskParams->OwnerRound; } - - Y_UNIT_TEST(TestPDiskManyOwnersInitiation) { - TActorTestContext testCtx(false); - - TVector<TVDiskIDOwnerRound> goodIds; - ui64 badIdsCount = 0; - - for (int i = 2; i < 2000; ++i) { - const TVDiskID vDiskID(i, 1, 0, 0, 0); - if (badIdsCount == 0) { - testCtx.Send(new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid)); - const auto evInitRes = testCtx.Recv<NPDisk::TEvYardInitResult>(); - if (evInitRes->Status == NKikimrProto::OK) { - goodIds.push_back({vDiskID, evInitRes->PDiskParams->OwnerRound}); - } else { - ++badIdsCount; - } - } else { - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::ERROR); - ++badIdsCount; - } - } - - RecreateOwner(testCtx, goodIds.front()); - - UNIT_ASSERT(badIdsCount > 0 && goodIds.size() > 0); - for (auto v : goodIds) { - testCtx.TestResponce<NPDisk::TEvSlayResult>( - new NPDisk::TEvSlay(v.VDiskID, v.OwnerRound + 1, 0, 0), - NKikimrProto::OK); - } - } - -enum class EChunkState { - UNKNOWN, - RESERVED, - COMMIT_INFLIGHT, - COMMITTED, - DELETE_INFLIGHT, - DELETED -}; - -struct TVDiskMock { - static std::atomic<ui64> Idx; - static std::atomic<ui64> OwnerRound; - - TActorTestContext *TestCtx; - const TVDiskID VDiskID; - TIntrusivePtr<TPDiskParams> PDiskParams; - ui64 LastUsedLsn = 0; - ui64 FirstLsnToKeep = 1; - - TMap<EChunkState, TSet<TChunkIdx>> Chunks; - - TVDiskMock(TActorTestContext *testCtx) - : TestCtx(testCtx) - , VDiskID(Idx.fetch_add(1), 1, 0, 0, 0) - {} - - TLsnSeg GetLsnSeg() { - ++LastUsedLsn; - return {LastUsedLsn, LastUsedLsn}; - }; - - void InitFull() { - Init(); - ReadLog(); - 
SendEvLogImpl(1, {}, true); - } - - void Init() { - const auto evInitRes = TestCtx->TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(OwnerRound.fetch_add(1), VDiskID, TestCtx->TestCtx.PDiskGuid), - NKikimrProto::OK); - PDiskParams = evInitRes->PDiskParams; - - TSet<TChunkIdx> commited = Chunks[EChunkState::COMMITTED]; - for (TChunkIdx chunk : evInitRes->OwnedChunks) { - UNIT_ASSERT_C(commited.count(chunk), "misowned chunk# " << chunk); - commited.erase(chunk); - } - UNIT_ASSERT_C(commited.empty(), "there are leaked chunks# " << FormatList(commited)); - } - - - void ReserveChunk() { - const auto evReserveRes = TestCtx->TestResponce<NPDisk::TEvChunkReserveResult>( - new NPDisk::TEvChunkReserve(PDiskParams->Owner, PDiskParams->OwnerRound, 1), - NKikimrProto::OK); - UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); - const ui32 reservedChunk = evReserveRes->ChunkIds.front(); - Chunks[EChunkState::RESERVED].emplace(reservedChunk); - } - - void CommitReservedChunks() { - auto& reservedChunks = Chunks[EChunkState::RESERVED]; - NPDisk::TCommitRecord rec; - rec.CommitChunks = TVector<TChunkIdx>(reservedChunks.begin(), reservedChunks.end()); - SendEvLogImpl(1, rec); - Chunks[EChunkState::COMMITTED].insert(reservedChunks.begin(), reservedChunks.end()); - reservedChunks.clear(); - } - - void DeleteCommitedChunks() { - auto& commited = Chunks[EChunkState::COMMITTED]; - NPDisk::TCommitRecord rec; - rec.DeleteChunks = TVector<TChunkIdx>(commited.begin(), commited.end()); - SendEvLogImpl(1, rec); - Chunks[EChunkState::DELETED].insert(commited.begin(), commited.end()); - commited.clear(); - } - - ui64 ReadLog(std::function<void(const NPDisk::TLogRecord&)> logResCallback = {}) { - ui64 logRecordsRead = 0; - - NPDisk::TLogPosition position{0, 0}; - bool endOfLog = false; - do { - UNIT_ASSERT(PDiskParams); - auto logReadRes = TestCtx->TestResponce<NPDisk::TEvReadLogResult>( - new NPDisk::TEvReadLog(PDiskParams->Owner, PDiskParams->OwnerRound, position), - 
NKikimrProto::OK); - UNIT_ASSERT(position == logReadRes->Position); - for (const NPDisk::TLogRecord& rec : logReadRes->Results) { - ++logRecordsRead; - if (logResCallback) { - logResCallback(rec); - } - LastUsedLsn = Max(LastUsedLsn, rec.Lsn); - } - position = logReadRes->NextPosition; - endOfLog = logReadRes->IsEndOfLog; - } while (!endOfLog); - - return logRecordsRead; - } - - void SendEvLogSync(const ui64 size = 128) { - SendEvLogImpl(size, {}, false); - } - - void CutLogAllButOne() { - SendEvLogImpl(1, LastUsedLsn + 1, true); - } - - ui64 OwnedLogRecords() const { - return LastUsedLsn + 1 - FirstLsnToKeep; - } - -private: - void SendEvLogImpl(const ui64 size, TMaybe<NPDisk::TCommitRecord> commitRec) { - auto evLog = MakeHolder<NPDisk::TEvLog>(PDiskParams->Owner, PDiskParams->OwnerRound, 0, PrepareData(size), - GetLsnSeg(), nullptr); - - if (commitRec) { - evLog->Signature.SetCommitRecord(); - evLog->CommitRecord = std::move(*commitRec); - } - - TestCtx->TestResponce<NPDisk::TEvLogResult>(evLog.Release(), NKikimrProto::OK); - } - - void SendEvLogImpl(const ui64 size, TMaybe<ui64> firstLsnToKeep, bool isStartingPoint) { - - TMaybe<NPDisk::TCommitRecord> rec; - - if (firstLsnToKeep || isStartingPoint) { - rec = NPDisk::TCommitRecord(); - rec->FirstLsnToKeep = firstLsnToKeep.GetOrElse(0); - FirstLsnToKeep = Max(FirstLsnToKeep, firstLsnToKeep.GetOrElse(0)); - rec->IsStartingPoint = isStartingPoint; - } - SendEvLogImpl(size, rec); - } -}; - -std::atomic<ui64> TVDiskMock::Idx = 0; -std::atomic<ui64> TVDiskMock::OwnerRound = 2; - - Y_UNIT_TEST(TestVDiskMock) { - TActorTestContext testCtx(false); - TVDiskMock mock(&testCtx); - - mock.InitFull(); - const int logsSent = 100; - for (int i = 0; i < logsSent; ++i) { - mock.SendEvLogSync(); - } - - mock.Init(); - UNIT_ASSERT(mock.ReadLog() == mock.OwnedLogRecords()); - } - - // Test to reproduce bug from KIKIMR-10192 - Y_UNIT_TEST(TestLogSpliceNonceJump) { - if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { - 
return; - } - - TActorTestContext testCtx(false); - TVDiskMock sporadicVDisk(&testCtx); - TVDiskMock intensiveVDisk(&testCtx); - - sporadicVDisk.InitFull(); // writes log into logChunk# 1 - intensiveVDisk.InitFull(); - - sporadicVDisk.SendEvLogSync(); - - for (ui32 i = 0; i < 5; i++) { - do { - intensiveVDisk.SendEvLogSync(1024); - } while (!testCtx.GetPDisk()->CommonLogger->OnFirstSectorInChunk()); - } - // expect log chunks list looks like 1 -> 2 -> ... -> 6 (empty) - - testCtx.RestartPDiskSync(); // writes NonceJump - UNIT_ASSERT_C(testCtx.GetPDisk()->CommonLogger->SectorIdx == 1, "To reproduce bug nonce jump record" - " should be written in chunk's first sector"); - sporadicVDisk.InitFull(); // sends EvLog into chunk with recently written nonce jump - intensiveVDisk.InitFull(); - - { - // initiate log splicing, expect transition to be - // 1 -> 2 -> ... -> 6 - NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); - UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); - UNIT_ASSERT(pdisk->LogChunks.size() == 6); - intensiveVDisk.CutLogAllButOne(); - // 1 -> 6 - pdisk->PDiskThread.StopSync(); - while (pdisk->LogChunks.size() != 2) { - pdisk->Update(); - } - } - - testCtx.RestartPDiskSync(); - intensiveVDisk.Init(); - UNIT_ASSERT_VALUES_EQUAL(intensiveVDisk.ReadLog(), intensiveVDisk.OwnedLogRecords()); - sporadicVDisk.Init(); - UNIT_ASSERT_VALUES_EQUAL(sporadicVDisk.ReadLog(), sporadicVDisk.OwnedLogRecords()); - - testCtx.Send(new NActors::TEvents::TEvPoisonPill()); - } - - // Test to reproduce bug with multiple chunk splices from - Y_UNIT_TEST(TestMultipleLogSpliceNonceJump) { - if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { - return; - } - - TActorTestContext testCtx(false); - TVDiskMock sporadicVDisk(&testCtx); - TVDiskMock moderateVDisk(&testCtx); - TVDiskMock intensiveVDisk(&testCtx); - - sporadicVDisk.InitFull(); // writes log into logChunk# 1 - moderateVDisk.InitFull(); // writes log into logChunk# 1 - intensiveVDisk.InitFull(); - - 
sporadicVDisk.SendEvLogSync(); - - for (ui32 i = 0; i < 8; i++) { - bool alreadyWriteThisChunk = false; - do { - if (1 <= i && i <= 4 && !alreadyWriteThisChunk) { - alreadyWriteThisChunk = true; - moderateVDisk.SendEvLogSync(1024); - } else { - intensiveVDisk.SendEvLogSync(1024); - } - } while (!testCtx.GetPDisk()->CommonLogger->OnFirstSectorInChunk()); - } - // expect log chunks list looks like 1 -> 2 -> ... -> 6 (empty) - - testCtx.RestartPDiskSync(); // writes NonceJump - UNIT_ASSERT_C(testCtx.GetPDisk()->CommonLogger->SectorIdx == 1, "To reproduce bug nonce jump record" - " should be written in chunk's first sector"); - sporadicVDisk.InitFull(); // sends EvLog into chunk with recently written nonce jump - moderateVDisk.InitFull(); - intensiveVDisk.InitFull(); - - { - // initiate log splicing, expect transition to be - // 1 -> 2 -> ... -> 9 - NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); - UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); - UNIT_ASSERT_C(pdisk->LogChunks.size() == 9, pdisk->LogChunks.size()); - intensiveVDisk.CutLogAllButOne(); - // 1 -> 2 -> 6 - pdisk->PDiskThread.StopSync(); - while (pdisk->LogChunks.size() != 6) { - pdisk->Update(); - } - } - - testCtx.RestartPDiskSync(); - moderateVDisk.Init(); - { - // initiate log splicing, expect transition to be - // 1 -> 2 -> ... 
-> 6 - NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); - UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); - UNIT_ASSERT(pdisk->LogChunks.size() == 6); - moderateVDisk.CutLogAllButOne(); - // 1 -> 2 -> 6 - pdisk->PDiskThread.StopSync(); - while (pdisk->LogChunks.size() != 2) { - pdisk->Update(); - } - } - - testCtx.RestartPDiskSync(); - intensiveVDisk.Init(); - UNIT_ASSERT_VALUES_EQUAL(intensiveVDisk.ReadLog(), intensiveVDisk.OwnedLogRecords()); - moderateVDisk.Init(); - UNIT_ASSERT_VALUES_EQUAL(moderateVDisk.ReadLog(), moderateVDisk.OwnedLogRecords()); - sporadicVDisk.Init(); - UNIT_ASSERT_VALUES_EQUAL(sporadicVDisk.ReadLog(), sporadicVDisk.OwnedLogRecords()); - - testCtx.Send(new NActors::TEvents::TEvPoisonPill()); - } - - Y_UNIT_TEST(TestFakeErrorPDiskManyLogWrite) { - TActorTestContext testCtx(false); - testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - ui32 errors = 0; - ui32 lsn = 2; - for (ui32 i = 0; i < 100'000; ++i) { - testCtx.Send(new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, - TString("abc"), TLsnSeg(lsn, lsn), nullptr)); - ++lsn; - const auto logRes = testCtx.Recv<NPDisk::TEvLogResult>(); - if (logRes->Status != NKikimrProto::OK) { - ++errors; - } else { - UNIT_ASSERT(errors == 0); - } - } - UNIT_ASSERT(errors > 0); - } - - Y_UNIT_TEST(TestFakeErrorPDiskManyChunkRead) { - TActorTestContext testCtx(false); - testCtx.TestCtx.SectorMap->ImitateReadIoErrorProbability = 1e-4; - - TVDiskMock vdisk(&testCtx); - vdisk.InitFull(); - - ui32 errors = 0; - - vdisk.ReserveChunk(); - vdisk.CommitReservedChunks(); - UNIT_ASSERT(vdisk.Chunks[EChunkState::COMMITTED].size() == 1); - const ui32 reservedChunk = *vdisk.Chunks[EChunkState::COMMITTED].begin(); - - TString chunkWriteData = PrepareData(1024); - 
testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( - new NPDisk::TEvChunkWrite(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, - reservedChunk, 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(chunkWriteData), nullptr, false, 0), - NKikimrProto::OK); - - bool printed = false; - for (ui32 i = 0; i < 100'000; ++i) { - testCtx.Send(new NPDisk::TEvChunkRead(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, - reservedChunk, 0, 1024, 0, nullptr)); - - const auto res = testCtx.Recv<NPDisk::TEvChunkReadResult>(); - //Ctest << res->ToString() << Endl; - if (res->Status != NKikimrProto::OK) { - ++errors; - if (!printed) { - printed = true; - Ctest << res->ToString() << Endl; - } - } - } - // Check that PDisk is in working state now - vdisk.InitFull(); - } - - Y_UNIT_TEST(TestFakeErrorPDiskManyChunkWrite) { - TActorTestContext testCtx(false); - testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - ui32 errors = 0; - const auto evReserveRes = testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( - new NPDisk::TEvChunkReserve(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 1), - NKikimrProto::OK); - UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); - const ui32 reservedChunk = evReserveRes->ChunkIds.front(); - - bool printed = false; - for (ui32 i = 0; i < 100'000; ++i) { - TString data = PrepareData(1024); - testCtx.Send(new NPDisk::TEvChunkWrite(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, - reservedChunk, 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(data), nullptr, false, 0)); - - const auto res = testCtx.Recv<NPDisk::TEvChunkWriteResult>(); - //Ctest << res->ToString() << Endl; - if (res->Status != NKikimrProto::OK) { - ++errors; - if (!printed) { - printed = true; - Ctest << res->ToString() << Endl; 
- } - } else { - UNIT_ASSERT(errors == 0); - } - } - UNIT_ASSERT(errors > 0); - } - - Y_UNIT_TEST(TestSIGSEGVInTUndelivered) { - TActorTestContext testCtx(false); - const TVDiskID vDiskID(0, 1, 0, 0, 0); - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - TEvents::TEvUndelivered::TPtr ev = reinterpret_cast<TEventHandle<TEvents::TEvUndelivered>*>( - new IEventHandle( - testCtx.Sender, testCtx.Sender, - new TEvents::TEvUndelivered(0, 0) - ) - ); - - const auto& sender = ev->Sender; + + Y_UNIT_TEST(TestPDiskManyOwnersInitiation) { + TActorTestContext testCtx(false); + + TVector<TVDiskIDOwnerRound> goodIds; + ui64 badIdsCount = 0; + + for (int i = 2; i < 2000; ++i) { + const TVDiskID vDiskID(i, 1, 0, 0, 0); + if (badIdsCount == 0) { + testCtx.Send(new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid)); + const auto evInitRes = testCtx.Recv<NPDisk::TEvYardInitResult>(); + if (evInitRes->Status == NKikimrProto::OK) { + goodIds.push_back({vDiskID, evInitRes->PDiskParams->OwnerRound}); + } else { + ++badIdsCount; + } + } else { + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(i, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::ERROR); + ++badIdsCount; + } + } + + RecreateOwner(testCtx, goodIds.front()); + + UNIT_ASSERT(badIdsCount > 0 && goodIds.size() > 0); + for (auto v : goodIds) { + testCtx.TestResponce<NPDisk::TEvSlayResult>( + new NPDisk::TEvSlay(v.VDiskID, v.OwnerRound + 1, 0, 0), + NKikimrProto::OK); + } + } + +enum class EChunkState { + UNKNOWN, + RESERVED, + COMMIT_INFLIGHT, + COMMITTED, + DELETE_INFLIGHT, + DELETED +}; + +struct TVDiskMock { + static std::atomic<ui64> Idx; + static std::atomic<ui64> OwnerRound; + + TActorTestContext *TestCtx; + const TVDiskID VDiskID; + TIntrusivePtr<TPDiskParams> PDiskParams; + ui64 LastUsedLsn = 0; + ui64 FirstLsnToKeep = 1; + + TMap<EChunkState, 
TSet<TChunkIdx>> Chunks; + + TVDiskMock(TActorTestContext *testCtx) + : TestCtx(testCtx) + , VDiskID(Idx.fetch_add(1), 1, 0, 0, 0) + {} + + TLsnSeg GetLsnSeg() { + ++LastUsedLsn; + return {LastUsedLsn, LastUsedLsn}; + }; + + void InitFull() { + Init(); + ReadLog(); + SendEvLogImpl(1, {}, true); + } + + void Init() { + const auto evInitRes = TestCtx->TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(OwnerRound.fetch_add(1), VDiskID, TestCtx->TestCtx.PDiskGuid), + NKikimrProto::OK); + PDiskParams = evInitRes->PDiskParams; + + TSet<TChunkIdx> commited = Chunks[EChunkState::COMMITTED]; + for (TChunkIdx chunk : evInitRes->OwnedChunks) { + UNIT_ASSERT_C(commited.count(chunk), "misowned chunk# " << chunk); + commited.erase(chunk); + } + UNIT_ASSERT_C(commited.empty(), "there are leaked chunks# " << FormatList(commited)); + } + + + void ReserveChunk() { + const auto evReserveRes = TestCtx->TestResponce<NPDisk::TEvChunkReserveResult>( + new NPDisk::TEvChunkReserve(PDiskParams->Owner, PDiskParams->OwnerRound, 1), + NKikimrProto::OK); + UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); + const ui32 reservedChunk = evReserveRes->ChunkIds.front(); + Chunks[EChunkState::RESERVED].emplace(reservedChunk); + } + + void CommitReservedChunks() { + auto& reservedChunks = Chunks[EChunkState::RESERVED]; + NPDisk::TCommitRecord rec; + rec.CommitChunks = TVector<TChunkIdx>(reservedChunks.begin(), reservedChunks.end()); + SendEvLogImpl(1, rec); + Chunks[EChunkState::COMMITTED].insert(reservedChunks.begin(), reservedChunks.end()); + reservedChunks.clear(); + } + + void DeleteCommitedChunks() { + auto& commited = Chunks[EChunkState::COMMITTED]; + NPDisk::TCommitRecord rec; + rec.DeleteChunks = TVector<TChunkIdx>(commited.begin(), commited.end()); + SendEvLogImpl(1, rec); + Chunks[EChunkState::DELETED].insert(commited.begin(), commited.end()); + commited.clear(); + } + + ui64 ReadLog(std::function<void(const NPDisk::TLogRecord&)> logResCallback = {}) { + ui64 logRecordsRead = 
0; + + NPDisk::TLogPosition position{0, 0}; + bool endOfLog = false; + do { + UNIT_ASSERT(PDiskParams); + auto logReadRes = TestCtx->TestResponce<NPDisk::TEvReadLogResult>( + new NPDisk::TEvReadLog(PDiskParams->Owner, PDiskParams->OwnerRound, position), + NKikimrProto::OK); + UNIT_ASSERT(position == logReadRes->Position); + for (const NPDisk::TLogRecord& rec : logReadRes->Results) { + ++logRecordsRead; + if (logResCallback) { + logResCallback(rec); + } + LastUsedLsn = Max(LastUsedLsn, rec.Lsn); + } + position = logReadRes->NextPosition; + endOfLog = logReadRes->IsEndOfLog; + } while (!endOfLog); + + return logRecordsRead; + } + + void SendEvLogSync(const ui64 size = 128) { + SendEvLogImpl(size, {}, false); + } + + void CutLogAllButOne() { + SendEvLogImpl(1, LastUsedLsn + 1, true); + } + + ui64 OwnedLogRecords() const { + return LastUsedLsn + 1 - FirstLsnToKeep; + } + +private: + void SendEvLogImpl(const ui64 size, TMaybe<NPDisk::TCommitRecord> commitRec) { + auto evLog = MakeHolder<NPDisk::TEvLog>(PDiskParams->Owner, PDiskParams->OwnerRound, 0, PrepareData(size), + GetLsnSeg(), nullptr); + + if (commitRec) { + evLog->Signature.SetCommitRecord(); + evLog->CommitRecord = std::move(*commitRec); + } + + TestCtx->TestResponce<NPDisk::TEvLogResult>(evLog.Release(), NKikimrProto::OK); + } + + void SendEvLogImpl(const ui64 size, TMaybe<ui64> firstLsnToKeep, bool isStartingPoint) { + + TMaybe<NPDisk::TCommitRecord> rec; + + if (firstLsnToKeep || isStartingPoint) { + rec = NPDisk::TCommitRecord(); + rec->FirstLsnToKeep = firstLsnToKeep.GetOrElse(0); + FirstLsnToKeep = Max(FirstLsnToKeep, firstLsnToKeep.GetOrElse(0)); + rec->IsStartingPoint = isStartingPoint; + } + SendEvLogImpl(size, rec); + } +}; + +std::atomic<ui64> TVDiskMock::Idx = 0; +std::atomic<ui64> TVDiskMock::OwnerRound = 2; + + Y_UNIT_TEST(TestVDiskMock) { + TActorTestContext testCtx(false); + TVDiskMock mock(&testCtx); + + mock.InitFull(); + const int logsSent = 100; + for (int i = 0; i < logsSent; ++i) { + 
mock.SendEvLogSync(); + } + + mock.Init(); + UNIT_ASSERT(mock.ReadLog() == mock.OwnedLogRecords()); + } + + // Test to reproduce bug from KIKIMR-10192 + Y_UNIT_TEST(TestLogSpliceNonceJump) { + if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { + return; + } + + TActorTestContext testCtx(false); + TVDiskMock sporadicVDisk(&testCtx); + TVDiskMock intensiveVDisk(&testCtx); + + sporadicVDisk.InitFull(); // writes log into logChunk# 1 + intensiveVDisk.InitFull(); + + sporadicVDisk.SendEvLogSync(); + + for (ui32 i = 0; i < 5; i++) { + do { + intensiveVDisk.SendEvLogSync(1024); + } while (!testCtx.GetPDisk()->CommonLogger->OnFirstSectorInChunk()); + } + // expect log chunks list looks like 1 -> 2 -> ... -> 6 (empty) + + testCtx.RestartPDiskSync(); // writes NonceJump + UNIT_ASSERT_C(testCtx.GetPDisk()->CommonLogger->SectorIdx == 1, "To reproduce bug nonce jump record" + " should be written in chunk's first sector"); + sporadicVDisk.InitFull(); // sends EvLog into chunk with recently written nonce jump + intensiveVDisk.InitFull(); + + { + // initiate log splicing, expect transition to be + // 1 -> 2 -> ... 
-> 6 + NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); + UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); + UNIT_ASSERT(pdisk->LogChunks.size() == 6); + intensiveVDisk.CutLogAllButOne(); + // 1 -> 6 + pdisk->PDiskThread.StopSync(); + while (pdisk->LogChunks.size() != 2) { + pdisk->Update(); + } + } + + testCtx.RestartPDiskSync(); + intensiveVDisk.Init(); + UNIT_ASSERT_VALUES_EQUAL(intensiveVDisk.ReadLog(), intensiveVDisk.OwnedLogRecords()); + sporadicVDisk.Init(); + UNIT_ASSERT_VALUES_EQUAL(sporadicVDisk.ReadLog(), sporadicVDisk.OwnedLogRecords()); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + // Test to reproduce bug with multiple chunk splices from + Y_UNIT_TEST(TestMultipleLogSpliceNonceJump) { + if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { + return; + } + + TActorTestContext testCtx(false); + TVDiskMock sporadicVDisk(&testCtx); + TVDiskMock moderateVDisk(&testCtx); + TVDiskMock intensiveVDisk(&testCtx); + + sporadicVDisk.InitFull(); // writes log into logChunk# 1 + moderateVDisk.InitFull(); // writes log into logChunk# 1 + intensiveVDisk.InitFull(); + + sporadicVDisk.SendEvLogSync(); + + for (ui32 i = 0; i < 8; i++) { + bool alreadyWriteThisChunk = false; + do { + if (1 <= i && i <= 4 && !alreadyWriteThisChunk) { + alreadyWriteThisChunk = true; + moderateVDisk.SendEvLogSync(1024); + } else { + intensiveVDisk.SendEvLogSync(1024); + } + } while (!testCtx.GetPDisk()->CommonLogger->OnFirstSectorInChunk()); + } + // expect log chunks list looks like 1 -> 2 -> ... -> 6 (empty) + + testCtx.RestartPDiskSync(); // writes NonceJump + UNIT_ASSERT_C(testCtx.GetPDisk()->CommonLogger->SectorIdx == 1, "To reproduce bug nonce jump record" + " should be written in chunk's first sector"); + sporadicVDisk.InitFull(); // sends EvLog into chunk with recently written nonce jump + moderateVDisk.InitFull(); + intensiveVDisk.InitFull(); + + { + // initiate log splicing, expect transition to be + // 1 -> 2 -> ... 
-> 9 + NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); + UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); + UNIT_ASSERT_C(pdisk->LogChunks.size() == 9, pdisk->LogChunks.size()); + intensiveVDisk.CutLogAllButOne(); + // 1 -> 2 -> 6 + pdisk->PDiskThread.StopSync(); + while (pdisk->LogChunks.size() != 6) { + pdisk->Update(); + } + } + + testCtx.RestartPDiskSync(); + moderateVDisk.Init(); + { + // initiate log splicing, expect transition to be + // 1 -> 2 -> ... -> 6 + NPDisk::TPDisk *pdisk = testCtx.GetPDisk(); + UNIT_ASSERT(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE); + UNIT_ASSERT(pdisk->LogChunks.size() == 6); + moderateVDisk.CutLogAllButOne(); + // 1 -> 2 -> 6 + pdisk->PDiskThread.StopSync(); + while (pdisk->LogChunks.size() != 2) { + pdisk->Update(); + } + } + + testCtx.RestartPDiskSync(); + intensiveVDisk.Init(); + UNIT_ASSERT_VALUES_EQUAL(intensiveVDisk.ReadLog(), intensiveVDisk.OwnedLogRecords()); + moderateVDisk.Init(); + UNIT_ASSERT_VALUES_EQUAL(moderateVDisk.ReadLog(), moderateVDisk.OwnedLogRecords()); + sporadicVDisk.Init(); + UNIT_ASSERT_VALUES_EQUAL(sporadicVDisk.ReadLog(), sporadicVDisk.OwnedLogRecords()); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + Y_UNIT_TEST(TestFakeErrorPDiskManyLogWrite) { + TActorTestContext testCtx(false); + testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + ui32 errors = 0; + ui32 lsn = 2; + for (ui32 i = 0; i < 100'000; ++i) { + testCtx.Send(new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, + TString("abc"), TLsnSeg(lsn, lsn), nullptr)); + ++lsn; + const auto logRes = testCtx.Recv<NPDisk::TEvLogResult>(); + if (logRes->Status != NKikimrProto::OK) { + ++errors; + } else { + UNIT_ASSERT(errors == 0); + } + } + UNIT_ASSERT(errors > 0); + } + + 
Y_UNIT_TEST(TestFakeErrorPDiskManyChunkRead) { + TActorTestContext testCtx(false); + testCtx.TestCtx.SectorMap->ImitateReadIoErrorProbability = 1e-4; + + TVDiskMock vdisk(&testCtx); + vdisk.InitFull(); + + ui32 errors = 0; + + vdisk.ReserveChunk(); + vdisk.CommitReservedChunks(); + UNIT_ASSERT(vdisk.Chunks[EChunkState::COMMITTED].size() == 1); + const ui32 reservedChunk = *vdisk.Chunks[EChunkState::COMMITTED].begin(); + + TString chunkWriteData = PrepareData(1024); + testCtx.TestResponce<NPDisk::TEvChunkWriteResult>( + new NPDisk::TEvChunkWrite(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, + reservedChunk, 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(chunkWriteData), nullptr, false, 0), + NKikimrProto::OK); + + bool printed = false; + for (ui32 i = 0; i < 100'000; ++i) { + testCtx.Send(new NPDisk::TEvChunkRead(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, + reservedChunk, 0, 1024, 0, nullptr)); + + const auto res = testCtx.Recv<NPDisk::TEvChunkReadResult>(); + //Ctest << res->ToString() << Endl; + if (res->Status != NKikimrProto::OK) { + ++errors; + if (!printed) { + printed = true; + Ctest << res->ToString() << Endl; + } + } + } + // Check that PDisk is in working state now + vdisk.InitFull(); + } + + Y_UNIT_TEST(TestFakeErrorPDiskManyChunkWrite) { + TActorTestContext testCtx(false); + testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + ui32 errors = 0; + const auto evReserveRes = testCtx.TestResponce<NPDisk::TEvChunkReserveResult>( + new NPDisk::TEvChunkReserve(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 1), + NKikimrProto::OK); + UNIT_ASSERT(evReserveRes->ChunkIds.size() == 1); + const ui32 reservedChunk = evReserveRes->ChunkIds.front(); + + bool printed = false; + for (ui32 i = 0; i < 100'000; ++i) 
{ + TString data = PrepareData(1024); + testCtx.Send(new NPDisk::TEvChunkWrite(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, + reservedChunk, 0, new NPDisk::TEvChunkWrite::TStrokaBackedUpParts(data), nullptr, false, 0)); + + const auto res = testCtx.Recv<NPDisk::TEvChunkWriteResult>(); + //Ctest << res->ToString() << Endl; + if (res->Status != NKikimrProto::OK) { + ++errors; + if (!printed) { + printed = true; + Ctest << res->ToString() << Endl; + } + } else { + UNIT_ASSERT(errors == 0); + } + } + UNIT_ASSERT(errors > 0); + } + + Y_UNIT_TEST(TestSIGSEGVInTUndelivered) { + TActorTestContext testCtx(false); + const TVDiskID vDiskID(0, 1, 0, 0, 0); + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + TEvents::TEvUndelivered::TPtr ev = reinterpret_cast<TEventHandle<TEvents::TEvUndelivered>*>( + new IEventHandle( + testCtx.Sender, testCtx.Sender, + new TEvents::TEvUndelivered(0, 0) + ) + ); + + const auto& sender = ev->Sender; THolder<NPDisk::TUndelivered> req{testCtx.GetPDisk()->ReqCreator.CreateFromEv<NPDisk::TUndelivered>(ev, sender)}; - } - - Y_UNIT_TEST(PDiskRestart) { - TActorTestContext testCtx(false); - TVDiskMock vdisk(&testCtx); - vdisk.InitFull(); - vdisk.SendEvLogSync(); - testCtx.Send(new TEvBlobStorage::TEvRestartPDisk(testCtx.GetPDisk()->PDiskId, testCtx.MainKey, nullptr)); - const auto evInitRes = testCtx.Recv<TEvBlobStorage::TEvRestartPDiskResult>(); - vdisk.InitFull(); - vdisk.SendEvLogSync(); - } - - Y_UNIT_TEST(PDiskRestartManyLogWrites) { - TActorTestContext testCtx(false); - testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; - - const TVDiskID vDiskID(0, 1, 0, 0, 0); - const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - - //ui32 errors = 0; - ui32 lsn = 2; - TString logData = PrepareData(4096); - for 
(ui32 i = 0; i < 1000; ++i) { - testCtx.Send(new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, - logData, TLsnSeg(lsn, lsn), nullptr)); - if (i == 100) { - testCtx.Send(new TEvBlobStorage::TEvRestartPDisk(testCtx.GetPDisk()->PDiskId, testCtx.MainKey, nullptr)); - } - ++lsn; - } - for (ui32 i = 0; i < 100;) { - const auto logRes = testCtx.Recv<NPDisk::TEvLogResult>(); - i += logRes->Results.size(); - if (logRes->Status == NKikimrProto::OK) { - Ctest << "TEvLogResult status is ok" << Endl; - } else { - Ctest << "TEvLogResult status is error" << Endl; - } - } - testCtx.Recv<TEvBlobStorage::TEvRestartPDiskResult>(); - } - - Y_UNIT_TEST(CommitDeleteChunks) { - TActorTestContext testCtx(false); - TVDiskMock intensiveVDisk(&testCtx); - intensiveVDisk.InitFull(); - intensiveVDisk.ReserveChunk(); - intensiveVDisk.ReserveChunk(); - intensiveVDisk.CommitReservedChunks(); - intensiveVDisk.SendEvLogSync(); - intensiveVDisk.DeleteCommitedChunks(); - intensiveVDisk.InitFull(); - } - - // Test to reproduce bug from - Y_UNIT_TEST(TestLogSpliceChunkReserve) { - if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { - return; - } - - TActorTestContext testCtx(false); - TVDiskMock intensiveVDisk(&testCtx); // idx# 1 - TVDiskMock formerVDisk(&testCtx); // idx# 2 - TVDiskMock latterVDisk(&testCtx); // idx# 3 - TVDiskMock fillerVDisk(&testCtx); - - // [1, 2, 3] -> [2] -> [1, 2, 3(+15)] -> [3(-15)] -> [1, 2, 3] -> ... -> [1, 2, 3] - // - // latter (owner 3) cuts log - // [1, 2] -> [2] -> [1, 2] *-> [1, 2] -> ... -> [1, 2, 3] - // - // former (owner 2) cuts log - // [1] *-> [1] *-> [1] -> ... 
-> [1, 2, 3] - - intensiveVDisk.InitFull(); - formerVDisk.InitFull(); - latterVDisk.InitFull(); - fillerVDisk.InitFull(); - - - auto logChunks = [&] () { - return testCtx.SafeRunOnPDisk([](NPDisk::TPDisk *pdisk) { - return pdisk->LogChunks.size(); - }); - }; - - const ui32 targetLogChunkCount = 9; - for (ui32 i = 0; i < targetLogChunkCount - 1; i++) { - if (i != 1 && i != 3) { - intensiveVDisk.SendEvLogSync(16); - } - - if (i != 3) { - formerVDisk.SendEvLogSync(16); - } - - if (i != 1) { - latterVDisk.SendEvLogSync(16); - } - if (i == 2) { - latterVDisk.ReserveChunk(); - latterVDisk.CommitReservedChunks(); - } - if (i == 3) { - latterVDisk.DeleteCommitedChunks(); - } - - do { - fillerVDisk.SendEvLogSync(16); - } while (logChunks() != i + 2); - } - - // To remove obstructing owner in PDisk's Log prints - fillerVDisk.CutLogAllButOne(); - - { - auto printLog = [&] () { - testCtx.SafeRunOnPDisk([](NPDisk::TPDisk *pdisk) { - TStringStream out; - for (auto& info : pdisk->LogChunks) { - out << "["; - out << info.ChunkIdx << ": "; - for (size_t i = 0; i < info.OwnerLsnRange.size(); ++i) { - const NPDisk::TLogChunkInfo::TLsnRange &range = info.OwnerLsnRange[i]; - if (range.IsPresent) { - out << i << ","; - } - } - out << "]"; - out << " -> "; - } - out << Endl; - Ctest << out.Str(); - }); - }; - - printLog(); - - UNIT_ASSERT_C(logChunks() == targetLogChunkCount, "LogChunks.size()# " << logChunks()); - - do { - latterVDisk.CutLogAllButOne(); - printLog(); - Sleep(TDuration::Seconds(1)); - } while (logChunks() != targetLogChunkCount - 1); - printLog(); - - do { - formerVDisk.CutLogAllButOne(); - printLog(); - Sleep(TDuration::Seconds(1)); - } while (logChunks() != targetLogChunkCount - 2); - printLog(); - - testCtx.RestartPDiskSync(); - } - - intensiveVDisk.InitFull(); - formerVDisk.InitFull(); - latterVDisk.InitFull(); - } - - Y_UNIT_TEST(SpaceColor) { - return; // Enable test after KIKIMR-12880 - - TActorTestContext testCtx(false); - TVDiskMock vdisk(&testCtx); - - using 
TColor = NKikimrBlobStorage::TPDiskSpaceColor; - for (auto color : { - TColor::GREEN, - TColor::CYAN, + } + + Y_UNIT_TEST(PDiskRestart) { + TActorTestContext testCtx(false); + TVDiskMock vdisk(&testCtx); + vdisk.InitFull(); + vdisk.SendEvLogSync(); + testCtx.Send(new TEvBlobStorage::TEvRestartPDisk(testCtx.GetPDisk()->PDiskId, testCtx.MainKey, nullptr)); + const auto evInitRes = testCtx.Recv<TEvBlobStorage::TEvRestartPDiskResult>(); + vdisk.InitFull(); + vdisk.SendEvLogSync(); + } + + Y_UNIT_TEST(PDiskRestartManyLogWrites) { + TActorTestContext testCtx(false); + testCtx.TestCtx.SectorMap->ImitateIoErrorProbability = 1e-4; + + const TVDiskID vDiskID(0, 1, 0, 0, 0); + const auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(2, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + + //ui32 errors = 0; + ui32 lsn = 2; + TString logData = PrepareData(4096); + for (ui32 i = 0; i < 1000; ++i) { + testCtx.Send(new NPDisk::TEvLog(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound, 0, + logData, TLsnSeg(lsn, lsn), nullptr)); + if (i == 100) { + testCtx.Send(new TEvBlobStorage::TEvRestartPDisk(testCtx.GetPDisk()->PDiskId, testCtx.MainKey, nullptr)); + } + ++lsn; + } + for (ui32 i = 0; i < 100;) { + const auto logRes = testCtx.Recv<NPDisk::TEvLogResult>(); + i += logRes->Results.size(); + if (logRes->Status == NKikimrProto::OK) { + Ctest << "TEvLogResult status is ok" << Endl; + } else { + Ctest << "TEvLogResult status is error" << Endl; + } + } + testCtx.Recv<TEvBlobStorage::TEvRestartPDiskResult>(); + } + + Y_UNIT_TEST(CommitDeleteChunks) { + TActorTestContext testCtx(false); + TVDiskMock intensiveVDisk(&testCtx); + intensiveVDisk.InitFull(); + intensiveVDisk.ReserveChunk(); + intensiveVDisk.ReserveChunk(); + intensiveVDisk.CommitReservedChunks(); + intensiveVDisk.SendEvLogSync(); + intensiveVDisk.DeleteCommitedChunks(); + intensiveVDisk.InitFull(); + } + + // Test to reproduce bug from + 
Y_UNIT_TEST(TestLogSpliceChunkReserve) { + if constexpr (!KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { + return; + } + + TActorTestContext testCtx(false); + TVDiskMock intensiveVDisk(&testCtx); // idx# 1 + TVDiskMock formerVDisk(&testCtx); // idx# 2 + TVDiskMock latterVDisk(&testCtx); // idx# 3 + TVDiskMock fillerVDisk(&testCtx); + + // [1, 2, 3] -> [2] -> [1, 2, 3(+15)] -> [3(-15)] -> [1, 2, 3] -> ... -> [1, 2, 3] + // + // latter (owner 3) cuts log + // [1, 2] -> [2] -> [1, 2] *-> [1, 2] -> ... -> [1, 2, 3] + // + // former (owner 2) cuts log + // [1] *-> [1] *-> [1] -> ... -> [1, 2, 3] + + intensiveVDisk.InitFull(); + formerVDisk.InitFull(); + latterVDisk.InitFull(); + fillerVDisk.InitFull(); + + + auto logChunks = [&] () { + return testCtx.SafeRunOnPDisk([](NPDisk::TPDisk *pdisk) { + return pdisk->LogChunks.size(); + }); + }; + + const ui32 targetLogChunkCount = 9; + for (ui32 i = 0; i < targetLogChunkCount - 1; i++) { + if (i != 1 && i != 3) { + intensiveVDisk.SendEvLogSync(16); + } + + if (i != 3) { + formerVDisk.SendEvLogSync(16); + } + + if (i != 1) { + latterVDisk.SendEvLogSync(16); + } + if (i == 2) { + latterVDisk.ReserveChunk(); + latterVDisk.CommitReservedChunks(); + } + if (i == 3) { + latterVDisk.DeleteCommitedChunks(); + } + + do { + fillerVDisk.SendEvLogSync(16); + } while (logChunks() != i + 2); + } + + // To remove obstructing owner in PDisk's Log prints + fillerVDisk.CutLogAllButOne(); + + { + auto printLog = [&] () { + testCtx.SafeRunOnPDisk([](NPDisk::TPDisk *pdisk) { + TStringStream out; + for (auto& info : pdisk->LogChunks) { + out << "["; + out << info.ChunkIdx << ": "; + for (size_t i = 0; i < info.OwnerLsnRange.size(); ++i) { + const NPDisk::TLogChunkInfo::TLsnRange &range = info.OwnerLsnRange[i]; + if (range.IsPresent) { + out << i << ","; + } + } + out << "]"; + out << " -> "; + } + out << Endl; + Ctest << out.Str(); + }); + }; + + printLog(); + + UNIT_ASSERT_C(logChunks() == targetLogChunkCount, "LogChunks.size()# " << logChunks()); 
+ + do { + latterVDisk.CutLogAllButOne(); + printLog(); + Sleep(TDuration::Seconds(1)); + } while (logChunks() != targetLogChunkCount - 1); + printLog(); + + do { + formerVDisk.CutLogAllButOne(); + printLog(); + Sleep(TDuration::Seconds(1)); + } while (logChunks() != targetLogChunkCount - 2); + printLog(); + + testCtx.RestartPDiskSync(); + } + + intensiveVDisk.InitFull(); + formerVDisk.InitFull(); + latterVDisk.InitFull(); + } + + Y_UNIT_TEST(SpaceColor) { + return; // Enable test after KIKIMR-12880 + + TActorTestContext testCtx(false); + TVDiskMock vdisk(&testCtx); + + using TColor = NKikimrBlobStorage::TPDiskSpaceColor; + for (auto color : { + TColor::GREEN, + TColor::CYAN, TColor::LIGHT_YELLOW, - TColor::YELLOW, - TColor::LIGHT_ORANGE, - TColor::ORANGE, - TColor::RED, - //TColor::BLACK, - } ){ - auto pdiskConfig = testCtx.GetPDiskConfig(); - pdiskConfig->SpaceColorBorder = color; - pdiskConfig->ExpectedSlotCount = 10; - testCtx.UpdateConfigRecreatePDisk(pdiskConfig); - - vdisk.InitFull(); - auto initialSpace = testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( - new NPDisk::TEvCheckSpace(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound), - NKikimrProto::OK); - for (ui32 i = 0; i < initialSpace->FreeChunks + 1; ++i) { - vdisk.ReserveChunk(); - } - vdisk.CommitReservedChunks(); - auto resultSpace = testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( - new NPDisk::TEvCheckSpace(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound), - NKikimrProto::OK); - UNIT_ASSERT(color == StatusFlagToSpaceColor(resultSpace->StatusFlags)); - vdisk.DeleteCommitedChunks(); - } - } - - Y_UNIT_TEST(DeviceHaltTooLong) { - TActorTestContext testCtx(false); - testCtx.TestCtx.SectorMap->ImitateRandomWait = {TDuration::Seconds(1), TDuration::Seconds(2)}; - - TVDiskMock mock(&testCtx); - - mock.InitFull(); - const int logsSent = 100; - for (int i = 0; i < logsSent; ++i) { - mock.SendEvLogSync(); - } - - mock.Init(); - UNIT_ASSERT(mock.ReadLog() == mock.OwnedLogRecords()); - } - - 
Y_UNIT_TEST(TestPDiskOnDifferentKeys) { - TActorTestContext testCtx(false); - - int round = 2; - const TVDiskID vDiskID(0, 1, 0, 0, 0); - auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(round, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::OK); - testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( - new NPDisk::TEvCheckSpace(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound), - NKikimrProto::OK); - round = evInitRes->PDiskParams->OwnerRound + 1; - - testCtx.MainKey += 123; - testCtx.UpdateConfigRecreatePDisk(testCtx.GetPDiskConfig()); - - evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( - new NPDisk::TEvYardInit(round, vDiskID, testCtx.TestCtx.PDiskGuid), - NKikimrProto::CORRUPTED); - testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( - new NPDisk::TEvCheckSpace(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound), - NKikimrProto::CORRUPTED); - } -} + TColor::YELLOW, + TColor::LIGHT_ORANGE, + TColor::ORANGE, + TColor::RED, + //TColor::BLACK, + } ){ + auto pdiskConfig = testCtx.GetPDiskConfig(); + pdiskConfig->SpaceColorBorder = color; + pdiskConfig->ExpectedSlotCount = 10; + testCtx.UpdateConfigRecreatePDisk(pdiskConfig); + + vdisk.InitFull(); + auto initialSpace = testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( + new NPDisk::TEvCheckSpace(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound), + NKikimrProto::OK); + for (ui32 i = 0; i < initialSpace->FreeChunks + 1; ++i) { + vdisk.ReserveChunk(); + } + vdisk.CommitReservedChunks(); + auto resultSpace = testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( + new NPDisk::TEvCheckSpace(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound), + NKikimrProto::OK); + UNIT_ASSERT(color == StatusFlagToSpaceColor(resultSpace->StatusFlags)); + vdisk.DeleteCommitedChunks(); + } + } + + Y_UNIT_TEST(DeviceHaltTooLong) { + TActorTestContext testCtx(false); + testCtx.TestCtx.SectorMap->ImitateRandomWait = {TDuration::Seconds(1), 
TDuration::Seconds(2)}; + + TVDiskMock mock(&testCtx); + + mock.InitFull(); + const int logsSent = 100; + for (int i = 0; i < logsSent; ++i) { + mock.SendEvLogSync(); + } + + mock.Init(); + UNIT_ASSERT(mock.ReadLog() == mock.OwnedLogRecords()); + } + + Y_UNIT_TEST(TestPDiskOnDifferentKeys) { + TActorTestContext testCtx(false); + + int round = 2; + const TVDiskID vDiskID(0, 1, 0, 0, 0); + auto evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(round, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::OK); + testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( + new NPDisk::TEvCheckSpace(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound), + NKikimrProto::OK); + round = evInitRes->PDiskParams->OwnerRound + 1; + + testCtx.MainKey += 123; + testCtx.UpdateConfigRecreatePDisk(testCtx.GetPDiskConfig()); + + evInitRes = testCtx.TestResponce<NPDisk::TEvYardInitResult>( + new NPDisk::TEvYardInit(round, vDiskID, testCtx.TestCtx.PDiskGuid), + NKikimrProto::CORRUPTED); + testCtx.TestResponce<NPDisk::TEvCheckSpaceResult>( + new NPDisk::TEvCheckSpace(evInitRes->PDiskParams->Owner, evInitRes->PDiskParams->OwnerRound), + NKikimrProto::CORRUPTED); + } +} } // namespace NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.h index 7f51af260c..6bf5c0695d 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.h @@ -1,14 +1,14 @@ #pragma once #include "defs.h" -#include "blobstorage_pdisk_ut_defs.h" -#include "blobstorage_pdisk_blockdevice.h" +#include "blobstorage_pdisk_ut_defs.h" +#include "blobstorage_pdisk_blockdevice.h" #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_data.h" -#include "blobstorage_pdisk.h" -#include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_tools.h" -#include "blobstorage_pdisk_ut_helpers.h" +#include "blobstorage_pdisk_data.h" +#include 
"blobstorage_pdisk.h" +#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_tools.h" +#include "blobstorage_pdisk_ut_helpers.h" #include <ydb/core/base/counters.h> #include <ydb/core/base/tablet.h> @@ -22,16 +22,16 @@ #include <ydb/core/protos/services.pb.h> #include <library/cpp/actors/core/actor_bootstrapped.h> -#include <library/cpp/actors/core/event_local.h> +#include <library/cpp/actors/core/event_local.h> #include <library/cpp/actors/core/events.h> #include <library/cpp/actors/core/executor_pool_basic.h> #include <library/cpp/actors/core/executor_pool_io.h> #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/core/log.h> -#include <library/cpp/actors/core/mon.h> +#include <library/cpp/actors/core/mon.h> #include <library/cpp/actors/core/scheduler_basic.h> #include <library/cpp/actors/interconnect/interconnect.h> -#include <library/cpp/actors/protos/services_common.pb.h> +#include <library/cpp/actors/protos/services_common.pb.h> #include <library/cpp/actors/util/affinity.h> #include <library/cpp/svnversion/svnversion.h> #include <library/cpp/testing/unittest/registar.h> @@ -39,14 +39,14 @@ #include <util/folder/dirut.h> #include <util/generic/hash.h> -#include <util/generic/queue.h> +#include <util/generic/queue.h> #include <util/generic/string.h> #include <util/generic/yexception.h> #include <util/generic/ymath.h> #include <util/random/entropy.h> #include <util/stream/input.h> #include <util/string/printf.h> -#include <util/system/backtrace.h> +#include <util/system/backtrace.h> #include <util/system/defaults.h> #include <util/system/event.h> #include <util/system/sanitizers.h> diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.cpp index 687b809362..305cd7a8ea 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.cpp @@ -2,7 +2,7 @@ namespace NKikimr { -void 
TTestInitCorruptedError::TestFSM(const TActorContext &ctx) { +void TTestInitCorruptedError::TestFSM(const TActorContext &ctx) { VERBOSE_COUT("Test step " << TestStep); switch (TestStep) { case 0: @@ -13,18 +13,18 @@ void TTestInitCorruptedError::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); break; case 10: - TEST_RESPONSE(EvYardInitResult, CORRUPTED); + TEST_RESPONSE(EvYardInitResult, CORRUPTED); VERBOSE_COUT(" Sending TEvInit again"); ctx.Send(Yard, new NPDisk::TEvYardInit(4, VDiskID, *PDiskGuid)); break; case 20: - TEST_RESPONSE(EvYardInitResult, CORRUPTED); + TEST_RESPONSE(EvYardInitResult, CORRUPTED); break; case 30: - TEST_RESPONSE(EvYardInitResult, CORRUPTED); + TEST_RESPONSE(EvYardInitResult, CORRUPTED); break; case 40: - TEST_RESPONSE(EvYardInitResult, CORRUPTED); + TEST_RESPONSE(EvYardInitResult, CORRUPTED); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -126,7 +126,7 @@ void TTestIncorrectRequests::TestFSM(const TActorContext &ctx) { case 20: TEST_RESPONSE(EvLogResult, INVALID_OWNER); VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 100500})); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 100500})); break; case 30: { @@ -307,13 +307,13 @@ void TTestEmptyLogRead::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 20: TEST_RESPONSE(EvReadLogResult, OK); ASSERT_YTHROW(LastResponse.LogRecords.size() == 0, "Unexpected LogRecords size == " << LastResponse.LogRecords.size()); - ASSERT_YTHROW(LastResponse.IsEndOfLog, + ASSERT_YTHROW(LastResponse.IsEndOfLog, "Unexpected IsEndOfLog = " << (int)LastResponse.IsEndOfLog); VERBOSE_COUT("Done"); @@ -342,25 +342,25 @@ void 
TTestChunkWriteReadWhole::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; ChunkSize = LastResponse.ChunkSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteData = PrepareData(ChunkSize); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -370,12 +370,12 @@ void TTestChunkWriteReadWhole::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, commitRecord, data2, TLsnSeg(1, 1), (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, ChunkSize, 1, nullptr)); break; - case 50: + case 50: { TEST_RESPONSE(EvChunkReadResult, OK); TString expectedData = ChunkWriteData; @@ -516,16 +516,16 @@ void TTestChunkRecommit::TestFSM(const TActorContext &ctx) { 
Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; BlockSize = LastResponse.AppendBlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; - VERBOSE_COUT(" Sending TEvChunkWrite"); + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; + VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteData1 = PrepareData(BlockSize * 2); ChunkWriteData2 = PrepareData(BlockSize); Commit1Data = PrepareData(5030); @@ -534,11 +534,11 @@ void TTestChunkRecommit::TestFSM(const TActorContext &ctx) { ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteParts[0].Data = ChunkWriteData1.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData1.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -550,7 +550,7 @@ void TTestChunkRecommit::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteParts[0].Data = ChunkWriteData2.data(); @@ -559,7 +559,7 @@ void TTestChunkRecommit::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 
(ui32)ChunkWriteData1.size(), new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; - case 50: + case 50: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -570,13 +570,13 @@ void TTestChunkRecommit::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 60: + case 60: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, (ui32)ChunkData.size(), 1, nullptr)); break; - case 70: + case 70: TEST_RESPONSE(EvChunkReadResult, OK); TEST_DATA_EQUALS(LastResponse.Data.ToString(), ChunkData); VERBOSE_COUT("Done"); @@ -604,24 +604,24 @@ void TTestChunkRestartRecommit1::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; ChunkWriteData1 = PrepareData(LastResponse.AppendBlockSize); - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; Commit1Data = PrepareData(5030); VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteParts[0].Data = ChunkWriteData1.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData1.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new 
NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, true, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -635,7 +635,7 @@ void TTestChunkRestartRecommit1::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -804,7 +804,7 @@ void TTestChunkDelete2::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1000)); break; case 30: - TEST_RESPONSE(EvChunkReserveResult, OUT_OF_SPACE); + TEST_RESPONSE(EvChunkReserveResult, OUT_OF_SPACE); VERBOSE_COUT("Done"); SignalDoneEvent(); break; @@ -998,7 +998,7 @@ void TTestChunksLockUnlockReserve::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); break; case 50: - TEST_RESPONSE(EvChunkReserveResult, OUT_OF_SPACE); + TEST_RESPONSE(EvChunkReserveResult, OUT_OF_SPACE); ctx.Send(Yard, new NPDisk::TEvChunksUnlock()); break; case 60: @@ -1057,8 +1057,8 @@ void TTestHttpInfoFileDoesntExist::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); break; case 10: - TEST_RESPONSE(EvYardInitResult, CORRUPTED); - ASSERT_YTHROW(LastResponse.Status == NKikimrProto::CORRUPTED, StatusToString(LastResponse.Status)); + TEST_RESPONSE(EvYardInitResult, CORRUPTED); + ASSERT_YTHROW(LastResponse.Status == NKikimrProto::CORRUPTED, StatusToString(LastResponse.Status)); VERBOSE_COUT("Sending TEvHttpInfo"); ctx.Send(Yard, new NMon::TEvHttpInfo(MonService2HttpRequest)); break; @@ -1110,11 +1110,11 @@ void TTestBootingState::TestFSM(const TActorContext &ctx) { } else { ASSERT_YTHROW(false, "Unexpecter response type"); } - VERBOSE_COUT("EvYardAnswered# " << EvYardAnswered << " EvHttpInfo, answered " << AnsweredRequests - << " of " << HttpRequestsCount << " requests 
answered: " - << BootingAnsweredRequests << " requests answered in Booting state " - << OKAnsweredRequests << " requests answered in OK state " - << ErrorAnsweredRequests << " requests answered in Error state" << Endl); + VERBOSE_COUT("EvYardAnswered# " << EvYardAnswered << " EvHttpInfo, answered " << AnsweredRequests + << " of " << HttpRequestsCount << " requests answered: " + << BootingAnsweredRequests << " requests answered in Booting state " + << OKAnsweredRequests << " requests answered in OK state " + << ErrorAnsweredRequests << " requests answered in Error state" << Endl); if (AnsweredRequests == HttpRequestsCount && EvYardAnswered) { VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -1132,19 +1132,19 @@ void TTestWhiteboard::TestFSM(const TActorContext &ctx) { case 0: { ASSERT_YTHROW(LastResponse.Status == NKikimrProto::OK, StatusToString(LastResponse.Status)); - TActorId whiteboardID = NNodeWhiteboard::MakeNodeWhiteboardServiceId(SelfId().NodeId()); - ctx.ExecutorThread.ActorSystem->RegisterLocalService(whiteboardID, SelfId()); - TActorId nodeWardenId = MakeBlobStorageNodeWardenID(SelfId().NodeId()); - ctx.ExecutorThread.ActorSystem->RegisterLocalService(nodeWardenId, SelfId()); + TActorId whiteboardID = NNodeWhiteboard::MakeNodeWhiteboardServiceId(SelfId().NodeId()); + ctx.ExecutorThread.ActorSystem->RegisterLocalService(whiteboardID, SelfId()); + TActorId nodeWardenId = MakeBlobStorageNodeWardenID(SelfId().NodeId()); + ctx.ExecutorThread.ActorSystem->RegisterLocalService(nodeWardenId, SelfId()); for (int owner = 0; owner < ExpectedOwnerCount; ++owner) { - ctx.Send(Yard, new NPDisk::TEvYardInit(2, TVDiskID(0, 0, 0, 0, owner), *PDiskGuid, TActorId(), SelfId())); + ctx.Send(Yard, new NPDisk::TEvYardInit(2, TVDiskID(0, 0, 0, 0, owner), *PDiskGuid, TActorId(), SelfId())); } TestStep += 10; break; } case 10: ReceiveEvent(); - if (IsPDiskResultReceived && RemainingVDiskResults == 0 + if (IsPDiskResultReceived && RemainingVDiskResults == 0 && 
IsDiskMetricsResultReceived && IsPDiskLightsResultReceived) { LastResponse.Status = NKikimrProto::OK; SignalDoneEvent(); @@ -1163,33 +1163,33 @@ void TTestWhiteboard::TestFSM(const TActorContext &ctx) { void TTestWhiteboard::ReceiveEvent() { if (LastResponse.whiteboardPDiskResult) { ASSERT_YTHROW(LastResponse.whiteboardPDiskResult->Type() == - NNodeWhiteboard::TEvWhiteboard::EvPDiskStateUpdate, "Unexpected message"); + NNodeWhiteboard::TEvWhiteboard::EvPDiskStateUpdate, "Unexpected message"); if (LastResponse.whiteboardPDiskResult->Record.HasPDiskId() - && LastResponse.whiteboardPDiskResult->Record.HasAvailableSize() - && LastResponse.whiteboardPDiskResult->Record.HasTotalSize() - && LastResponse.whiteboardPDiskResult->Record.HasState()) { + && LastResponse.whiteboardPDiskResult->Record.HasAvailableSize() + && LastResponse.whiteboardPDiskResult->Record.HasTotalSize() + && LastResponse.whiteboardPDiskResult->Record.HasState()) { IsPDiskResultReceived = true; VERBOSE_COUT("Received PDiskResult"); } else if (LastResponse.whiteboardPDiskResult->Record.HasPDiskId() - && LastResponse.whiteboardPDiskResult->Record.HasRealtime() - && LastResponse.whiteboardPDiskResult->Record.HasDevice()) { + && LastResponse.whiteboardPDiskResult->Record.HasRealtime() + && LastResponse.whiteboardPDiskResult->Record.HasDevice()) { IsPDiskLightsResultReceived = true; VERBOSE_COUT("Received PDiskLightsResult"); } - } - - if (LastResponse.whiteboardVDiskResult && LastResponse.whiteboardVDiskResult->Record.HasAllocatedSize()) { + } + + if (LastResponse.whiteboardVDiskResult && LastResponse.whiteboardVDiskResult->Record.HasAllocatedSize()) { ASSERT_YTHROW(LastResponse.whiteboardVDiskResult->Type() == - NNodeWhiteboard::TEvWhiteboard::EvVDiskStateUpdate, "Unexpected message"); + NNodeWhiteboard::TEvWhiteboard::EvVDiskStateUpdate, "Unexpected message"); const TVDiskID vDiskID = VDiskIDFromVDiskID(LastResponse.whiteboardVDiskResult->Record.GetVDiskId()); ui32 vDisk = vDiskID.VDisk; - 
VERBOSE_COUT("VDiskResult received, VDiskId# " << vDiskID << " vDisk#" << vDisk); - if (--RemainingVDiskResults == 0) { + VERBOSE_COUT("VDiskResult received, VDiskId# " << vDiskID << " vDisk#" << vDisk); + if (--RemainingVDiskResults == 0) { VERBOSE_COUT("Received all VDiskResults"); } - } - if (LastResponse.whiteboardDiskMetricsResult) { + } + if (LastResponse.whiteboardDiskMetricsResult) { ASSERT_YTHROW(LastResponse.whiteboardDiskMetricsResult->Type() == TEvBlobStorage::EvControllerUpdateDiskStatus, "Unexpected message"); IsDiskMetricsResultReceived = true; @@ -1249,7 +1249,7 @@ void TTestFirstRecordToKeepReadB::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 20: Data = PrepareData(100500); @@ -1467,15 +1467,15 @@ void TTestHugeChunkAndLotsOfTinyAsyncLogOrder::TestFSM(const TActorContext &ctx) Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; BlockSize = LastResponse.AppendBlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvYardControl Pause"); ctx.Send(Yard, new NPDisk::TEvYardControl(NPDisk::TEvYardControl::ActionPause, nullptr)); VERBOSE_COUT(" Sending TEvChunkWrite"); @@ -1484,7 +1484,7 @@ void TTestHugeChunkAndLotsOfTinyAsyncLogOrder::TestFSM(const 
TActorContext &ctx) ChunkWriteData = PrepareData(TotalDataSize); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 6)); VERBOSE_COUT(" Sending TEvLog messages"); MessagesToSend = TotalDataSize / Data.size(); @@ -1501,14 +1501,14 @@ void TTestHugeChunkAndLotsOfTinyAsyncLogOrder::TestFSM(const TActorContext &ctx) Responses = 0; break; } - case 30: + case 30: TEST_RESPONSE(EvYardControlResult, OK); break; - case 40: + case 40: TEST_RESPONSE(EvYardControlResult, OK); break; case 50: - case 60: + case 60: if (LastResponse.EventType == TEvBlobStorage::EvLogResult) { TEST_RESPONSE(EvLogResult, OK); TInstant currentTime = Now(); @@ -1577,13 +1577,13 @@ void TTestHugeChunkAndLotsOfTinyAsyncLogOrder::TestFSM(const TActorContext &ctx) break; } [[fallthrough]]; - case 70: + case 70: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, (ui32)ChunkWriteData.size(), 1, nullptr)); break; - case 80: + case 80: TEST_RESPONSE(EvChunkReadResult, OK); ASSERT_YTHROW(LastResponse.ChunkIdx == ChunkIdx, "Unexpected chunkIdx=" << LastResponse.ChunkIdx); TEST_DATA_EQUALS(LastResponse.Data.ToString(), ChunkWriteData); @@ -1613,13 +1613,13 @@ void TTestChunkPriorityBlock::TestFSM(const TActorContext &ctx) { SafeSize = ((ENABLE_VALGRIND_REQUESTS | IS_SLOW_MACHINE) ? 
(8 << 20) : (128 << 20)) / LastResponse.AppendBlockSize * LastResponse.AppendBlockSize; ChunkWriteData = PrepareData(SafeSize); - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 5)); - break; - case 20: - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == PausedChunkWrites, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIds = LastResponse.ChunkIds; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 5)); + break; + case 20: + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == PausedChunkWrites, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIds = LastResponse.ChunkIds; ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteParts[0].Data = ChunkWriteData.data(); @@ -1628,23 +1628,23 @@ void TTestChunkPriorityBlock::TestFSM(const TActorContext &ctx) { VERBOSE_COUT(" Sending TEvYardControl Pause"); ctx.Send(Yard, new NPDisk::TEvYardControl(NPDisk::TEvYardControl::ActionPause, nullptr)); - for (ui32 i = 0; i < PausedChunkWrites; ++i) { - bool last = i == PausedChunkWrites - 1; - ui8 priority = last ? 6 : 5; - ui64 cookie = last ? 1 : 2; - VERBOSE_COUT(" Sending TEvChunkWrite cookie# " << cookie << " priority# " << (int)priority); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIds[i], 0, - new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)cookie, true, priority)); - } + for (ui32 i = 0; i < PausedChunkWrites; ++i) { + bool last = i == PausedChunkWrites - 1; + ui8 priority = last ? 6 : 5; + ui64 cookie = last ? 
1 : 2; + VERBOSE_COUT(" Sending TEvChunkWrite cookie# " << cookie << " priority# " << (int)priority); + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIds[i], 0, + new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)cookie, true, priority)); + } VERBOSE_COUT(" Sending TEvYardControl Resume"); ctx.Send(Yard, new NPDisk::TEvYardControl(NPDisk::TEvYardControl::ActionResume, nullptr)); break; case 30: - case 40: + case 40: TEST_RESPONSE(EvYardControlResult, OK); break; - case 50: + case 50: if (LastResponse.EventType == TEvBlobStorage::EvChunkWriteResult) { TEST_RESPONSE(EvChunkWriteResult, OK); @@ -1656,9 +1656,9 @@ void TTestChunkPriorityBlock::TestFSM(const TActorContext &ctx) { ++Iteration; ASSERT_YTHROW(Iteration < 30, "ERROR: Low priority write is blocked."); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIds[0], 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIds[0], 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)2, true, 1)); - VERBOSE_COUT(" Sending TEvChunkWrite cookie# 2 priority# 1"); + VERBOSE_COUT(" Sending TEvChunkWrite cookie# 2 priority# 1"); } return; default: @@ -1723,7 +1723,7 @@ void TTestLogDamageSector3Append1::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 20: { @@ -1761,7 +1761,7 @@ void TTestLogRead2Sectors::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 20: { @@ -1856,7 +1856,7 @@ void TTestLogKeep5Plus1::TestFSM(const TActorContext &ctx) { ChunkSize = LastResponse.ChunkSize; 
VERBOSE_COUT(" Owner=" << (int)Owner << " ChunkSize=" << ChunkSize); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, ChunkSize * 2)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, ChunkSize * 2)); break; } case 20: @@ -1923,7 +1923,7 @@ void TTestLogReadRecords2To5::TestFSM(const TActorContext &ctx) { "Expected 1 starting point, got " << LastResponse.StartingPoints.size()); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, ChunkSize * 2)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, ChunkSize * 2)); break; case 20: { @@ -1947,9 +1947,9 @@ void TTestLogReadRecords2To5::TestFSM(const TActorContext &ctx) { TestStep += 10; } -std::atomic<ui32> TTestSysLogReordering::VDiskNum = 0; -std::atomic<ui32> TTestSysLogReorderingLogCheck::VDiskNum = 0; - +std::atomic<ui32> TTestSysLogReordering::VDiskNum = 0; +std::atomic<ui32> TTestSysLogReorderingLogCheck::VDiskNum = 0; + void TTestWriteAndReleaseChunk2A::TestFSM(const TActorContext &ctx) { VERBOSE_COUT("Test step " << TestStep); switch (TestStep) { @@ -1964,14 +1964,14 @@ void TTestWriteAndReleaseChunk2A::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; AppendBlockSize = LastResponse.AppendBlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + 
ChunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[2]); ChunkWriteData = PrepareData(AppendBlockSize, 1); @@ -1979,10 +1979,10 @@ void TTestWriteAndReleaseChunk2A::TestFSM(const TActorContext &ctx) { ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); ChunkWriteParts[1].Data = ChunkWriteData.data(); ChunkWriteParts[1].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 2), (void*)42, false, 1)); break; - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -1996,12 +1996,12 @@ void TTestWriteAndReleaseChunk2A::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, ChunkWriteData.size(), 1, nullptr)); break; - case 50: + case 50: { TEST_RESPONSE(EvChunkReadResult, OK); TEST_DATA_EQUALS(LastResponse.Data.ToString(), ChunkWriteData); @@ -2014,7 +2014,7 @@ void TTestWriteAndReleaseChunk2A::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 60: + case 60: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -2040,16 +2040,16 @@ void TTestWriteAndCheckChunk2B::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; AppendBlockSize = LastResponse.AppendBlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = 
LastResponse.ChunkIds[0]; - ASSERT_YTHROW(ChunkIdx == 2, "This test is designed to work with chunk 2, but got chunk " << ChunkIdx); + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; + ASSERT_YTHROW(ChunkIdx == 2, "This test is designed to work with chunk 2, but got chunk " << ChunkIdx); VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[2]); ChunkWriteData = PrepareData(AppendBlockSize, 2); @@ -2057,11 +2057,11 @@ void TTestWriteAndCheckChunk2B::TestFSM(const TActorContext &ctx) { ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); ChunkWriteParts[1].Data = ChunkWriteData.data(); ChunkWriteParts[1].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 2), (void*)42, false, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -2072,19 +2072,19 @@ void TTestWriteAndCheckChunk2B::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, commitRecord, Data, TLsnSeg(3, 3), (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, AppendBlockSize, 1, nullptr)); break; - case 50: + case 50: TEST_RESPONSE(EvChunkReadResult, OK); TEST_DATA_EQUALS(LastResponse.Data.ToString(), ChunkWriteData); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 
AppendBlockSize, AppendBlockSize, 1, nullptr)); break; - case 60: + case 60: TEST_RESPONSE(EvChunkReadResult, OK); TEST_DATA_EQUALS(LastResponse.Data.ToString(), ChunkWriteData); VERBOSE_COUT("Done"); @@ -2379,7 +2379,7 @@ void TTestLastLsn::TestFSM(const TActorContext &ctx) { } -void TTestCheckLog::TestFSM(const TActorContext &ctx) { +void TTestCheckLog::TestFSM(const TActorContext &ctx) { VERBOSE_COUT("Test step " << TestStep); switch (TestStep) { case 0: @@ -2395,10 +2395,10 @@ void TTestCheckLog::TestFSM(const TActorContext &ctx) { AppendBlockSize = LastResponse.AppendBlockSize; ChunkSize = LastResponse.ChunkSize; VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, 16 << 20)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, 16 << 20)); break; } - case 20: + case 20: { TEST_RESPONSE(EvReadLogResult, OK); ASSERT_YTHROW(LastResponse.LogRecords.size() == 9 || LastResponse.LogRecords.size() == 10, @@ -2445,24 +2445,24 @@ void TTestChunkFlush::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; AppendBlockSize = LastResponse.AppendBlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; - ASSERT_YTHROW(ChunkIdx == 2, "This test is designed to work with chunk 2, but got " << ChunkIdx); + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; + ASSERT_YTHROW(ChunkIdx == 2, "This test is designed to work with chunk 2, but got " << 
ChunkIdx); VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteData = PrepareData(AppendBlockSize, 1); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, true, 1)); break; - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -2475,7 +2475,7 @@ void TTestChunkFlush::TestFSM(const TActorContext &ctx) { (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -2784,7 +2784,7 @@ void TTestHarakiri::TestFSM(const TActorContext &ctx) { case 20: TEST_RESPONSE(EvHarakiriResult, OK); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 30: TEST_RESPONSE(EvReadLogResult, INVALID_OWNER); @@ -2879,7 +2879,7 @@ void TTestSlay::TestFSM(const TActorContext &ctx) { case 20: TEST_RESPONSE(EvSlayResult, OK); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 30: TEST_RESPONSE(EvReadLogResult, INVALID_OWNER); @@ -2893,7 +2893,7 @@ void TTestSlay::TestFSM(const TActorContext &ctx) { ChunkWriteData = PrepareData(300); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, 42, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, 42, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); 
break; case 50: @@ -2927,7 +2927,7 @@ void TTestSlayRace::TestFSM(const TActorContext &ctx) { case 20: TEST_RESPONSE(EvSlayResult, RACE); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 30: TEST_RESPONSE(EvReadLogResult, OK); @@ -2936,24 +2936,24 @@ void TTestSlayRace::TestFSM(const TActorContext &ctx) { break; case 40: TEST_RESPONSE(EvLogResult, OK); - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - case 50: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ui32 chunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + case 50: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ui32 chunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteData = PrepareData(300); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, chunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, chunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; - } - case 60: + } + case 60: TEST_RESPONSE(EvChunkWriteResult, OK); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -2989,12 +2989,12 @@ void TTestSlayRecreate::TestFSM(const TActorContext &ctx) { break; case 30: TEST_RESPONSE(EvYardInitResult, OK); - // There is no guarantee that unknown VDisk will recieve same OwnerId on evere TEvYardInit - // UNIT_ASSERT_VALUES_EQUAL(Owner, LastResponse.Owner); - Owner = LastResponse.Owner; + // 
There is no guarantee that unknown VDisk will recieve same OwnerId on evere TEvYardInit + // UNIT_ASSERT_VALUES_EQUAL(Owner, LastResponse.Owner); + Owner = LastResponse.Owner; UNIT_ASSERT(OwnerRound != LastResponse.OwnerRound); VERBOSE_COUT(" Sending TEvReadLog"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); break; case 40: TEST_RESPONSE(EvReadLogResult, INVALID_ROUND); @@ -3073,25 +3073,25 @@ void TTestDestructionWhileReadingChunk::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; ChunkSize = LastResponse.ChunkSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteData = PrepareData(ChunkSize); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -3101,14 +3101,14 @@ void TTestDestructionWhileReadingChunk::TestFSM(const TActorContext 
&ctx) { ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, commitRecord, data2, TLsnSeg(1, 1), (void*)43)); break; } - case 40: + case 40: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, ChunkSize, 1, nullptr)); VERBOSE_COUT("Done"); SignalDoneEvent(); break; - case 50: + case 50: break; default: ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; @@ -3166,7 +3166,7 @@ void TTestDestructionWhileReadingLog::TestFSM(const TActorContext &ctx) { break; case 20: TEST_RESPONSE(EvLogResult, OK); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); VERBOSE_COUT("Done"); SignalDoneEvent(); break; @@ -3194,25 +3194,25 @@ void TTestChunkDeletionWhileWritingIt::TestFSM(const TActorContext &ctx) { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; ChunkSize = LastResponse.ChunkSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; - } - case 20: - { - TEST_RESPONSE(EvChunkReserveResult, OK); - ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; + } + case 20: + { + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; VERBOSE_COUT(" Sending TEvChunkWrite"); ChunkWriteParts.Reset(new NPDisk::TEvChunkWrite::TPart[1]); ChunkWriteData = PrepareData(1); ChunkWriteParts[0].Data = ChunkWriteData.data(); ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); - ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, + ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, 0, new 
NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); break; } - case 30: + case 30: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -3230,7 +3230,7 @@ void TTestChunkDeletionWhileWritingIt::TestFSM(const TActorContext &ctx) { ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, commitRecord, data2, TLsnSeg(1, 1), (void*)43)); break; } - case 40: + case 40: if (LastResponse.EventType == TEvBlobStorage::EvLogResult) { //TEST_RESPONSE(EvLogResult, ERROR); IsOk = false; @@ -3239,7 +3239,7 @@ void TTestChunkDeletionWhileWritingIt::TestFSM(const TActorContext &ctx) { IsOk = true; } break; - case 50: + case 50: if (IsOk) { //TEST_RESPONSE(EvLogResult, OK); } else { @@ -3632,14 +3632,14 @@ void TTestStartingPointRebootsIteration::TestFSM(const TActorContext &ctx) { NextLsn = StartingPointLsn + 1000000; VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, 128 << 20)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, 128 << 20)); break; case 20: TEST_RESPONSE(EvReadLogResult, OK); if (StartingPointLsn) { ASSERT_YTHROW(LastResponse.LogRecords.size() != 0, "Unexpected LogRecords size == " << LastResponse.LogRecords.size()); - ASSERT_YTHROW(LastResponse.IsEndOfLog, + ASSERT_YTHROW(LastResponse.IsEndOfLog, "Unexpected IsEndOfLog = " << (int)LastResponse.IsEndOfLog); NextLsn = LastResponse.LogRecords.back().Lsn + 1000000; FirstLsn = LastResponse.LogRecords[0].Lsn; @@ -3651,7 +3651,7 @@ void TTestStartingPointRebootsIteration::TestFSM(const TActorContext &ctx) { } else { ASSERT_YTHROW(LastResponse.LogRecords.size() == 0, "Unexpected LogRecords size == " << LastResponse.LogRecords.size()); - ASSERT_YTHROW(LastResponse.IsEndOfLog, + ASSERT_YTHROW(LastResponse.IsEndOfLog, "Unexpected IsEndOfLog = " << (int)LastResponse.IsEndOfLog); VERBOSE_COUT(" 
Sending TEvChunkReserve"); ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.h index 5d1b843783..7eeb5db0bb 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_actions.h @@ -4,10 +4,10 @@ #include "blobstorage_pdisk_ut.h" #include "blobstorage_pdisk_ut_base_test.h" #include "blobstorage_pdisk_ut_http_request.h" -#include "blobstorage_pdisk_chunk_id_formatter.h" +#include "blobstorage_pdisk_chunk_id_formatter.h" + +#include <util/random/mersenne64.h> -#include <util/random/mersenne64.h> - namespace NKikimr { template <bool IsNewOwner, ui32 GroupGeneration> @@ -50,10 +50,10 @@ public: {} }; -class TTestInitCorruptedError : public TBaseTest { +class TTestInitCorruptedError : public TBaseTest { void TestFSM(const TActorContext &ctx); public: - TTestInitCorruptedError(const TIntrusivePtr<TTestConfig> &cfg) + TTestInitCorruptedError(const TIntrusivePtr<TTestConfig> &cfg) : TBaseTest(cfg) {} }; @@ -114,50 +114,50 @@ public: {} }; -class TTestWholeLogRead : public TBaseTest { - NPDisk::TOwner Owner; - NPDisk::TOwnerRound OwnerRound; - - void TestFSM(const TActorContext &ctx) { - Ctest << "Test step " << TestStep << Endl; - switch (TestStep) { - case 0: - ASSERT_YTHROW(LastResponse.Status == NKikimrProto::OK, StatusToString(LastResponse.Status)); - Ctest << " Sending TEvInit" << Endl; - ctx.Send(Yard, new NPDisk::TEvYardInit(5, VDiskID, *PDiskGuid)); - break; - case 10: - TEST_RESPONSE(EvYardInitResult, OK); - Owner = LastResponse.Owner; - OwnerRound = LastResponse.OwnerRound; - Ctest << " Sending TEvLogRead" << Endl; - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); - break; - case 20: - TEST_RESPONSE(EvReadLogResult, OK); - if (!LastResponse.IsEndOfLog) { - TestStep -= 10; - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, 
LastResponse.NextPosition)); - break; - } else { - Ctest << "Done" << Endl; - SignalDoneEvent(); - break; - } - break; - default: - ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } - -public: - TTestWholeLogRead(const TIntrusivePtr<TTestConfig> &cfg) - : TBaseTest(cfg) - {} -}; - +class TTestWholeLogRead : public TBaseTest { + NPDisk::TOwner Owner; + NPDisk::TOwnerRound OwnerRound; + + void TestFSM(const TActorContext &ctx) { + Ctest << "Test step " << TestStep << Endl; + switch (TestStep) { + case 0: + ASSERT_YTHROW(LastResponse.Status == NKikimrProto::OK, StatusToString(LastResponse.Status)); + Ctest << " Sending TEvInit" << Endl; + ctx.Send(Yard, new NPDisk::TEvYardInit(5, VDiskID, *PDiskGuid)); + break; + case 10: + TEST_RESPONSE(EvYardInitResult, OK); + Owner = LastResponse.Owner; + OwnerRound = LastResponse.OwnerRound; + Ctest << " Sending TEvLogRead" << Endl; + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + break; + case 20: + TEST_RESPONSE(EvReadLogResult, OK); + if (!LastResponse.IsEndOfLog) { + TestStep -= 10; + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, LastResponse.NextPosition)); + break; + } else { + Ctest << "Done" << Endl; + SignalDoneEvent(); + break; + } + break; + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } + +public: + TTestWholeLogRead(const TIntrusivePtr<TTestConfig> &cfg) + : TBaseTest(cfg) + {} +}; + template <ui32 Size> class TTestLogWriteRead : public TBaseTest { NPDisk::TOwner Owner; @@ -189,14 +189,14 @@ class TTestLogWriteRead : public TBaseTest { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0})); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0})); break; case 40: 
TEST_RESPONSE(EvReadLogResult, OK); ASSERT_YTHROW(LastResponse.LogRecords.size() == 1, "Unexpected LogRecords size == " << LastResponse.LogRecords.size()); TEST_LOG_RECORD(LastResponse.LogRecords[0], 123, 0, data); - ASSERT_YTHROW(LastResponse.IsEndOfLog, + ASSERT_YTHROW(LastResponse.IsEndOfLog, "Unexpected IsEndOfLog = " << (int)LastResponse.IsEndOfLog); VERBOSE_COUT("Done"); SignalDoneEvent(); @@ -251,248 +251,248 @@ public: {} }; -template<bool Equal> -class TTestLogWriteCut : public TBaseTest { - static constexpr ui32 VDiskCount = 2; - -public: - static TAtomic VDiskNum; - static TVector<TChunkIdx> CommitedChunks[VDiskCount]; - - static void Reset() { - VDiskNum = 0; - for (ui32 i = 0; i < VDiskCount; ++i) { - CommitedChunks[i].clear(); - } - } - -private: - - ui32 MyNum = 0; - NPDisk::TOwner Owner; - NPDisk::TOwnerRound OwnerRound; - ui64 Lsn = 100; - ui32 LogRecordSizeMin = 16 << 10; - ui32 LogRecordSizeMax = 32 << 10; - ui32 ChunkSize; - ui32 EvLogsToSend; - ui32 EvLogsReceived = 0; - - const ui32 LogRecordsToKeep = 100; - - ui32 ChunksToReserve; - TVector<TChunkIdx> ReservedChunks; - ui64 LastCommitLsn = 0; - - - NPrivate::TMersenne64 RandGen = Seed(); - - void CheckOwnedChunks(TVector<TChunkIdx>& owned) { - std::sort(CommitedChunks[MyNum].begin(), CommitedChunks[MyNum].end()); - std::sort(owned.begin(), owned.end()); - - TStringStream str; - str << "MyNum# " << MyNum << " CommitedChunks# "; - NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(CommitedChunks[MyNum]); - str << " owned# "; - NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(owned); - str << Endl; - Ctest << str.Str(); - ASSERT_YTHROW(CommitedChunks[MyNum].size() <= owned.size(), "MyNum# " << MyNum << " size mismatch, " - << CommitedChunks[MyNum].size() << " > " << owned.size()); - for (size_t i = 0; i < CommitedChunks[MyNum].size(); ++i) { - ASSERT_YTHROW(owned[i] == CommitedChunks[MyNum][i], - "MyNum# " << MyNum << " lost CommitedChunks, chunkIdx# " - << CommitedChunks[MyNum][i] << " != 
" << owned[i]); - } - } - - void TestFSM(const TActorContext &ctx) { - //Ctest << "Test step " << TestStep << Endl; - switch (TestStep) { - case 0: - Ctest << " Sending TEvInit" << Endl; - ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); - break; - case 10: - { - auto *ev = Event->Get<NPDisk::TEvYardInitResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - Owner = ev->PDiskParams->Owner; - OwnerRound = ev->PDiskParams->OwnerRound; - ChunkSize = ev->PDiskParams->ChunkSize; - CheckOwnedChunks(ev->OwnedChunks); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); - break; - } - case 20: - { - auto *ev = Event->Get<NPDisk::TEvReadLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK && ev->IsEndOfLog, ev->ToString()); - - for (const auto& res : ev->Results) { - Lsn = Max(Lsn, res.Lsn + 1); - } - ctx.Send(Yard, new NPDisk::TEvCheckSpace(Owner, OwnerRound)); - Ctest << " Sending TEvLog" << Endl; - break; - } - case 30: - { - auto *ev = Event->Get<NPDisk::TEvCheckSpaceResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - ui32 totalChunks = ev->TotalChunks; - ui32 logRecordSizeMean = LogRecordSizeMin + (LogRecordSizeMax - LogRecordSizeMin) / 2; - if (Equal) { - EvLogsToSend = totalChunks / 4 * ChunkSize / logRecordSizeMean; - } else { - if (MyNum == 0) { - // First VDisk writes only one record to first LogChunk - EvLogsToSend = ChunkSize / 2 / logRecordSizeMean; - } else { - // Second VDisk writes full device - EvLogsToSend = totalChunks / 2 * ChunkSize / logRecordSizeMean + 67; - } - } - Ctest << "totalChunks# " << totalChunks << " ChunkSize# " << ChunkSize << " MyNum# " << MyNum - << " EvLogsToSend# " << EvLogsToSend << Endl; - - ChunksToReserve = ev->FreeChunks / 4; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, ChunksToReserve)); - break; - } - case 40: - { - auto *ev = Event->Get<NPDisk::TEvChunkReserveResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, 
ev->ToString()); - - ASSERT_YTHROW(ev->ChunkIds.size() == ChunksToReserve, - "Unexpected ChunkIds.size() == " << ev->ChunkIds.size()); - ReservedChunks = std::move(ev->ChunkIds); - ui32 logRecordSize = LogRecordSizeMin + RandGen.GenRand() % (LogRecordSizeMax - LogRecordSizeMin); - TString data = PrepareData(logRecordSize); - NPDisk::TCommitRecord comRec{}; - comRec.FirstLsnToKeep = Lsn; - comRec.IsStartingPoint = true; - if (ReservedChunks) { - comRec.CommitChunks.push_back(ReservedChunks.back()); - LastCommitLsn = Lsn; - Ctest << "MyNum# " << MyNum << " try commit chunkIdx# " << ReservedChunks.back() - << " Lsn# " << Lsn << Endl; - } - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, comRec, data, TLsnSeg(Lsn, Lsn), (void*)0)); - ++Lsn; - break; - } - case 50: - { - auto *ev = Event->Get<NPDisk::TEvLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - - if (LastCommitLsn == Lsn - 1) { - CommitedChunks[MyNum].push_back(ReservedChunks.back()); - Ctest << "MyNum# " << MyNum << " done commit chunkIdx# " << ReservedChunks.back() - << " LastCommitLsn# " << LastCommitLsn << Endl; - ReservedChunks.pop_back(); - } - ++EvLogsReceived; - //Ctest << "MyNum# " << MyNum << " recieve event num# " << EvLogsReceived << " Lsn# " << Lsn << Endl; - if (EvLogsReceived < EvLogsToSend - 1) { - TestStep -= 10; - } - ui32 logRecordSize = LogRecordSizeMin + RandGen.GenRand() % (LogRecordSizeMax - LogRecordSizeMin); - TString data = PrepareData(logRecordSize); - if (Lsn % LogRecordsToKeep == 0) { - NPDisk::TCommitRecord comRec{}; - comRec.FirstLsnToKeep = Lsn - LogRecordsToKeep; - if (ReservedChunks) { - comRec.CommitChunks.push_back(ReservedChunks.back()); - LastCommitLsn = Lsn; - Ctest << "try commit chunkIdx# " << ReservedChunks.back() << " Lsn# " << Lsn << Endl; - } - comRec.IsStartingPoint = true; - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, comRec, data, TLsnSeg(Lsn, Lsn), (void*)0)); - } else { - ctx.Send(Yard, new NPDisk::TEvLog(Owner, 
OwnerRound, 0, data, TLsnSeg(Lsn, Lsn), (void*)0)); - } - ++Lsn; - break; - } - case 60: - { - auto *ev = Event->Get<NPDisk::TEvLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - ++EvLogsReceived; - ASSERT_YTHROW(EvLogsReceived == EvLogsToSend, "PDUT-0001"); - Ctest << "MyNum# " << MyNum << " recieve event num# " << EvLogsReceived << Endl; - - ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); - break; - } - case 70: - { - auto *ev = Event->Get<NPDisk::TEvYardInitResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - Owner = ev->PDiskParams->Owner; - OwnerRound = ev->PDiskParams->OwnerRound; - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); - break; - } - case 80: - { - auto *ev = Event->Get<NPDisk::TEvReadLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - Ctest << "MyNum# " << MyNum << Endl; - for (const NKikimr::NPDisk::TLogRecord &log : ev->Results) { - Ctest << "Read log record# " << log.ToString(); - } - Ctest << Endl; - - const ui64 firstLsnToKeep = Lsn - Lsn % LogRecordsToKeep; - const ui64 lastLsnToKeep = Lsn - 1; - bool isInsideLsnToKeep = false; - TMaybe<const NKikimr::NPDisk::TLogRecord*> prevLog; - for (const NKikimr::NPDisk::TLogRecord &log : ev->Results) { - if (firstLsnToKeep <= log.Lsn) { - isInsideLsnToKeep = true; - } - ASSERT_YTHROW(log.Lsn <= lastLsnToKeep, "TestVDisk didn't wrote log with such lsn# " << log.Lsn); - - if (isInsideLsnToKeep && prevLog) { - ASSERT_YTHROW((**prevLog).Lsn == log.Lsn - 1, "PDisk must send log record with strongly" - << " increasing Lsn, prevLsn# " << (**prevLog).Lsn << " currentLsn# " << log.Lsn); - } - prevLog = &log; - } - ASSERT_YTHROW(isInsideLsnToKeep, "PDisk didn't send expected log records to TestVDisk"); - ASSERT_YTHROW(ev->Results.back().Lsn == lastLsnToKeep, "PDisk didn't send last log record with" - << " Lsn# " << lastLsnToKeep << " last recieved lsn# " << ev->Results.back().Lsn); - - Ctest << "Done" 
<< Endl; - SignalDoneEvent(); - break; - } - default: - ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } -public: - TTestLogWriteCut(const TIntrusivePtr<TTestConfig> &cfg) - : TBaseTest(cfg) - { - MyNum = AtomicGetAndIncrement(VDiskNum); - ASSERT_YTHROW(MyNum < VDiskCount, "MyNum should be less than VDiskCount"); - } -}; - -template<bool Equal> -TAtomic TTestLogWriteCut<Equal>::VDiskNum = 0; - -template<bool Equal> -TVector<TChunkIdx> TTestLogWriteCut<Equal>::CommitedChunks[VDiskCount]; - +template<bool Equal> +class TTestLogWriteCut : public TBaseTest { + static constexpr ui32 VDiskCount = 2; + +public: + static TAtomic VDiskNum; + static TVector<TChunkIdx> CommitedChunks[VDiskCount]; + + static void Reset() { + VDiskNum = 0; + for (ui32 i = 0; i < VDiskCount; ++i) { + CommitedChunks[i].clear(); + } + } + +private: + + ui32 MyNum = 0; + NPDisk::TOwner Owner; + NPDisk::TOwnerRound OwnerRound; + ui64 Lsn = 100; + ui32 LogRecordSizeMin = 16 << 10; + ui32 LogRecordSizeMax = 32 << 10; + ui32 ChunkSize; + ui32 EvLogsToSend; + ui32 EvLogsReceived = 0; + + const ui32 LogRecordsToKeep = 100; + + ui32 ChunksToReserve; + TVector<TChunkIdx> ReservedChunks; + ui64 LastCommitLsn = 0; + + + NPrivate::TMersenne64 RandGen = Seed(); + + void CheckOwnedChunks(TVector<TChunkIdx>& owned) { + std::sort(CommitedChunks[MyNum].begin(), CommitedChunks[MyNum].end()); + std::sort(owned.begin(), owned.end()); + + TStringStream str; + str << "MyNum# " << MyNum << " CommitedChunks# "; + NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(CommitedChunks[MyNum]); + str << " owned# "; + NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(owned); + str << Endl; + Ctest << str.Str(); + ASSERT_YTHROW(CommitedChunks[MyNum].size() <= owned.size(), "MyNum# " << MyNum << " size mismatch, " + << CommitedChunks[MyNum].size() << " > " << owned.size()); + for (size_t i = 0; i < CommitedChunks[MyNum].size(); ++i) { + ASSERT_YTHROW(owned[i] 
== CommitedChunks[MyNum][i], + "MyNum# " << MyNum << " lost CommitedChunks, chunkIdx# " + << CommitedChunks[MyNum][i] << " != " << owned[i]); + } + } + + void TestFSM(const TActorContext &ctx) { + //Ctest << "Test step " << TestStep << Endl; + switch (TestStep) { + case 0: + Ctest << " Sending TEvInit" << Endl; + ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); + break; + case 10: + { + auto *ev = Event->Get<NPDisk::TEvYardInitResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + Owner = ev->PDiskParams->Owner; + OwnerRound = ev->PDiskParams->OwnerRound; + ChunkSize = ev->PDiskParams->ChunkSize; + CheckOwnedChunks(ev->OwnedChunks); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + break; + } + case 20: + { + auto *ev = Event->Get<NPDisk::TEvReadLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK && ev->IsEndOfLog, ev->ToString()); + + for (const auto& res : ev->Results) { + Lsn = Max(Lsn, res.Lsn + 1); + } + ctx.Send(Yard, new NPDisk::TEvCheckSpace(Owner, OwnerRound)); + Ctest << " Sending TEvLog" << Endl; + break; + } + case 30: + { + auto *ev = Event->Get<NPDisk::TEvCheckSpaceResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + ui32 totalChunks = ev->TotalChunks; + ui32 logRecordSizeMean = LogRecordSizeMin + (LogRecordSizeMax - LogRecordSizeMin) / 2; + if (Equal) { + EvLogsToSend = totalChunks / 4 * ChunkSize / logRecordSizeMean; + } else { + if (MyNum == 0) { + // First VDisk writes only one record to first LogChunk + EvLogsToSend = ChunkSize / 2 / logRecordSizeMean; + } else { + // Second VDisk writes full device + EvLogsToSend = totalChunks / 2 * ChunkSize / logRecordSizeMean + 67; + } + } + Ctest << "totalChunks# " << totalChunks << " ChunkSize# " << ChunkSize << " MyNum# " << MyNum + << " EvLogsToSend# " << EvLogsToSend << Endl; + + ChunksToReserve = ev->FreeChunks / 4; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, ChunksToReserve)); + break; + } + case 
40: + { + auto *ev = Event->Get<NPDisk::TEvChunkReserveResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + + ASSERT_YTHROW(ev->ChunkIds.size() == ChunksToReserve, + "Unexpected ChunkIds.size() == " << ev->ChunkIds.size()); + ReservedChunks = std::move(ev->ChunkIds); + ui32 logRecordSize = LogRecordSizeMin + RandGen.GenRand() % (LogRecordSizeMax - LogRecordSizeMin); + TString data = PrepareData(logRecordSize); + NPDisk::TCommitRecord comRec{}; + comRec.FirstLsnToKeep = Lsn; + comRec.IsStartingPoint = true; + if (ReservedChunks) { + comRec.CommitChunks.push_back(ReservedChunks.back()); + LastCommitLsn = Lsn; + Ctest << "MyNum# " << MyNum << " try commit chunkIdx# " << ReservedChunks.back() + << " Lsn# " << Lsn << Endl; + } + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, comRec, data, TLsnSeg(Lsn, Lsn), (void*)0)); + ++Lsn; + break; + } + case 50: + { + auto *ev = Event->Get<NPDisk::TEvLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + + if (LastCommitLsn == Lsn - 1) { + CommitedChunks[MyNum].push_back(ReservedChunks.back()); + Ctest << "MyNum# " << MyNum << " done commit chunkIdx# " << ReservedChunks.back() + << " LastCommitLsn# " << LastCommitLsn << Endl; + ReservedChunks.pop_back(); + } + ++EvLogsReceived; + //Ctest << "MyNum# " << MyNum << " recieve event num# " << EvLogsReceived << " Lsn# " << Lsn << Endl; + if (EvLogsReceived < EvLogsToSend - 1) { + TestStep -= 10; + } + ui32 logRecordSize = LogRecordSizeMin + RandGen.GenRand() % (LogRecordSizeMax - LogRecordSizeMin); + TString data = PrepareData(logRecordSize); + if (Lsn % LogRecordsToKeep == 0) { + NPDisk::TCommitRecord comRec{}; + comRec.FirstLsnToKeep = Lsn - LogRecordsToKeep; + if (ReservedChunks) { + comRec.CommitChunks.push_back(ReservedChunks.back()); + LastCommitLsn = Lsn; + Ctest << "try commit chunkIdx# " << ReservedChunks.back() << " Lsn# " << Lsn << Endl; + } + comRec.IsStartingPoint = true; + ctx.Send(Yard, new NPDisk::TEvLog(Owner, 
OwnerRound, 0, comRec, data, TLsnSeg(Lsn, Lsn), (void*)0)); + } else { + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, data, TLsnSeg(Lsn, Lsn), (void*)0)); + } + ++Lsn; + break; + } + case 60: + { + auto *ev = Event->Get<NPDisk::TEvLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + ++EvLogsReceived; + ASSERT_YTHROW(EvLogsReceived == EvLogsToSend, "PDUT-0001"); + Ctest << "MyNum# " << MyNum << " recieve event num# " << EvLogsReceived << Endl; + + ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); + break; + } + case 70: + { + auto *ev = Event->Get<NPDisk::TEvYardInitResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + Owner = ev->PDiskParams->Owner; + OwnerRound = ev->PDiskParams->OwnerRound; + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + break; + } + case 80: + { + auto *ev = Event->Get<NPDisk::TEvReadLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + Ctest << "MyNum# " << MyNum << Endl; + for (const NKikimr::NPDisk::TLogRecord &log : ev->Results) { + Ctest << "Read log record# " << log.ToString(); + } + Ctest << Endl; + + const ui64 firstLsnToKeep = Lsn - Lsn % LogRecordsToKeep; + const ui64 lastLsnToKeep = Lsn - 1; + bool isInsideLsnToKeep = false; + TMaybe<const NKikimr::NPDisk::TLogRecord*> prevLog; + for (const NKikimr::NPDisk::TLogRecord &log : ev->Results) { + if (firstLsnToKeep <= log.Lsn) { + isInsideLsnToKeep = true; + } + ASSERT_YTHROW(log.Lsn <= lastLsnToKeep, "TestVDisk didn't wrote log with such lsn# " << log.Lsn); + + if (isInsideLsnToKeep && prevLog) { + ASSERT_YTHROW((**prevLog).Lsn == log.Lsn - 1, "PDisk must send log record with strongly" + << " increasing Lsn, prevLsn# " << (**prevLog).Lsn << " currentLsn# " << log.Lsn); + } + prevLog = &log; + } + ASSERT_YTHROW(isInsideLsnToKeep, "PDisk didn't send expected log records to TestVDisk"); + ASSERT_YTHROW(ev->Results.back().Lsn == lastLsnToKeep, "PDisk didn't send last log 
record with" + << " Lsn# " << lastLsnToKeep << " last recieved lsn# " << ev->Results.back().Lsn); + + Ctest << "Done" << Endl; + SignalDoneEvent(); + break; + } + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } +public: + TTestLogWriteCut(const TIntrusivePtr<TTestConfig> &cfg) + : TBaseTest(cfg) + { + MyNum = AtomicGetAndIncrement(VDiskNum); + ASSERT_YTHROW(MyNum < VDiskCount, "MyNum should be less than VDiskCount"); + } +}; + +template<bool Equal> +TAtomic TTestLogWriteCut<Equal>::VDiskNum = 0; + +template<bool Equal> +TVector<TChunkIdx> TTestLogWriteCut<Equal>::CommitedChunks[VDiskCount]; + template<ui64 LogRequests> class TTestLogWriteLsnConsistency : public TBaseTest { NPDisk::TOwner Owner; @@ -571,7 +571,7 @@ class TTestLog3Read : public TBaseTest { Owner = LastResponse.Owner; OwnerRound = LastResponse.OwnerRound; VERBOSE_COUT(" Sending TEvLogRead"); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, Size1+Size2+Size3)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0}, Size1+Size2+Size3)); break; case 20: { @@ -1049,24 +1049,24 @@ class TTestChunk3WriteRead : public TBaseTest { ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); break; case 10: - TEST_RESPONSE(EvYardInitResult, OK); - Owner = LastResponse.Owner; - OwnerRound = LastResponse.OwnerRound; - BlockSize = LastResponse.AppendBlockSize; - DataSize = (WishDataSize + BlockSize - 1) / BlockSize * BlockSize; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); - break; + TEST_RESPONSE(EvYardInitResult, OK); + Owner = LastResponse.Owner; + OwnerRound = LastResponse.OwnerRound; + BlockSize = LastResponse.AppendBlockSize; + DataSize = (WishDataSize + BlockSize - 1) / BlockSize * BlockSize; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, 1)); + break; case 20: - TEST_RESPONSE(EvChunkReserveResult, OK); - 
ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, - "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); - ChunkIdx = LastResponse.ChunkIds[0]; - Iteration = 0; + TEST_RESPONSE(EvChunkReserveResult, OK); + ASSERT_YTHROW(LastResponse.ChunkIds.size() == 1, + "Unexpected ChunkIds.size() == " << LastResponse.ChunkIds.size()); + ChunkIdx = LastResponse.ChunkIds[0]; + Iteration = 0; [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME case 30: - case 40: + case 40: { - if (Iteration) { + if (Iteration) { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); } @@ -1077,10 +1077,10 @@ class TTestChunk3WriteRead : public TBaseTest { ChunkWriteParts[0].Size = (ui32)ChunkWriteData.size(); ctx.Send(Yard, new NPDisk::TEvChunkWrite(Owner, OwnerRound, ChunkIdx, DataSize * Iteration, new NPDisk::TEvChunkWrite::TNonOwningParts(ChunkWriteParts.Get(), 1), (void*)42, false, 1)); - ++Iteration; + ++Iteration; break; } - case 50: + case 50: { TEST_RESPONSE(EvChunkWriteResult, OK); ASSERT_YTHROW(LastResponse.Cookie == (void*)42, "Unexpected cookie=" << LastResponse.Cookie); @@ -1091,12 +1091,12 @@ class TTestChunk3WriteRead : public TBaseTest { ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, commitRecord, data, TLsnSeg(1, 1), (void*)43)); break; } - case 60: + case 60: TEST_RESPONSE(EvLogResult, OK); VERBOSE_COUT(" Sending TEvChunkRead"); ctx.Send(Yard, new NPDisk::TEvChunkRead(Owner, OwnerRound, ChunkIdx, 0, DataSize*3, 1, nullptr)); break; - case 70: + case 70: { TString fullData = PrepareData(DataSize) + PrepareData(DataSize) + PrepareData(DataSize); TEST_RESPONSE(EvChunkReadResult, OK); @@ -1169,7 +1169,7 @@ class TTestLogMultipleWriteRead : public TBaseTest { ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); TestStep += 10; } else if (TestStep == 30) { - NPDisk::TLogPosition position{0, 0}; + NPDisk::TLogPosition position{0, 0}; if (IsFirstReadLog) { 
TEST_RESPONSE(EvYardInitResult, OK); Owner = LastResponse.Owner; @@ -1305,7 +1305,7 @@ public: class TTestWhiteboard : public TBaseTest { bool IsPDiskResultReceived = false; const int ExpectedOwnerCount = 5; - int RemainingVDiskResults = ExpectedOwnerCount; + int RemainingVDiskResults = ExpectedOwnerCount; bool IsDiskMetricsResultReceived = false; bool IsPDiskLightsResultReceived = false; @@ -1419,8 +1419,8 @@ class TTestChunkPriorityBlock : public TBaseTest { ui32 SafeSize; TString ChunkWriteData; TArrayHolder<NPDisk::TEvChunkWrite::TPart> ChunkWriteParts; - TVector<ui32> ChunkIds; - ui32 PausedChunkWrites = 5; + TVector<ui32> ChunkIds; + ui32 PausedChunkWrites = 5; ui32 Iteration; void TestFSM(const TActorContext &ctx); @@ -1554,269 +1554,269 @@ public: {} }; -class TTestSysLogReordering : public TBaseTest { - friend class TTestSysLogReorderingLogCheck; - -public: - static std::atomic<ui32> VDiskNum; - -private: - static constexpr ui32 ChunksToReserve = 20; - - enum ELogRecType : ui8 { - EGarbage = 0, - ECommittedChunks = 1, - EDeleteChunk = 2, - }; - -#pragma pack(push, 1) - struct TLogRecAboutChunks { - ELogRecType Type; - - union { - ui32 CommittedChunks[ChunksToReserve]; - ui32 DeletedChunk; - } Data; +class TTestSysLogReordering : public TBaseTest { + friend class TTestSysLogReorderingLogCheck; + +public: + static std::atomic<ui32> VDiskNum; + +private: + static constexpr ui32 ChunksToReserve = 20; + + enum ELogRecType : ui8 { + EGarbage = 0, + ECommittedChunks = 1, + EDeleteChunk = 2, + }; + +#pragma pack(push, 1) + struct TLogRecAboutChunks { + ELogRecType Type; + + union { + ui32 CommittedChunks[ChunksToReserve]; + ui32 DeletedChunk; + } Data; TLogRecAboutChunks() { memset(static_cast<void*>(this), 0, sizeof(TLogRecAboutChunks)); } - }; -#pragma pack(pop) - - ui32 MyNum = 0; - TVDiskID VDiskID; - NPDisk::TOwner Owner; - NPDisk::TOwnerRound OwnerRound; - TVector<ui32> CommittedChunks; - const ui32 LogRecordSize = 2000; - const ui32 LogRecordsToWrite = 
2000; - const ui32 ReleaseLsnStepSize = LogRecordsToWrite / ChunksToReserve; - ui32 LogRecordsWritten = 0; - ui32 DeletedChunks = 0; - TString Garbage; - ui32 Lsn = 0; // First written Lsn will be 1, last EvLog with data will be LogRecordsToWrite - - TLsnSeg GenLsnSeg() { - ++Lsn; - return {Lsn, Lsn}; - } - - void SendEvLog(const TActorContext& ctx, TMaybe<NPDisk::TCommitRecord> commit, TString data) { - if (commit) { - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, *commit, data, GenLsnSeg(), nullptr)); - } else { - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, data, GenLsnSeg(), nullptr)); - } - } - - void TestFSM(const TActorContext &ctx) { - VERBOSE_COUT("Test step " << TestStep); - switch (TestStep) { - case 0: - VERBOSE_COUT(" Sending TEvInit"); - VDiskID.GroupID = MyNum; - ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); - break; - case 10: - { - auto* ev = Event->Get<NPDisk::TEvYardInitResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - Owner = ev->PDiskParams->Owner; - OwnerRound = ev->PDiskParams->OwnerRound; - ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, ChunksToReserve)); - break; - } - case 20: - { - auto* ev = Event->Get<NPDisk::TEvChunkReserveResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - ASSERT_YTHROW(ev->ChunkIds.size() == ChunksToReserve, - "Unexpected ChunkIds.size() == " << ev->ChunkIds.size()); - VERBOSE_COUT(" Sending TEvLog to commit"); - - CommittedChunks = ev->ChunkIds; - - NPDisk::TCommitRecord commitRecord; - commitRecord.CommitChunks = ev->ChunkIds; - commitRecord.IsStartingPoint = true; - SendEvLog(ctx, commitRecord, {}); - break; - } - case 30: - { - auto* ev = Event->Get<NPDisk::TEvLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - VERBOSE_COUT(" Sending TEvLog to commit"); - ++LogRecordsWritten; - - if (LogRecordsWritten < LogRecordsToWrite) { - SendEvLog(ctx, {}, Garbage); - TestStep -= 10; - } 
else { - TLogRecAboutChunks log; - log.Type = ECommittedChunks; - for (ui32 i = 0; i < CommittedChunks.size(); ++i) { - log.Data.CommittedChunks[i] = CommittedChunks[i]; - } - TString commitedChunksList = TString::Uninitialized(sizeof(log)); - memcpy(commitedChunksList.Detach(), &log, sizeof(log)); - NPDisk::TCommitRecord commitRecord; - commitRecord.IsStartingPoint = true; - SendEvLog(ctx, commitRecord, commitedChunksList); - } - break; - } - case 40: - { - auto* ev = Event->Get<NPDisk::TEvLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - - if (DeletedChunks < ChunksToReserve / 2) { - NPDisk::TCommitRecord commitRecord; - commitRecord.FirstLsnToKeep = 1 + ReleaseLsnStepSize * (DeletedChunks + 1); - Y_VERIFY(commitRecord.FirstLsnToKeep <= LogRecordsToWrite + 1); - TLogRecAboutChunks log; - log.Type = EDeleteChunk; - log.Data.DeletedChunk = CommittedChunks.back(); - Ctest << "MyNum# " << MyNum << " Delete chunk# " << CommittedChunks.back() << Endl; - TString deleteChunkLog = TString::Uninitialized(sizeof(log)); - memcpy(deleteChunkLog.Detach(), &log, sizeof(log)); - commitRecord.DeleteChunks.push_back(CommittedChunks.back()); - CommittedChunks.pop_back(); - - SendEvLog(ctx, commitRecord, deleteChunkLog); - ++DeletedChunks; - TestStep -= 10; - } else { - SignalDoneEvent(); - } - break; - } - default: - ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } - -public: - TTestSysLogReordering(const TIntrusivePtr<TTestConfig> &cfg) - : TBaseTest(cfg) - { - MyNum = VDiskNum.fetch_add(1); - Garbage = PrepareData(LogRecordSize); - TLogRecAboutChunks log; - log.Type = EGarbage; - Y_VERIFY(LogRecordSize >= sizeof(log)); - memcpy(Garbage.Detach(), &log, sizeof(log)); - } -}; - -class TTestSysLogReorderingLogCheck : public TBaseTest { -public: - static std::atomic<ui32> VDiskNum; - -private: - ui32 MyNum = 0; - TVDiskID VDiskID; - NPDisk::TOwner Owner; - NPDisk::TOwnerRound 
OwnerRound; - TVector<ui32> CommittedChunks; - TVector<ui32> DeletedChunks; // Delete CommitRecords that was written to disk - TVector<ui32> OwnedChunks; // PDisk version of which chunks is owned - - void CheckChunksNotMissed() { - std::sort(CommittedChunks.begin(), CommittedChunks.end()); - std::sort(DeletedChunks.begin(), DeletedChunks.end()); - std::sort(OwnedChunks.begin(), OwnedChunks.end()); - TVector<ui32> knownChunks(DeletedChunks.begin(), DeletedChunks.end()); - knownChunks.insert(knownChunks.end(), OwnedChunks.begin(), OwnedChunks.end()); - - std::sort(knownChunks.begin(), knownChunks.end()); - - TStringStream str; - str << "MyNum# " << MyNum << " "; - str << Endl; - str << "CommittedChunks# "; - NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(CommittedChunks); - str << Endl; - str << "OwnedChunks# "; - NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(OwnedChunks); - str << Endl; - str << "DeletedChunks# "; - NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(DeletedChunks); - str << Endl; - Ctest << str.Str(); - - ASSERT_YTHROW(CommittedChunks == knownChunks, "Some chunks missed " << str.Str()); - } - - void TestFSM(const TActorContext &ctx) { - VERBOSE_COUT("Test step " << TestStep); - switch (TestStep) { - case 0: - VERBOSE_COUT(" Sending TEvInit"); - VDiskID.GroupID = MyNum; - ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); - break; - case 10: - { - auto* ev = Event->Get<NPDisk::TEvYardInitResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - Owner = ev->PDiskParams->Owner; - OwnerRound = ev->PDiskParams->OwnerRound; - OwnedChunks = ev->OwnedChunks; - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0})); - break; - } - case 20: - { - auto* ev = Event->Get<NPDisk::TEvReadLogResult>(); - ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); - ASSERT_YTHROW(ev->IsEndOfLog, ev->ToString()); - - for (const NKikimr::NPDisk::TLogRecord &logRec : 
LastResponse.LogRecords) { - auto *log = reinterpret_cast<const TTestSysLogReordering::TLogRecAboutChunks*>(logRec.Data.data()); - switch (log->Type) { - case TTestSysLogReordering::EGarbage: - break; - case TTestSysLogReordering::ECommittedChunks: - for (ui32 i = 0; i < TTestSysLogReordering::ChunksToReserve; ++i) { - const ui32 chunk = log->Data.CommittedChunks[i]; - CommittedChunks.push_back(chunk); - } - break; - case TTestSysLogReordering::EDeleteChunk: - const ui32 chunk = log->Data.DeletedChunk; - DeletedChunks.push_back(chunk); - break; - } - } - - CheckChunksNotMissed(); - SignalDoneEvent(); - break; - } - case 30: - { - break; - } - default: - ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } - -public: - TTestSysLogReorderingLogCheck(const TIntrusivePtr<TTestConfig> &cfg) - : TBaseTest(cfg) - { - MyNum = VDiskNum.fetch_add(1); - } -}; - + }; +#pragma pack(pop) + + ui32 MyNum = 0; + TVDiskID VDiskID; + NPDisk::TOwner Owner; + NPDisk::TOwnerRound OwnerRound; + TVector<ui32> CommittedChunks; + const ui32 LogRecordSize = 2000; + const ui32 LogRecordsToWrite = 2000; + const ui32 ReleaseLsnStepSize = LogRecordsToWrite / ChunksToReserve; + ui32 LogRecordsWritten = 0; + ui32 DeletedChunks = 0; + TString Garbage; + ui32 Lsn = 0; // First written Lsn will be 1, last EvLog with data will be LogRecordsToWrite + + TLsnSeg GenLsnSeg() { + ++Lsn; + return {Lsn, Lsn}; + } + + void SendEvLog(const TActorContext& ctx, TMaybe<NPDisk::TCommitRecord> commit, TString data) { + if (commit) { + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, *commit, data, GenLsnSeg(), nullptr)); + } else { + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 0, data, GenLsnSeg(), nullptr)); + } + } + + void TestFSM(const TActorContext &ctx) { + VERBOSE_COUT("Test step " << TestStep); + switch (TestStep) { + case 0: + VERBOSE_COUT(" Sending TEvInit"); + VDiskID.GroupID = MyNum; + ctx.Send(Yard, new 
NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); + break; + case 10: + { + auto* ev = Event->Get<NPDisk::TEvYardInitResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + Owner = ev->PDiskParams->Owner; + OwnerRound = ev->PDiskParams->OwnerRound; + ctx.Send(Yard, new NPDisk::TEvChunkReserve(Owner, OwnerRound, ChunksToReserve)); + break; + } + case 20: + { + auto* ev = Event->Get<NPDisk::TEvChunkReserveResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + ASSERT_YTHROW(ev->ChunkIds.size() == ChunksToReserve, + "Unexpected ChunkIds.size() == " << ev->ChunkIds.size()); + VERBOSE_COUT(" Sending TEvLog to commit"); + + CommittedChunks = ev->ChunkIds; + + NPDisk::TCommitRecord commitRecord; + commitRecord.CommitChunks = ev->ChunkIds; + commitRecord.IsStartingPoint = true; + SendEvLog(ctx, commitRecord, {}); + break; + } + case 30: + { + auto* ev = Event->Get<NPDisk::TEvLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + VERBOSE_COUT(" Sending TEvLog to commit"); + ++LogRecordsWritten; + + if (LogRecordsWritten < LogRecordsToWrite) { + SendEvLog(ctx, {}, Garbage); + TestStep -= 10; + } else { + TLogRecAboutChunks log; + log.Type = ECommittedChunks; + for (ui32 i = 0; i < CommittedChunks.size(); ++i) { + log.Data.CommittedChunks[i] = CommittedChunks[i]; + } + TString commitedChunksList = TString::Uninitialized(sizeof(log)); + memcpy(commitedChunksList.Detach(), &log, sizeof(log)); + NPDisk::TCommitRecord commitRecord; + commitRecord.IsStartingPoint = true; + SendEvLog(ctx, commitRecord, commitedChunksList); + } + break; + } + case 40: + { + auto* ev = Event->Get<NPDisk::TEvLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + + if (DeletedChunks < ChunksToReserve / 2) { + NPDisk::TCommitRecord commitRecord; + commitRecord.FirstLsnToKeep = 1 + ReleaseLsnStepSize * (DeletedChunks + 1); + Y_VERIFY(commitRecord.FirstLsnToKeep <= LogRecordsToWrite + 1); + TLogRecAboutChunks log; 
+ log.Type = EDeleteChunk; + log.Data.DeletedChunk = CommittedChunks.back(); + Ctest << "MyNum# " << MyNum << " Delete chunk# " << CommittedChunks.back() << Endl; + TString deleteChunkLog = TString::Uninitialized(sizeof(log)); + memcpy(deleteChunkLog.Detach(), &log, sizeof(log)); + commitRecord.DeleteChunks.push_back(CommittedChunks.back()); + CommittedChunks.pop_back(); + + SendEvLog(ctx, commitRecord, deleteChunkLog); + ++DeletedChunks; + TestStep -= 10; + } else { + SignalDoneEvent(); + } + break; + } + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } + +public: + TTestSysLogReordering(const TIntrusivePtr<TTestConfig> &cfg) + : TBaseTest(cfg) + { + MyNum = VDiskNum.fetch_add(1); + Garbage = PrepareData(LogRecordSize); + TLogRecAboutChunks log; + log.Type = EGarbage; + Y_VERIFY(LogRecordSize >= sizeof(log)); + memcpy(Garbage.Detach(), &log, sizeof(log)); + } +}; + +class TTestSysLogReorderingLogCheck : public TBaseTest { +public: + static std::atomic<ui32> VDiskNum; + +private: + ui32 MyNum = 0; + TVDiskID VDiskID; + NPDisk::TOwner Owner; + NPDisk::TOwnerRound OwnerRound; + TVector<ui32> CommittedChunks; + TVector<ui32> DeletedChunks; // Delete CommitRecords that was written to disk + TVector<ui32> OwnedChunks; // PDisk version of which chunks is owned + + void CheckChunksNotMissed() { + std::sort(CommittedChunks.begin(), CommittedChunks.end()); + std::sort(DeletedChunks.begin(), DeletedChunks.end()); + std::sort(OwnedChunks.begin(), OwnedChunks.end()); + TVector<ui32> knownChunks(DeletedChunks.begin(), DeletedChunks.end()); + knownChunks.insert(knownChunks.end(), OwnedChunks.begin(), OwnedChunks.end()); + + std::sort(knownChunks.begin(), knownChunks.end()); + + TStringStream str; + str << "MyNum# " << MyNum << " "; + str << Endl; + str << "CommittedChunks# "; + NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(CommittedChunks); + str << Endl; + str << "OwnedChunks# "; + 
NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(OwnedChunks); + str << Endl; + str << "DeletedChunks# "; + NPDisk::TChunkIdFormatter(str).PrintBracedChunksList(DeletedChunks); + str << Endl; + Ctest << str.Str(); + + ASSERT_YTHROW(CommittedChunks == knownChunks, "Some chunks missed " << str.Str()); + } + + void TestFSM(const TActorContext &ctx) { + VERBOSE_COUT("Test step " << TestStep); + switch (TestStep) { + case 0: + VERBOSE_COUT(" Sending TEvInit"); + VDiskID.GroupID = MyNum; + ctx.Send(Yard, new NPDisk::TEvYardInit(2, VDiskID, *PDiskGuid)); + break; + case 10: + { + auto* ev = Event->Get<NPDisk::TEvYardInitResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + Owner = ev->PDiskParams->Owner; + OwnerRound = ev->PDiskParams->OwnerRound; + OwnedChunks = ev->OwnedChunks; + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound, NPDisk::TLogPosition{0, 0})); + break; + } + case 20: + { + auto* ev = Event->Get<NPDisk::TEvReadLogResult>(); + ASSERT_YTHROW(ev->Status == NKikimrProto::OK, ev->ToString()); + ASSERT_YTHROW(ev->IsEndOfLog, ev->ToString()); + + for (const NKikimr::NPDisk::TLogRecord &logRec : LastResponse.LogRecords) { + auto *log = reinterpret_cast<const TTestSysLogReordering::TLogRecAboutChunks*>(logRec.Data.data()); + switch (log->Type) { + case TTestSysLogReordering::EGarbage: + break; + case TTestSysLogReordering::ECommittedChunks: + for (ui32 i = 0; i < TTestSysLogReordering::ChunksToReserve; ++i) { + const ui32 chunk = log->Data.CommittedChunks[i]; + CommittedChunks.push_back(chunk); + } + break; + case TTestSysLogReordering::EDeleteChunk: + const ui32 chunk = log->Data.DeletedChunk; + DeletedChunks.push_back(chunk); + break; + } + } + + CheckChunksNotMissed(); + SignalDoneEvent(); + break; + } + case 30: + { + break; + } + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } + +public: + TTestSysLogReorderingLogCheck(const 
TIntrusivePtr<TTestConfig> &cfg) + : TBaseTest(cfg) + { + MyNum = VDiskNum.fetch_add(1); + } +}; + template <ui32 Size> class TTestCommitChunks : public TBaseTest { NPDisk::TOwner Owner; @@ -1981,7 +1981,7 @@ public: {} }; -class TTestCheckLog : public TBaseTest { +class TTestCheckLog : public TBaseTest { NPDisk::TOwner Owner; NPDisk::TOwnerRound OwnerRound; ui32 AppendBlockSize; @@ -1992,7 +1992,7 @@ class TTestCheckLog : public TBaseTest { void TestFSM(const TActorContext &ctx); public: - TTestCheckLog(const TIntrusivePtr<TTestConfig> &cfg) + TTestCheckLog(const TIntrusivePtr<TTestConfig> &cfg) : TBaseTest(cfg) {} }; @@ -2140,107 +2140,107 @@ public: {} }; - - -class TActorTestSlayLogWriteRace final : public TCommonBaseTest { - void HandleBoot(TEvTablet::TEvBoot::TPtr &, const TActorContext &ctx) { - ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); - } - - void Handle(NPDisk::TEvYardInitResult::TPtr &event, const TActorContext &ctx) { - auto *ev = event->Get(); - if (ev->Status != NKikimrProto::OK) { - SignalError(ev->ToString()); - return; - } - - Owner = ev->PDiskParams->Owner; - OwnerRound = ev->PDiskParams->OwnerRound; - if (FirstRound) { - for (size_t i = 0; i < LogWriteCount; ++i) { - if (i == LogWriteCount - 10) { - ctx.Send(Yard, new NPDisk::TEvSlay(VDiskID, OwnerRound + 1, 1, 1)); - } - TString data = PrepareData(842); - if (i == 0) { - NPDisk::TCommitRecord commit; - commit.IsStartingPoint = true; - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 1, commit, data, TLsnSeg(i+1, i+1), nullptr)); - } else { - ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 1, data, TLsnSeg(i+1, i+1), nullptr)); - } - } - } else { - if (!ev->OwnedChunks.empty()) { - SignalError(ev->ToString()); - return; - } - // This sleep doesn't fix a problem with old VDisk's log enteries after slay - // Sleep(TDuration::Seconds(5)); - ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); - } - } - - void Handle(NPDisk::TEvLogResult::TPtr &event, const 
TActorContext &) { - auto *ev = event->Get(); - if (ev->Status != NKikimrProto::OK) { - return; - } - - RecievedLogWrites += ev->Results.size(); - } - - void Handle(NPDisk::TEvReadLogResult::TPtr &event, const TActorContext &) { - auto *ev = event->Get(); - if (ev->Status != NKikimrProto::OK) { - SignalError(ev->ToString()); - return; - } - - if (ev->Results) { - SignalError("Non empty result for newly created Owner" + ev->ToString()); - } else { - SignalDoneEvent(); - } - } - - void Handle(NPDisk::TEvSlayResult::TPtr &event, const TActorContext &ctx) { - auto *ev = event->Get(); - if (ev->Status != NKikimrProto::OK) { - SignalError(ev->ToString()); - return; - } - FirstRound = false; - // This sleep prevents the problem - // Sleep(TDuration::Seconds(5)); - ctx.Send(Yard, new NPDisk::TEvYardInit(OwnerRound + 1, VDiskID, *PDiskGuid)); - } - -public: - STFUNC(StateFunc) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTablet::TEvBoot, HandleBoot); - HFunc(NPDisk::TEvYardInitResult, Handle); - HFunc(NPDisk::TEvLogResult, Handle); - HFunc(NPDisk::TEvReadLogResult, Handle); - HFunc(NPDisk::TEvSlayResult, Handle); - } - } - - TActorTestSlayLogWriteRace(const TIntrusivePtr<TTestConfig> &cfg) - : TCommonBaseTest(cfg) - { - Become(&TActorTestSlayLogWriteRace::StateFunc); - } - -private: - NPDisk::TOwner Owner; - NPDisk::TOwnerRound OwnerRound; - const size_t LogWriteCount = 1e5; - size_t RecievedLogWrites = 0; - bool FirstRound = true; -}; - + + +class TActorTestSlayLogWriteRace final : public TCommonBaseTest { + void HandleBoot(TEvTablet::TEvBoot::TPtr &, const TActorContext &ctx) { + ctx.Send(Yard, new NPDisk::TEvYardInit(3, VDiskID, *PDiskGuid)); + } + + void Handle(NPDisk::TEvYardInitResult::TPtr &event, const TActorContext &ctx) { + auto *ev = event->Get(); + if (ev->Status != NKikimrProto::OK) { + SignalError(ev->ToString()); + return; + } + + Owner = ev->PDiskParams->Owner; + OwnerRound = ev->PDiskParams->OwnerRound; + if (FirstRound) { + for (size_t i = 0; i < 
LogWriteCount; ++i) { + if (i == LogWriteCount - 10) { + ctx.Send(Yard, new NPDisk::TEvSlay(VDiskID, OwnerRound + 1, 1, 1)); + } + TString data = PrepareData(842); + if (i == 0) { + NPDisk::TCommitRecord commit; + commit.IsStartingPoint = true; + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 1, commit, data, TLsnSeg(i+1, i+1), nullptr)); + } else { + ctx.Send(Yard, new NPDisk::TEvLog(Owner, OwnerRound, 1, data, TLsnSeg(i+1, i+1), nullptr)); + } + } + } else { + if (!ev->OwnedChunks.empty()) { + SignalError(ev->ToString()); + return; + } + // This sleep doesn't fix a problem with old VDisk's log enteries after slay + // Sleep(TDuration::Seconds(5)); + ctx.Send(Yard, new NPDisk::TEvReadLog(Owner, OwnerRound)); + } + } + + void Handle(NPDisk::TEvLogResult::TPtr &event, const TActorContext &) { + auto *ev = event->Get(); + if (ev->Status != NKikimrProto::OK) { + return; + } + + RecievedLogWrites += ev->Results.size(); + } + + void Handle(NPDisk::TEvReadLogResult::TPtr &event, const TActorContext &) { + auto *ev = event->Get(); + if (ev->Status != NKikimrProto::OK) { + SignalError(ev->ToString()); + return; + } + + if (ev->Results) { + SignalError("Non empty result for newly created Owner" + ev->ToString()); + } else { + SignalDoneEvent(); + } + } + + void Handle(NPDisk::TEvSlayResult::TPtr &event, const TActorContext &ctx) { + auto *ev = event->Get(); + if (ev->Status != NKikimrProto::OK) { + SignalError(ev->ToString()); + return; + } + FirstRound = false; + // This sleep prevents the problem + // Sleep(TDuration::Seconds(5)); + ctx.Send(Yard, new NPDisk::TEvYardInit(OwnerRound + 1, VDiskID, *PDiskGuid)); + } + +public: + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + HFunc(TEvTablet::TEvBoot, HandleBoot); + HFunc(NPDisk::TEvYardInitResult, Handle); + HFunc(NPDisk::TEvLogResult, Handle); + HFunc(NPDisk::TEvReadLogResult, Handle); + HFunc(NPDisk::TEvSlayResult, Handle); + } + } + + TActorTestSlayLogWriteRace(const TIntrusivePtr<TTestConfig> &cfg) + : 
TCommonBaseTest(cfg) + { + Become(&TActorTestSlayLogWriteRace::StateFunc); + } + +private: + NPDisk::TOwner Owner; + NPDisk::TOwnerRound OwnerRound; + const size_t LogWriteCount = 1e5; + size_t RecievedLogWrites = 0; + bool FirstRound = true; +}; + class TTestDestructionWhileWritingChunk : public TBaseTest { NPDisk::TOwner Owner; NPDisk::TOwnerRound OwnerRound; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_base_test.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_base_test.h index f24cec60e0..e6c75e6000 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_base_test.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_base_test.h @@ -7,63 +7,63 @@ namespace NKikimr { -class TCommonBaseTest : public TActor<TCommonBaseTest> { +class TCommonBaseTest : public TActor<TCommonBaseTest> { +protected: + void SignalDoneEvent() { + AtomicIncrement(*DoneCounter); + DoneEvent->Signal(); + } + + void SignalError(const TString& error) { + SignalExceptionEvent(TWithBackTrace<yexception>() << error); + } + + void SignalExceptionEvent(const yexception& ex) { + *LastException = ex; + AtomicSet(*IsLastExceptionSet, 1); + SignalDoneEvent(); + } + +public: + TCommonBaseTest(const TIntrusivePtr<TTestConfig> &cfg) + : TActor(&TThis::FakeState) + , Yard(cfg->YardActorID) + , VDiskID(cfg->VDiskID) + , TestStep(0) + {} + + void Init(TAtomic *doneCounter, TSystemEvent *doneEvent, yexception *lastException, TAtomic *isLastExceptionSet, + ui64 *pDiskGuid) { + DoneCounter = doneCounter; + DoneEvent = doneEvent; + LastException = lastException; + IsLastExceptionSet = isLastExceptionSet; + PDiskGuid = pDiskGuid; + } + + STFUNC(FakeState) { + Y_UNUSED(ev); + Y_UNUSED(ctx); + Y_FAIL("This class cannot be used directly. 
For tests inherit from it"); + } + protected: - void SignalDoneEvent() { - AtomicIncrement(*DoneCounter); - DoneEvent->Signal(); - } - - void SignalError(const TString& error) { - SignalExceptionEvent(TWithBackTrace<yexception>() << error); - } - - void SignalExceptionEvent(const yexception& ex) { - *LastException = ex; - AtomicSet(*IsLastExceptionSet, 1); - SignalDoneEvent(); - } - -public: - TCommonBaseTest(const TIntrusivePtr<TTestConfig> &cfg) - : TActor(&TThis::FakeState) - , Yard(cfg->YardActorID) - , VDiskID(cfg->VDiskID) - , TestStep(0) - {} - - void Init(TAtomic *doneCounter, TSystemEvent *doneEvent, yexception *lastException, TAtomic *isLastExceptionSet, - ui64 *pDiskGuid) { - DoneCounter = doneCounter; - DoneEvent = doneEvent; - LastException = lastException; - IsLastExceptionSet = isLastExceptionSet; - PDiskGuid = pDiskGuid; - } - - STFUNC(FakeState) { - Y_UNUSED(ev); - Y_UNUSED(ctx); - Y_FAIL("This class cannot be used directly. For tests inherit from it"); - } - -protected: const TActorId Yard; - const TVDiskID VDiskID; - int TestStep; - - TAtomic *DoneCounter = nullptr; - TSystemEvent *DoneEvent = nullptr; - yexception *LastException = nullptr; - TAtomic *IsLastExceptionSet = nullptr; - ui64 *PDiskGuid = nullptr; -}; - -class TBaseTest : public TCommonBaseTest { - -protected: + const TVDiskID VDiskID; + int TestStep; + + TAtomic *DoneCounter = nullptr; + TSystemEvent *DoneEvent = nullptr; + yexception *LastException = nullptr; + TAtomic *IsLastExceptionSet = nullptr; + ui64 *PDiskGuid = nullptr; +}; + +class TBaseTest : public TCommonBaseTest { + +protected: struct TResponseData { - TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; + TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; TBufferWithGaps Data; TVector<NPDisk::TLogRecord> LogRecords; NPDisk::TEvLogResult::TResults LogResults; @@ -71,7 +71,7 @@ protected: TVector<TChunkIdx> OwnedChunks; void *Cookie; - NPDisk::TLogPosition NextPosition; + NPDisk::TLogPosition NextPosition; ui32 
ChunkIdx; ui32 Offset; ui32 ChunkSize; @@ -83,7 +83,7 @@ protected: NKikimrProto::EReplyStatus Status; NPDisk::TOwner Owner; NPDisk::TOwnerRound OwnerRound; - TLogSignature Signature; + TLogSignature Signature; bool IsEndOfLog; NPDisk::TStatusFlags StatusFlags; NMon::TEvHttpInfoRes *HttpResult; @@ -104,7 +104,7 @@ protected: OwnedChunks.clear(); Cookie = (void*)((ui64)-1); - NextPosition = NPDisk::TLogPosition{0, 0}; + NextPosition = NPDisk::TLogPosition{0, 0}; ChunkIdx = 0; Offset = (ui32)-1; ChunkSize = (ui32)-1; @@ -123,7 +123,7 @@ protected: } void Check() { - for (TMap<TLogSignature, NPDisk::TLogRecord>::iterator it = StartingPoints.begin(); + for (TMap<TLogSignature, NPDisk::TLogRecord>::iterator it = StartingPoints.begin(); it != StartingPoints.end(); ++it) { REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&it->first, sizeof(it->first)); it->second.Verify(); @@ -164,7 +164,7 @@ protected: }; TResponseData LastResponse; - IEventHandle *Event; + IEventHandle *Event; virtual void TestFSM(const TActorContext &ctx) = 0; @@ -174,8 +174,8 @@ protected: try { TestFSM(ctx); LastResponse.Clear(); - } catch (const yexception& ex) { - SignalExceptionEvent(ex); + } catch (const yexception& ex) { + SignalExceptionEvent(ex); } } @@ -213,10 +213,10 @@ protected: void Handle(NPDisk::TEvLogResult::TPtr &ev, const TActorContext &ctx) { NPDisk::TEvLogResult &result = *(ev->Get()); - // Print before move - VERBOSE_COUT("Got " << result.ToString()); + // Print before move + VERBOSE_COUT("Got " << result.ToString()); LastResponse.Status = result.Status; - LastResponse.LogResults = std::move(result.Results); + LastResponse.LogResults = std::move(result.Results); LastResponse.EventType = (TEvBlobStorage::EEv)result.Type(); LastResponse.StatusFlags = result.StatusFlags; ActTestFSM(ctx); @@ -346,14 +346,14 @@ protected: public: TBaseTest(const TIntrusivePtr<TTestConfig> &cfg) - : TCommonBaseTest(cfg) - { - Become(&TBaseTest::StateRegister); + : TCommonBaseTest(cfg) + { + 
Become(&TBaseTest::StateRegister); } - + STFUNC(StateRegister) { - Event = ev.Get(); + Event = ev.Get(); switch (ev->GetTypeRewrite()) { HFunc(NPDisk::TEvYardInitResult, Handle); HFunc(NPDisk::TEvCheckSpaceResult, Handle); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_context.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_context.h index 042f8aac0f..24000dcbd0 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_context.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_context.h @@ -2,7 +2,7 @@ #include "defs.h" #include <ydb/library/pdisk_io/sector_map.h> - + #include <util/folder/tempdir.h> #include <util/folder/dirut.h> @@ -18,7 +18,7 @@ public: TTestContext(bool makeTempDir, bool useSectorMap) { if (makeTempDir) { TempDir.Reset(new TTempDir); - Dir = TempDir->Name().c_str(); + Dir = TempDir->Name().c_str(); } if (useSectorMap) { SectorMap = new NPDisk::TSectorMap; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_defs.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_defs.h index 2e0117cd47..586be4e78e 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_defs.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_defs.h @@ -1,11 +1,11 @@ #pragma once #include "defs.h" -#include <util/stream/null.h> +#include <util/stream/null.h> #include <util/system/valgrind.h> -#include <cassert> - +#include <cassert> + namespace NKikimr { #define ENABLE_SPEED_TESTS 0 @@ -18,11 +18,11 @@ constexpr ui32 TEST_TIMEOUT = NSan::PlainOrUnderSanitizer( ); constexpr ui32 MIN_CHUNK_SIZE = 1620 << 10; -#ifdef NDEBUG -constexpr bool IsLowVerbose = false; -#else -constexpr bool IsLowVerbose = true; -#endif +#ifdef NDEBUG +constexpr bool IsLowVerbose = false; +#else +constexpr bool IsLowVerbose = true; +#endif constexpr bool IsVerbose = false; constexpr bool IsMonitoringEnabled = false; constexpr bool IsRealBlockDevice = false; @@ -53,14 +53,14 @@ do { \ ui32 eventGroup = ((int)LastResponse.EventType & 0xffff) >> 9; \ ui32 eventId = 
((int)LastResponse.EventType & 0x1ff); \ ASSERT_YTHROW(LastResponse.EventType == TEvBlobStorage::msg, \ - "Unexpected message in space " << eventSpace << ": 512 * " << eventGroup << " + " << eventId << "\n"); \ + "Unexpected message in space " << eventSpace << ": 512 * " << eventGroup << " + " << eventId << "\n"); \ ASSERT_YTHROW(LastResponse.Status == NKikimrProto::st, \ - "Unexpected status, got# " << StatusToString(LastResponse.Status) << \ - " expect# " << StatusToString(NKikimrProto::st) << "\n"); \ + "Unexpected status, got# " << StatusToString(LastResponse.Status) << \ + " expect# " << StatusToString(NKikimrProto::st) << "\n"); \ } while(false) -#define Ctest (IsVerbose ? Cerr : Cnull) - +#define Ctest (IsVerbose ? Cerr : Cnull) + #define VERBOSE_COUT(str) \ do { \ if (IsVerbose) { \ diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.cpp index 153766e6c6..9fe03f5808 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.cpp @@ -1,6 +1,6 @@ #include <ydb/library/pdisk_io/buffers.h> -#include "blobstorage_pdisk_actorsystem_creator.h" -#include "blobstorage_pdisk_ut.h" +#include "blobstorage_pdisk_actorsystem_creator.h" +#include "blobstorage_pdisk_ut.h" #include "blobstorage_pdisk_ut_helpers.h" #include <ydb/core/blobstorage/crypto/default.h> @@ -12,9 +12,9 @@ namespace NKikimr { TString PrepareData(ui32 size, ui32 flavor) { - TString data = TString::Uninitialized(size); - for (ui32 i = 0; i < size; ++i) { - data[i] = '0' + (i + size + flavor) % 8; + TString data = TString::Uninitialized(size); + for (ui32 i = 0; i < size; ++i) { + data[i] = '0' + (i + size + flavor) % 8; } return data; } @@ -37,8 +37,8 @@ TString MakePDiskPath(const char *dir) { } } -void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, ui64 diskSize, bool isErasureEncodeUserLog, - TIntrusivePtr<NPDisk::TSectorMap> 
sectorMap) { +void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, ui64 diskSize, bool isErasureEncodeUserLog, + TIntrusivePtr<NPDisk::TSectorMap> sectorMap) { NPDisk::TKey chunkKey; NPDisk::TKey logKey; NPDisk::TKey sysLogKey; @@ -50,10 +50,10 @@ void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, ui64 diskSize, NPDisk::YdbDefaultPDiskSequence, "Info", isErasureEncodeUserLog, false, sectorMap); } -void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, bool isErasureEncodeUserLog, +void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, bool isErasureEncodeUserLog, TIntrusivePtr<NPDisk::TSectorMap> sectorMap) { ui64 diskSizeHeuristic = (ui64)chunkSize * 1000; - FormatPDiskForTest(path, guid, chunkSize, diskSizeHeuristic, isErasureEncodeUserLog, sectorMap); + FormatPDiskForTest(path, guid, chunkSize, diskSizeHeuristic, isErasureEncodeUserLog, sectorMap); } void ReadPdiskFile(TTestContext *tc, ui32 dataSize, NPDisk::TAlignedData &outData) { @@ -62,9 +62,9 @@ void ReadPdiskFile(TTestContext *tc, ui32 dataSize, NPDisk::TAlignedData &outDat { TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - TActorSystemCreator creator; + TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); VERBOSE_COUT(" Performing Pread of " << dataSize); device->PreadSync(outData.Get(), dataSize, 0, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); } @@ -99,9 +99,9 @@ ui64 DestroyLastSectors(TTestContext *tc, NPDisk::TAlignedData &dataBefore, NPDi THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); NPDisk::TAlignedData buffer(sectorSize * count); memset(buffer.Get(), 0xf, sectorSize * count); - TActorSystemCreator creator; + 
TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(buffer.Get(), sectorSize * count); device->PwriteSync(buffer.Get(), sectorSize * count, offset, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); } @@ -136,9 +136,9 @@ ui64 RestoreLastSectors(TTestContext *tc, NPDisk::TAlignedData &dataBefore, NPDi { TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - TActorSystemCreator creator; + TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); VERBOSE_COUT("Offset = " << offset << " sectorIdx = " << offset/sectorSize); REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(dataBefore.Get() + offset, sectorSize * count); device->PwriteSync(dataBefore.Get() + offset, sectorSize * count, offset, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); @@ -166,18 +166,18 @@ void FillDeviceWithPattern(TTestContext *tc, ui64 chunkSize, ui64 pattern) { } const ui32 formatSectorsSize = NPDisk::FormatSectorSize * NPDisk::ReplicationFactor; - NPDisk::TAlignedData data(formatSectorsSize); + NPDisk::TAlignedData data(formatSectorsSize); + + Y_VERIFY(data.Size() % sizeof(ui64) == 0); + Fill((ui64*)data.Get(), (ui64*)(data.Get() + data.Size()), pattern); - Y_VERIFY(data.Size() % sizeof(ui64) == 0); - Fill((ui64*)data.Get(), (ui64*)(data.Get() + data.Size()), pattern); - { TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - TActorSystemCreator 
creator; + TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); - VERBOSE_COUT("Filling first " << data.Size() << "bytes of device with data"); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + VERBOSE_COUT("Filling first " << data.Size() << "bytes of device with data"); device->PwriteSync(data.Get(), data.Size(), 0, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); } VERBOSE_COUT("Done"); @@ -186,8 +186,8 @@ void FillDeviceWithPattern(TTestContext *tc, ui64 chunkSize, ui64 pattern) { void FillDeviceWithZeroes(TTestContext *tc, ui64 chunkSize) { FillDeviceWithPattern(tc, chunkSize, 0); -} - +} + void WriteSectors(TTestContext *tc, NPDisk::TAlignedData &dataAfter, ui64 firstSector, ui32 count) { VERBOSE_COUT("WriteSectors: restoring " << count << " sectors."); TString path = EnsurePDiskExists(tc); @@ -197,9 +197,9 @@ void WriteSectors(TTestContext *tc, NPDisk::TAlignedData &dataAfter, ui64 firstS { TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - TActorSystemCreator creator; + TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); VERBOSE_COUT("Offset = " << offset << " sectorIdx = " << offset/sectorSize); device->PwriteSync(dataAfter.Get() + offset, sectorSize * count, offset, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); } @@ -207,7 +207,7 @@ void WriteSectors(TTestContext *tc, NPDisk::TAlignedData &dataAfter, ui64 firstS return; } -void DestroySectors(TTestContext *tc, const NPDisk::TAlignedData &dataAfter, +void DestroySectors(TTestContext *tc, const NPDisk::TAlignedData 
&dataAfter, ui32 dataSize, ui64 firstSector, ui32 period) { VERBOSE_COUT("DestroySectors: destroying " << firstSector << " + k * " << period << " sectors."); TString path = EnsurePDiskExists(tc); @@ -221,9 +221,9 @@ void DestroySectors(TTestContext *tc, const NPDisk::TAlignedData &dataAfter, for (ui64 i = firstSector; i < dataSize / sectorSize; i += period) { memset(buffer.Get() + i * sectorSize, 0xf, sectorSize); } - TActorSystemCreator creator; + TActorSystemCreator creator; THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults(path, *mon, - NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); + NPDisk::TDeviceMode::LockFile, tc->SectorMap, creator.GetActorSystem())); REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(buffer.Get(), buffer.Size()); device->PwriteSync(buffer.Get(), buffer.Size(), 0, NPDisk::TReqId(NPDisk::TReqId::Test4, 0), {}); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.h index fe372f91b8..c5d606306c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_helpers.h @@ -5,7 +5,7 @@ #include <ydb/library/pdisk_io/buffers.h> #include <ydb/core/protos/base.pb.h> - + namespace NKikimr { TString EnsurePDiskExists(TTestContext *tc); @@ -13,10 +13,10 @@ TString PrepareData(ui32 size, ui32 flavor = 0); TString StatusToString(const NKikimrProto::EReplyStatus status); TString MakeDatabasePath(const char *dir); TString MakePDiskPath(const char *dir); -void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, ui64 diskSize, bool isErasureEncodeUserLog, - TIntrusivePtr<NPDisk::TSectorMap> sectorMap); -void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, bool isErasureEncodeUserLog, - TIntrusivePtr<NPDisk::TSectorMap> sectorMap); +void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, ui64 diskSize, bool isErasureEncodeUserLog, + 
TIntrusivePtr<NPDisk::TSectorMap> sectorMap); +void FormatPDiskForTest(TString path, ui64 guid, ui32 chunkSize, bool isErasureEncodeUserLog, + TIntrusivePtr<NPDisk::TSectorMap> sectorMap); void ReadPdiskFile(TTestContext *tc, ui32 dataSize, NPDisk::TAlignedData &outData); i64 FindLastDifferingBytes(NPDisk::TAlignedData &dataBefore, NPDisk::TAlignedData &dataAfter, ui32 dataSize); @@ -27,7 +27,7 @@ ui64 RestoreLastSectors(TTestContext *tc, NPDisk::TAlignedData &dataBefore, NPDi void FillDeviceWithPattern(TTestContext *tc, ui64 chunkSize, ui64 pattern); void FillDeviceWithZeroes(TTestContext *tc, ui64 chunkSize); void WriteSectors(TTestContext *tc, NPDisk::TAlignedData &dataAfter, ui64 firstSector, ui32 count); -void DestroySectors(TTestContext *tc, const NPDisk::TAlignedData &dataAfter, +void DestroySectors(TTestContext *tc, const NPDisk::TAlignedData &dataAfter, ui32 dataSize, ui64 firstSector, ui32 period); void OutputSectorMap(NPDisk::TAlignedData &dataBefore, NPDisk::TAlignedData &dataAfter, ui32 dataSize); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.cpp index dc8beddb63..5935d13229 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.cpp @@ -15,14 +15,14 @@ namespace NPDisk { extern const ui64 YdbDefaultPDiskSequence = 0x7e5700007e570000; } -void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { +void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { TTempDir tempDir; TVector<TActorId> testIds; TActorId pDiskId; TAppData appData(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); - auto ioContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); - appData.IoContextFactory = ioContext.get(); - + auto ioContext = std::make_shared<NPDisk::TIoContextFactoryOSS>(); + appData.IoContextFactory = ioContext.get(); + THolder<TActorSystem> actorSystem1; 
TIntrusivePtr<NMonitoring::TDynamicCounters> mainCounters; THolder<NActors::TMon> monitoring; @@ -32,14 +32,14 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { yexception lastException; TAtomic isLastExceptionSet = 0; for (size_t i = 0; i < tests.size(); ++i) { - auto *p = static_cast<TCommonBaseTest*>(tests[i]); + auto *p = static_cast<TCommonBaseTest*>(tests[i]); p->Init(&doneCounter, &doneEvent, &lastException, &isLastExceptionSet, &runCfg.TestContext->PDiskGuid); } try { mainCounters = TIntrusivePtr<NMonitoring::TDynamicCounters>(new NMonitoring::TDynamicCounters()); - testIds.resize(runCfg.Instances); + testIds.resize(runCfg.Instances); TIntrusivePtr<TTableNameserverSetup> nameserverTable(new TTableNameserverSetup()); TPortManager pm; @@ -68,12 +68,12 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { } EntropyPool().Read(&runCfg.TestContext->PDiskGuid, sizeof(runCfg.TestContext->PDiskGuid)); - if (!runCfg.IsBad) { - FormatPDiskForTest(dataPath, runCfg.TestContext->PDiskGuid, runCfg.ChunkSize, - runCfg.IsErasureEncodeUserLog, runCfg.TestContext->SectorMap); + if (!runCfg.IsBad) { + FormatPDiskForTest(dataPath, runCfg.TestContext->PDiskGuid, runCfg.ChunkSize, + runCfg.IsErasureEncodeUserLog, runCfg.TestContext->SectorMap); } } else { - Y_VERIFY(!runCfg.IsBad); + Y_VERIFY(!runCfg.IsBad); } pDiskId = MakeBlobStoragePDiskID(1, 1); @@ -81,16 +81,16 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { TIntrusivePtr<TPDiskConfig> pDiskConfig = new TPDiskConfig(dataPath, runCfg.TestContext->PDiskGuid, 1, pDiskCategory); pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; - pDiskConfig->ChunkSize = runCfg.ChunkSize; + pDiskConfig->ChunkSize = runCfg.ChunkSize; pDiskConfig->SectorMap = runCfg.TestContext->SectorMap; - pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; - pDiskConfig->UseT1ha0HashInFooter = 
runCfg.UseT1ha0Hasher; + pDiskConfig->EnableSectorEncryption = !pDiskConfig->SectorMap; + pDiskConfig->UseT1ha0HashInFooter = runCfg.UseT1ha0Hasher; TActorSetupCmd pDiskSetup(CreatePDisk(pDiskConfig.Get(), NPDisk::YdbDefaultPDiskSequence, mainCounters), TMailboxType::Revolving, 0); setup1->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(pDiskId, pDiskSetup)); - for (ui32 i = 0; i < runCfg.Instances; ++i) { + for (ui32 i = 0; i < runCfg.Instances; ++i) { testIds[i] = MakeBlobStorageProxyID(1 + i); TActorSetupCmd testSetup(tests[i], TMailboxType::Revolving, 0); setup1->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(testIds[i], testSetup)); @@ -119,15 +119,15 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { ); TString explanation; - if (!IsLowVerbose) { + if (!IsLowVerbose) { // We use Null log backend and test log message generation in non-verbose tests logSettings->SetLevel(NLog::PRI_TRACE, NKikimrServices::BS_PDISK, explanation); } else { - logSettings->SetLevel(NLog::PRI_NOTICE, NKikimrServices::BS_PDISK, explanation); + logSettings->SetLevel(NLog::PRI_NOTICE, NKikimrServices::BS_PDISK, explanation); } NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(logSettings, - IsLowVerbose ? NActors::CreateStderrBackend() : NActors::CreateNullBackend(), + IsLowVerbose ? 
NActors::CreateStderrBackend() : NActors::CreateNullBackend(), GetServiceCounters(counters, "utils")); NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, 2); std::pair<NActors::TActorId, NActors::TActorSetupCmd> loggerActorPair(loggerActorId, loggerActorCmd); @@ -149,10 +149,10 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { } actorSystem1->Start(); - Sleep(TDuration::MilliSeconds(runCfg.BeforeTestSleepMs)); + Sleep(TDuration::MilliSeconds(runCfg.BeforeTestSleepMs)); VERBOSE_COUT("Sending TEvBoot to test"); - for (ui32 i = 0; i < runCfg.Instances; ++i) { + for (ui32 i = 0; i < runCfg.Instances; ++i) { actorSystem1->Send(testIds[i], new TEvTablet::TEvBoot( MakeTabletID(0, 0, 1), 0, nullptr, TActorId(), nullptr)); } @@ -160,7 +160,7 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { TAtomicBase doneCount = 0; bool isOk = true; TInstant startTime = Now(); - while (doneCount < runCfg.Instances && isOk) { + while (doneCount < runCfg.Instances && isOk) { ui32 msRemaining = TEST_TIMEOUT - (ui32)(Now() - startTime).MilliSeconds(); isOk = doneEvent.Wait(msRemaining); doneCount = AtomicGet(doneCounter); @@ -178,7 +178,7 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { + deviceGroup->GetCounter("DeviceBytesWritten")->Val() << "; IOs done# " << deviceGroup->GetCounter("DeviceReads")->Val() + deviceGroup->GetCounter("DeviceWrites")->Val(); - UNIT_ASSERT_VALUES_EQUAL_C(doneCount, runCfg.Instances, errorStr.Str()); + UNIT_ASSERT_VALUES_EQUAL_C(doneCount, runCfg.Instances, errorStr.Str()); } catch (yexception ex) { lastException = ex; AtomicSet(isLastExceptionSet, 1); @@ -192,7 +192,7 @@ void Run(TVector<IActor*> tests, TTestRunConfig runCfg) { doneEvent.Reset(); if (AtomicGet(isLastExceptionSet)) { AtomicSet(isLastExceptionSet, 0); - Cerr << lastException.what() << Endl; + Cerr << lastException.what() << Endl; ythrow lastException; } } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.h 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.h index 66bedd1474..28418e3f58 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_run.h @@ -6,50 +6,50 @@ #include <ydb/core/base/compile_time_flags.h> #include <ydb/core/base/services/blobstorage_service_id.h> - + #include <util/folder/tempdir.h> namespace NKikimr { -struct TTestRunConfig { - TTestRunConfig(TTestContext *testContext) - : TestContext(testContext) - {} +struct TTestRunConfig { + TTestRunConfig(TTestContext *testContext) + : TestContext(testContext) + {} TTestContext *TestContext; - ui32 Instances = 1; - ui32 ChunkSize = 128 << 20; - bool IsBad = false; - bool IsErasureEncodeUserLog = false; - ui32 BeforeTestSleepMs = 100; - bool UseT1ha0Hasher = KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING; -}; - -void Run(TVector<IActor*> tests, TTestRunConfig runCfg); - + ui32 Instances = 1; + ui32 ChunkSize = 128 << 20; + bool IsBad = false; + bool IsErasureEncodeUserLog = false; + ui32 BeforeTestSleepMs = 100; + bool UseT1ha0Hasher = KIKIMR_PDISK_ENABLE_T1HA_HASH_WRITING; +}; + +void Run(TVector<IActor*> tests, TTestRunConfig runCfg); + template <class T> -void Run(TTestRunConfig runCfg) { +void Run(TTestRunConfig runCfg) { const TActorId pDiskId = MakeBlobStoragePDiskID(1, 1); TVector<IActor*> tests; - for (ui32 i = 0; i < runCfg.Instances; ++i) { + for (ui32 i = 0; i < runCfg.Instances; ++i) { TIntrusivePtr<TTestConfig> testConfig = new TTestConfig(TVDiskID(0, 1, 0, 0, i), pDiskId); tests.push_back(new T(testConfig.Get())); } - Run(tests, runCfg); -} + Run(tests, runCfg); +} -template <class T> +template <class T> static void Run(TTestContext *tc, ui32 instances = 1, ui32 chunkSize = 128 << 20, bool isBad = false, TString = TString(), ui32 beforeTestSleepMs = 100) { - - TTestRunConfig cfg(tc); - cfg.Instances = instances; - cfg.ChunkSize = chunkSize; - cfg.IsBad = isBad; + + TTestRunConfig cfg(tc); + cfg.Instances = instances; + cfg.ChunkSize = 
chunkSize; + cfg.IsBad = isBad; cfg.IsErasureEncodeUserLog = false; - cfg.BeforeTestSleepMs = beforeTestSleepMs; - - Run<T>(cfg); + cfg.BeforeTestSleepMs = beforeTestSleepMs; + + Run<T>(cfg); } } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_yard.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_yard.cpp index 9bbfb1c18f..cab783a62b 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_yard.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_yard.cpp @@ -1,958 +1,958 @@ -#include "blobstorage_pdisk_abstract.h" -#include "blobstorage_pdisk_impl.h" - -#include "blobstorage_pdisk_ut.h" -#include "blobstorage_pdisk_ut_actions.h" -#include "blobstorage_pdisk_ut_helpers.h" -#include "blobstorage_pdisk_ut_run.h" - +#include "blobstorage_pdisk_abstract.h" +#include "blobstorage_pdisk_impl.h" + +#include "blobstorage_pdisk_ut.h" +#include "blobstorage_pdisk_ut_actions.h" +#include "blobstorage_pdisk_ut_helpers.h" +#include "blobstorage_pdisk_ut_run.h" + #include <ydb/core/blobstorage/crypto/default.h> - + #include <ydb/core/testlib/actors/test_runtime.h> - -namespace NKikimr { - -Y_UNIT_TEST_SUITE(TYardTest) { - -/* -YARD_UNIT_TEST(TestLotsOfNonceJumps) { - TTestContext tc(false, true); - Run<TTestInit<true, 1>>(&tc, 1, MIN_CHUNK_SIZE); - // for (size_t i = 0; i < 3 * MIN_CHUNK_SIZE / 4096 / 5; ++i) { - for (size_t i = 0; i < 204; ++i) { - // Cerr << "i# " << i << Endl; - // Run<TTestInit<false>>(&tc, 1, MIN_CHUNK_SIZE, false, disk); - Run<TTestLogWrite<1, 2023>>(&tc, 1, MIN_CHUNK_SIZE); - } - Run<TTestInit<false, 302>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 303>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 304>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 305>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 306>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 307>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 308>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 309>>(&tc, 1, MIN_CHUNK_SIZE); - 
Run<TTestInit<false, 310>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 311>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 312>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 313>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 314>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 315>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 316>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 317>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 318>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 319>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 320>>(&tc, 1, MIN_CHUNK_SIZE); -} -*/ - -YARD_UNIT_TEST(TestBadDeviceInit) { - TTestContext tc(false, true); - Run<TTestInitCorruptedError>(&tc, 1, MIN_CHUNK_SIZE, true); -} - -YARD_UNIT_TEST(TestInit) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestInit<true, 1>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 2>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInit<false, 3>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestInitOnIncompleteFormat) { - TTestContext tc(false, true); - TTestRunConfig cfg(&tc); - cfg.ChunkSize = MIN_CHUNK_SIZE; - cfg.TestContext = &tc; - FillDeviceWithPattern(&tc, MIN_CHUNK_SIZE, NPDisk::MagicIncompleteFormat); - Run<TTestInit<true, 1>>(cfg); - Run<TTestInit<false, 2>>(cfg); - Run<TTestInit<false, 3>>(cfg); -} - -YARD_UNIT_TEST(TestInitOwner) { - TTestContext tc(false, true); - Run<TTestInitOwner>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestIncorrectRequests) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestIncorrectRequests>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestEmptyLogRead) { - TTestContext tc(false, true); - Run<TTestEmptyLogRead>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestWholeLogRead) { - TTestContext tc(false, true); - Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogWriteRead) { - TTestContext tc(false, true); - Run<TTestLogWriteRead<17>>(&tc, 1, 
MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogWriteReadMedium) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogWriteRead<6000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogWriteReadLarge) { - TTestContext tc(false, true); - Run<TTestLogWriteRead<9000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogWriteCutEqual) { - for (int i = 0; i < 10; ++i) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogWriteCut<true>>(&tc, 2, MIN_CHUNK_SIZE); - TTestLogWriteCut<true>::Reset(); - Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); - } -} - -YARD_UNIT_TEST(TestLogWriteCutEqualRandomWait) { - for (int i = 0; i < 10; ++i) { - TTestContext tc(false, true); - tc.SectorMap->ImitateRandomWait = {TDuration::MicroSeconds(500), TDuration::MicroSeconds(1000)}; - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogWriteCut<true>>(&tc, 2, MIN_CHUNK_SIZE); - TTestLogWriteCut<true>::Reset(); - Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); - } -} - -YARD_UNIT_TEST(TestSysLogReordering) { - for (int i = 0; i < 10; ++i) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestSysLogReordering>(&tc, 5, MIN_CHUNK_SIZE); - TTestSysLogReordering::VDiskNum = 0; - Run<TTestSysLogReorderingLogCheck>(&tc, 5, MIN_CHUNK_SIZE); - TTestSysLogReorderingLogCheck::VDiskNum = 0; - } -} - -YARD_UNIT_TEST(TestLogWriteReaDifferentHashers) { - for (ui32 i = 0; i < 4; ++i) { - TTestContext tc(false, true); - TTestRunConfig cfg(&tc); - - cfg.UseT1ha0Hasher = i / 2; - Run<TTestLogWriteRead<6000>>(cfg); - cfg.UseT1ha0Hasher = i % 2; - Run<TTestWholeLogRead>(cfg); - } -} - -YARD_UNIT_TEST(TestChunkWriteReadDifferentHashers) { - for (ui32 i = 0; i < 2; ++i) { - TTestContext tc(false, true); - TTestRunConfig cfg(&tc); - - cfg.UseT1ha0Hasher = i; - Run<TTestChunkWriteRead<1000000, 1500000>>(cfg); - } -} - -YARD_UNIT_TEST(TestLogWriteCutUnequal) { - if constexpr 
(KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogWriteCut<false>>(&tc, 2, MIN_CHUNK_SIZE); - TTestLogWriteCut<false>::Reset(); - Run<TTestWholeLogRead>(&tc, 2, MIN_CHUNK_SIZE); - - Run<TTestLogWriteCut<false>>(&tc, 2, MIN_CHUNK_SIZE); - TTestLogWriteCut<false>::Reset(); - } -} - -YARD_UNIT_TEST(TestChunkReadRandomOffset) { - { - TTestContext tc(false, true); - Run<TTestChunkReadRandomOffset<4096, 117, 10>>(&tc, 1, 8 << 20, false); - } - { - TTestContext tc(false, true); - constexpr ui32 sectorPayload = 4064; - NPDisk::TDiskFormat format; - format.Clear(); - UNIT_ASSERT(sectorPayload == format.SectorPayloadSize()); - constexpr ui32 sizeWithHalfOfBlockSize = sectorPayload * 512 - sectorPayload / 2; - Run<TTestChunkReadRandomOffset<sizeWithHalfOfBlockSize, 217, 20>>(&tc, 1, 8 << 20, false); - } - { - TTestContext tc(false, true); - Run<TTestChunkReadRandomOffset<1 << 20, 1525, 10>>(&tc, 1, 8 << 20, false); - } - { - TTestContext tc(false, true); - Run<TTestChunkReadRandomOffset<2079573, 1450, 10>>(&tc, 1, 8 << 20, false); - } -} - -YARD_UNIT_TEST(TestChunkWriteRead) { - TTestContext tc(false, true); - Run<TTestChunkWriteRead<30000, 2 << 20>>(&tc, 1, 5 << 20); -} - -YARD_UNIT_TEST(TestChunkWriteReadMultiple) { - { - TTestContext tc(false, true); - Run<TTestChunkWriteRead<6000000, 6500000>>(&tc, 1, 16 << 20, false); - } - { - TTestContext tc(false, true); - Run<TTestChunkWriteRead<3000000, 3500000>>(&tc, 1, 8 << 20, false); - } - { - TTestContext tc(false, true); - Run<TTestChunkWriteRead<2 << 20, 2 << 20>>(&tc, 1, 8 << 20, false); - } - { - TTestContext tc(false, true); - Run<TTestChunkWriteRead<1000000, 1500000>>(&tc, 1, 4 << 20, false); - } -} - -YARD_UNIT_TEST(TestChunkWriteReadWhole) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestChunkWriteReadWhole>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkWrite20Read02) { - 
TTestContext tc(false, true); - // 2 << 20 is the read/write burst size, that's why. - Run<TTestChunkWrite20Read02>(&tc, 1, 2 << 20); -} - -YARD_UNIT_TEST(TestStartingPoints) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogStartingPoint>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestInitStartingPoints>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogMultipleWriteRead) { - TTestContext tc(false, true); - Run<TTestLogMultipleWriteRead<4000, 4100, 5000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogContinuityPersistence) { - TTestContext tc(false, true); - Run<TTestLogWrite<9000, 123>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLogWrite<1000, 124>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLogWrite<7000, 125>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLog3Read<9000, 1000, 7000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestLogContinuityPersistenceLarge) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - for (int i = 0; i < 4; ++i) { - Run<TTestLogWrite<20000, 123>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLogWrite<20000, 124>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLogWrite<20000, 125>>(&tc, 1, MIN_CHUNK_SIZE); - } - Run<TTestLog3Read<20000, 20000, 20000>>(&tc, 1, MIN_CHUNK_SIZE); - -} - -YARD_UNIT_TEST(TestLogWriteLsnConsistency) { - TTestContext tc(false, true); - Run<TTestLogWriteLsnConsistency<150>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkContinuity2) { - TTestContext tc(false, true); - Run<TTestChunk3WriteRead<2>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkContinuity3000) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestChunk3WriteRead<3000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkContinuity9000) { - TTestContext tc(false, true); - Run<TTestChunk3WriteRead<9000>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkReserve) { - TTestContext tc(false, true); - Run<TTestChunkReserve>(&tc, 1, MIN_CHUNK_SIZE); -} - 
-YARD_UNIT_TEST(TestCheckSpace) { - TTestContext tc(false, true); - Run<TTestCheckSpace>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunksLockByRange) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestChunksLockByRange>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunksLockUnlockReserve) { - TTestContext tc(false, true); - Run<TTestChunksLockUnlockReserve>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestHttpInfo) { - TTestContext tc(false, true); - Run<TTestHttpInfo>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestHttpInfoFileDoesntExist) { - TTestContext tc(false, true); - Run<TTestHttpInfoFileDoesntExist>(&tc, 1, MIN_CHUNK_SIZE, true); -} - -YARD_UNIT_TEST(TestBootingState) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + +namespace NKikimr { + +Y_UNIT_TEST_SUITE(TYardTest) { + +/* +YARD_UNIT_TEST(TestLotsOfNonceJumps) { + TTestContext tc(false, true); + Run<TTestInit<true, 1>>(&tc, 1, MIN_CHUNK_SIZE); + // for (size_t i = 0; i < 3 * MIN_CHUNK_SIZE / 4096 / 5; ++i) { + for (size_t i = 0; i < 204; ++i) { + // Cerr << "i# " << i << Endl; + // Run<TTestInit<false>>(&tc, 1, MIN_CHUNK_SIZE, false, disk); + Run<TTestLogWrite<1, 2023>>(&tc, 1, MIN_CHUNK_SIZE); + } + Run<TTestInit<false, 302>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 303>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 304>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 305>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 306>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 307>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 308>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 309>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 310>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 311>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 312>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 313>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 314>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 
315>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 316>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 317>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 318>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 319>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 320>>(&tc, 1, MIN_CHUNK_SIZE); +} +*/ + +YARD_UNIT_TEST(TestBadDeviceInit) { + TTestContext tc(false, true); + Run<TTestInitCorruptedError>(&tc, 1, MIN_CHUNK_SIZE, true); +} + +YARD_UNIT_TEST(TestInit) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestInit<true, 1>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 2>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInit<false, 3>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestInitOnIncompleteFormat) { + TTestContext tc(false, true); + TTestRunConfig cfg(&tc); + cfg.ChunkSize = MIN_CHUNK_SIZE; + cfg.TestContext = &tc; + FillDeviceWithPattern(&tc, MIN_CHUNK_SIZE, NPDisk::MagicIncompleteFormat); + Run<TTestInit<true, 1>>(cfg); + Run<TTestInit<false, 2>>(cfg); + Run<TTestInit<false, 3>>(cfg); +} + +YARD_UNIT_TEST(TestInitOwner) { + TTestContext tc(false, true); + Run<TTestInitOwner>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestIncorrectRequests) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestIncorrectRequests>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestEmptyLogRead) { + TTestContext tc(false, true); + Run<TTestEmptyLogRead>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestWholeLogRead) { + TTestContext tc(false, true); + Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogWriteRead) { + TTestContext tc(false, true); + Run<TTestLogWriteRead<17>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogWriteReadMedium) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogWriteRead<6000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogWriteReadLarge) { + TTestContext tc(false, true); + 
Run<TTestLogWriteRead<9000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogWriteCutEqual) { + for (int i = 0; i < 10; ++i) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogWriteCut<true>>(&tc, 2, MIN_CHUNK_SIZE); + TTestLogWriteCut<true>::Reset(); + Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); + } +} + +YARD_UNIT_TEST(TestLogWriteCutEqualRandomWait) { + for (int i = 0; i < 10; ++i) { + TTestContext tc(false, true); + tc.SectorMap->ImitateRandomWait = {TDuration::MicroSeconds(500), TDuration::MicroSeconds(1000)}; + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogWriteCut<true>>(&tc, 2, MIN_CHUNK_SIZE); + TTestLogWriteCut<true>::Reset(); + Run<TTestWholeLogRead>(&tc, 1, MIN_CHUNK_SIZE); + } +} + +YARD_UNIT_TEST(TestSysLogReordering) { + for (int i = 0; i < 10; ++i) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestSysLogReordering>(&tc, 5, MIN_CHUNK_SIZE); + TTestSysLogReordering::VDiskNum = 0; + Run<TTestSysLogReorderingLogCheck>(&tc, 5, MIN_CHUNK_SIZE); + TTestSysLogReorderingLogCheck::VDiskNum = 0; + } +} + +YARD_UNIT_TEST(TestLogWriteReaDifferentHashers) { + for (ui32 i = 0; i < 4; ++i) { + TTestContext tc(false, true); + TTestRunConfig cfg(&tc); + + cfg.UseT1ha0Hasher = i / 2; + Run<TTestLogWriteRead<6000>>(cfg); + cfg.UseT1ha0Hasher = i % 2; + Run<TTestWholeLogRead>(cfg); + } +} + +YARD_UNIT_TEST(TestChunkWriteReadDifferentHashers) { + for (ui32 i = 0; i < 2; ++i) { + TTestContext tc(false, true); + TTestRunConfig cfg(&tc); + + cfg.UseT1ha0Hasher = i; + Run<TTestChunkWriteRead<1000000, 1500000>>(cfg); + } +} + +YARD_UNIT_TEST(TestLogWriteCutUnequal) { + if constexpr (KIKIMR_PDISK_ENABLE_CUT_LOG_FROM_THE_MIDDLE) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogWriteCut<false>>(&tc, 2, MIN_CHUNK_SIZE); + TTestLogWriteCut<false>::Reset(); + Run<TTestWholeLogRead>(&tc, 2, MIN_CHUNK_SIZE); + + 
Run<TTestLogWriteCut<false>>(&tc, 2, MIN_CHUNK_SIZE); + TTestLogWriteCut<false>::Reset(); + } +} + +YARD_UNIT_TEST(TestChunkReadRandomOffset) { + { + TTestContext tc(false, true); + Run<TTestChunkReadRandomOffset<4096, 117, 10>>(&tc, 1, 8 << 20, false); + } + { + TTestContext tc(false, true); + constexpr ui32 sectorPayload = 4064; + NPDisk::TDiskFormat format; + format.Clear(); + UNIT_ASSERT(sectorPayload == format.SectorPayloadSize()); + constexpr ui32 sizeWithHalfOfBlockSize = sectorPayload * 512 - sectorPayload / 2; + Run<TTestChunkReadRandomOffset<sizeWithHalfOfBlockSize, 217, 20>>(&tc, 1, 8 << 20, false); + } + { + TTestContext tc(false, true); + Run<TTestChunkReadRandomOffset<1 << 20, 1525, 10>>(&tc, 1, 8 << 20, false); + } + { + TTestContext tc(false, true); + Run<TTestChunkReadRandomOffset<2079573, 1450, 10>>(&tc, 1, 8 << 20, false); + } +} + +YARD_UNIT_TEST(TestChunkWriteRead) { + TTestContext tc(false, true); + Run<TTestChunkWriteRead<30000, 2 << 20>>(&tc, 1, 5 << 20); +} + +YARD_UNIT_TEST(TestChunkWriteReadMultiple) { + { + TTestContext tc(false, true); + Run<TTestChunkWriteRead<6000000, 6500000>>(&tc, 1, 16 << 20, false); + } + { + TTestContext tc(false, true); + Run<TTestChunkWriteRead<3000000, 3500000>>(&tc, 1, 8 << 20, false); + } + { + TTestContext tc(false, true); + Run<TTestChunkWriteRead<2 << 20, 2 << 20>>(&tc, 1, 8 << 20, false); + } + { + TTestContext tc(false, true); + Run<TTestChunkWriteRead<1000000, 1500000>>(&tc, 1, 4 << 20, false); + } +} + +YARD_UNIT_TEST(TestChunkWriteReadWhole) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestChunkWriteReadWhole>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkWrite20Read02) { + TTestContext tc(false, true); + // 2 << 20 is the read/write burst size, that's why. 
+ Run<TTestChunkWrite20Read02>(&tc, 1, 2 << 20); +} + +YARD_UNIT_TEST(TestStartingPoints) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogStartingPoint>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestInitStartingPoints>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogMultipleWriteRead) { + TTestContext tc(false, true); + Run<TTestLogMultipleWriteRead<4000, 4100, 5000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogContinuityPersistence) { + TTestContext tc(false, true); + Run<TTestLogWrite<9000, 123>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLogWrite<1000, 124>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLogWrite<7000, 125>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLog3Read<9000, 1000, 7000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestLogContinuityPersistenceLarge) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + for (int i = 0; i < 4; ++i) { + Run<TTestLogWrite<20000, 123>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLogWrite<20000, 124>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLogWrite<20000, 125>>(&tc, 1, MIN_CHUNK_SIZE); + } + Run<TTestLog3Read<20000, 20000, 20000>>(&tc, 1, MIN_CHUNK_SIZE); + +} + +YARD_UNIT_TEST(TestLogWriteLsnConsistency) { + TTestContext tc(false, true); + Run<TTestLogWriteLsnConsistency<150>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkContinuity2) { + TTestContext tc(false, true); + Run<TTestChunk3WriteRead<2>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkContinuity3000) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestChunk3WriteRead<3000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkContinuity9000) { + TTestContext tc(false, true); + Run<TTestChunk3WriteRead<9000>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkReserve) { + TTestContext tc(false, true); + Run<TTestChunkReserve>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestCheckSpace) { + TTestContext tc(false, true); + Run<TTestCheckSpace>(&tc, 1, 
MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunksLockByRange) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestChunksLockByRange>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunksLockUnlockReserve) { + TTestContext tc(false, true); + Run<TTestChunksLockUnlockReserve>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestHttpInfo) { + TTestContext tc(false, true); + Run<TTestHttpInfo>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestHttpInfoFileDoesntExist) { + TTestContext tc(false, true); + Run<TTestHttpInfoFileDoesntExist>(&tc, 1, MIN_CHUNK_SIZE, true); +} + +YARD_UNIT_TEST(TestBootingState) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); Run<TTestBootingState>(&tc, 1, MIN_CHUNK_SIZE, false, TString(), 5); -} - -YARD_UNIT_TEST(TestWhiteboard) { - TTestContext tc(false, true); - Run<TTestWhiteboard>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(Test3AsyncLog) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLog3Write<100, 101, 102>>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestLog3Read<100, 101, 102>>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestFirstRecordToKeep) { - TTestContext tc(false, true); - Run<TTestFirstRecordToKeepWriteAB>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestFirstRecordToKeepReadB>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkRecommit) { - TTestContext tc(false, true); - Run<TTestChunkRecommit>(&tc); -} - -YARD_UNIT_TEST(TestChunkRestartRecommit) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestChunkRestartRecommit1>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestChunkRestartRecommit2>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkDelete) { - TTestContext tc(false, true); - Run<TTestChunkDelete1>(&tc, 1, MIN_CHUNK_SIZE); - Run<TTestChunkDelete2>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(Test3HugeAsyncLog) { - TTestContext tc(false, true); - constexpr ui32 chunkSize = MIN_CHUNK_SIZE; - 
Run<TTestLog3Write<chunkSize / 2, chunkSize / 2, chunkSize * 2>>(&tc, 1, chunkSize); - Run<TTestLog3Read<chunkSize / 2, chunkSize / 2, chunkSize * 2>>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestLotsOfTinyAsyncLogLatency) { - TTestContext tc(false, true); - Run<TTestLotsOfTinyAsyncLogLatency>(&tc); -} - -YARD_UNIT_TEST(TestHugeChunkAndLotsOfTinyAsyncLogOrder) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, 128 << 20); - Run<TTestHugeChunkAndLotsOfTinyAsyncLogOrder>(&tc); -} - -YARD_UNIT_TEST(TestLogLatency) { - TTestContext tc(false, true); - Run<TTestLogLatency>(&tc); -} - -YARD_UNIT_TEST(TestMultiYardLogLatency) { - TTestContext tc(false, true); - Run<TTestLogLatency>(&tc, 4); -} - -YARD_UNIT_TEST(TestMultiYardFirstRecordToKeep) { - TTestContext tc(false, true); - Run<TTestFirstRecordToKeepWriteAB>(&tc, 4, MIN_CHUNK_SIZE); - Run<TTestFirstRecordToKeepReadB>(&tc, 4, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestMultiYardStartingPoints) { - TTestContext tc(false, true); - FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); - Run<TTestLogStartingPoint>(&tc, 4, MIN_CHUNK_SIZE); - Run<TTestInitStartingPoints>(&tc, 4, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestMultiYardLogMultipleWriteRead) { - TTestContext tc(false, true); - Run<TTestLogMultipleWriteRead<4000, 4100, 5000>>(&tc, 4); -} - -YARD_UNIT_TEST(TestSysLogOverwrite) { - TTestContext tc(false, true); - ui32 chunkSize = 128 << 10; - - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - MakeDirIfNotExist(databaseDirectory.c_str()); - } - EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); - FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 2048ull << 20, false, tc.SectorMap); - +} + +YARD_UNIT_TEST(TestWhiteboard) { + TTestContext tc(false, true); + Run<TTestWhiteboard>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(Test3AsyncLog) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, 
MIN_CHUNK_SIZE); + Run<TTestLog3Write<100, 101, 102>>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestLog3Read<100, 101, 102>>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestFirstRecordToKeep) { + TTestContext tc(false, true); + Run<TTestFirstRecordToKeepWriteAB>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestFirstRecordToKeepReadB>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkRecommit) { + TTestContext tc(false, true); + Run<TTestChunkRecommit>(&tc); +} + +YARD_UNIT_TEST(TestChunkRestartRecommit) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestChunkRestartRecommit1>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestChunkRestartRecommit2>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkDelete) { + TTestContext tc(false, true); + Run<TTestChunkDelete1>(&tc, 1, MIN_CHUNK_SIZE); + Run<TTestChunkDelete2>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(Test3HugeAsyncLog) { + TTestContext tc(false, true); + constexpr ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestLog3Write<chunkSize / 2, chunkSize / 2, chunkSize * 2>>(&tc, 1, chunkSize); + Run<TTestLog3Read<chunkSize / 2, chunkSize / 2, chunkSize * 2>>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestLotsOfTinyAsyncLogLatency) { + TTestContext tc(false, true); + Run<TTestLotsOfTinyAsyncLogLatency>(&tc); +} + +YARD_UNIT_TEST(TestHugeChunkAndLotsOfTinyAsyncLogOrder) { + TTestContext tc(false, true); + FillDeviceWithZeroes(&tc, 128 << 20); + Run<TTestHugeChunkAndLotsOfTinyAsyncLogOrder>(&tc); +} + +YARD_UNIT_TEST(TestLogLatency) { + TTestContext tc(false, true); + Run<TTestLogLatency>(&tc); +} + +YARD_UNIT_TEST(TestMultiYardLogLatency) { + TTestContext tc(false, true); + Run<TTestLogLatency>(&tc, 4); +} + +YARD_UNIT_TEST(TestMultiYardFirstRecordToKeep) { + TTestContext tc(false, true); + Run<TTestFirstRecordToKeepWriteAB>(&tc, 4, MIN_CHUNK_SIZE); + Run<TTestFirstRecordToKeepReadB>(&tc, 4, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestMultiYardStartingPoints) { + TTestContext tc(false, true); + 
FillDeviceWithZeroes(&tc, MIN_CHUNK_SIZE); + Run<TTestLogStartingPoint>(&tc, 4, MIN_CHUNK_SIZE); + Run<TTestInitStartingPoints>(&tc, 4, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestMultiYardLogMultipleWriteRead) { + TTestContext tc(false, true); + Run<TTestLogMultipleWriteRead<4000, 4100, 5000>>(&tc, 4); +} + +YARD_UNIT_TEST(TestSysLogOverwrite) { + TTestContext tc(false, true); + ui32 chunkSize = 128 << 10; + + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + MakeDirIfNotExist(databaseDirectory.c_str()); + } + EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); + FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 2048ull << 20, false, tc.SectorMap); + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false); - - ui32 dataSize = chunkSize*3; - NPDisk::TAlignedData dataBefore(dataSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - + + ui32 dataSize = chunkSize*3; + NPDisk::TAlignedData dataBefore(dataSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false); - NPDisk::TAlignedData dataAfter1(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter1); - - OutputSectorMap(dataBefore, dataAfter1, dataSize); - - ui64 firstSector = RestoreLastSectors(&tc, dataBefore, dataAfter1, dataSize, 3); - ReadPdiskFile(&tc, dataSize, dataAfter1); - - OutputSectorMap(dataBefore, dataAfter1, dataSize); - + NPDisk::TAlignedData dataAfter1(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter1); + + OutputSectorMap(dataBefore, dataAfter1, dataSize); + + ui64 firstSector = RestoreLastSectors(&tc, dataBefore, dataAfter1, dataSize, 3); + ReadPdiskFile(&tc, dataSize, dataAfter1); + + OutputSectorMap(dataBefore, dataAfter1, dataSize); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false); - NPDisk::TAlignedData dataAfter2(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter2); - OutputSectorMap(dataAfter1, dataAfter2, dataSize); - - 
WriteSectors(&tc, dataAfter1, firstSector - 3, 6 * 3); - ReadPdiskFile(&tc, dataSize, dataAfter2); - OutputSectorMap(dataBefore, dataAfter2, dataSize); - OutputSectorMap(dataAfter1, dataAfter2, dataSize); - + NPDisk::TAlignedData dataAfter2(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter2); + OutputSectorMap(dataAfter1, dataAfter2, dataSize); + + WriteSectors(&tc, dataAfter1, firstSector - 3, 6 * 3); + ReadPdiskFile(&tc, dataSize, dataAfter2); + OutputSectorMap(dataBefore, dataAfter2, dataSize); + OutputSectorMap(dataAfter1, dataAfter2, dataSize); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false); -} - +} + /* -YARD_UNIT_TEST(TestDamagedLogContinuityPersistence) { - TTestContext tc(false, true); - ui32 chunkSize = 8 << 20; - Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); - - ui32 dataSize = 3 * chunkSize; - NPDisk::TAlignedData dataBefore(dataSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - - Run<TTestLog2Records3Sectors>(&tc, 1, chunkSize, false, true); - - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 2); - - Run<TTestLogDamageSector3Append1>(&tc, 1, chunkSize, false, true); - Run<TTestLogRead2Sectors>(&tc, 1, chunkSize, false, true); -} +YARD_UNIT_TEST(TestDamagedLogContinuityPersistence) { + TTestContext tc(false, true); + ui32 chunkSize = 8 << 20; + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); + + ui32 dataSize = 3 * chunkSize; + NPDisk::TAlignedData dataBefore(dataSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + + Run<TTestLog2Records3Sectors>(&tc, 1, chunkSize, false, true); + + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 2); + + Run<TTestLogDamageSector3Append1>(&tc, 1, chunkSize, false, true); + Run<TTestLogRead2Sectors>(&tc, 1, chunkSize, false, true); +} */ - -YARD_UNIT_TEST(TestDamagedFirstRecordToKeep) { - 
TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestLogFillChunkPlus1>(&tc, 1, chunkSize); - - // Read format info to get raw chunk size - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - MakeDirIfNotExist(databaseDirectory.c_str()); - } - TPDiskInfo info; + +YARD_UNIT_TEST(TestDamagedFirstRecordToKeep) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestLogFillChunkPlus1>(&tc, 1, chunkSize); + + // Read format info to get raw chunk size + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + MakeDirIfNotExist(databaseDirectory.c_str()); + } + TPDiskInfo info; bool isOk = ReadPDiskFormatInfo(dataPath, NPDisk::YdbDefaultPDiskSequence, info, false, tc.SectorMap); - UNIT_ASSERT_VALUES_EQUAL(isOk, true); - - ui32 dataSize = info.SystemChunkCount * info.RawChunkSizeBytes; - NPDisk::TAlignedData dataBefore(dataSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - - Run<TTestLogKeep5Plus1>(&tc, 1, chunkSize); - - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); - - Run<TTestLogReadRecords2To5>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestUpsAndDownsAtTheBoundary) { - ui32 chunkSize = 8 << 20; - { - TTestContext tc(false, true); - Run<TTestLogWrite<7009 << 10, 1>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 2>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 3>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<7014 << 10, 4>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 5>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 6>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<7019 << 10, 7>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 
8>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 9>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<7024 << 10, 10>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 11>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 12>>(&tc, 1, chunkSize); - } -} - -YARD_UNIT_TEST(TestDamageAtTheBoundary) { - ui32 chunkSize = 8 << 20; - ui32 dataSize = 3 * chunkSize; - NPDisk::TAlignedData dataBefore(dataSize); - NPDisk::TAlignedData dataAfter(dataSize); - { - TTestContext tc(false, true); - Run<TTestLogWrite<4000, 1>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - Run<TTestLogWrite<7009 << 10, 2>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); - Run<TTestLogWrite<4000, 3>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 4>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<4000, 5>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - Run<TTestLogWrite<7014 << 10, 6>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); - Run<TTestLogWrite<4000, 7>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 8>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<4000, 9>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - Run<TTestLogWrite<7019 << 10, 10>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); - Run<TTestLogWrite<4000, 11>>(&tc, 1, chunkSize); - Run<TTestLogWrite<4000, 12>>(&tc, 1, chunkSize); - } - { - TTestContext tc(false, true); - Run<TTestLogWrite<4000, 13>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - Run<TTestLogWrite<7024 << 10, 14>>(&tc, 1, chunkSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); - Run<TTestLogWrite<4000, 15>>(&tc, 1, 
chunkSize); - Run<TTestLogWrite<4000, 16>>(&tc, 1, chunkSize); - } -} - -YARD_UNIT_TEST(TestUnflushedChunk) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestWriteAndReleaseChunk2A>(&tc, 1, chunkSize); - - ui32 dataSize = 6 * chunkSize; - NPDisk::TAlignedData dataBefore(dataSize); - ReadPdiskFile(&tc, dataSize, dataBefore); - - Run<TTestWriteAndCheckChunk2B>(&tc, 1, chunkSize); - - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - RestoreLastSectors(&tc, dataBefore, dataAfter, dataSize, 100); - - Run<TTestCheckErrorChunk2B>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestLogOverwriteRestarts) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestWriteAndCutLogChunk>(&tc, 1, chunkSize); - for (ui32 i = 0; i < 15; ++i) { - Run<TTestLogMoreSectors<1000>>(&tc, 1, chunkSize); - } - for (ui32 i = 0; i < 15; ++i) { - Run<TTestLogMoreSectors<5000>>(&tc, 1, chunkSize); - } - for (ui32 i = 0; i < 15; ++i) { - Run<TTestLogMoreSectors<9000>>(&tc, 1, chunkSize); - } - for (ui32 i = 0; i < 15; ++i) { - Run<TTestLogMoreSectors<13000>>(&tc, 1, chunkSize); - } - for (ui32 i = 0; i < 15; ++i) { - Run<TTestLogMoreSectors<17000>>(&tc, 1, chunkSize); - } -} - -YARD_UNIT_TEST(TestChunkFlushReboot) { - TTestContext tc(false, true); - Run<TTestChunkFlush>(&tc); - Run<TTestChunkUnavailable>(&tc); -} - -YARD_UNIT_TEST(TestRedZoneSurvivability) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Ctest << "TestRedZoneSurvivability chunkSize# " << chunkSize << Endl; - Run<TTestRedZoneSurvivability>(&tc, 1, chunkSize); -} - + UNIT_ASSERT_VALUES_EQUAL(isOk, true); + + ui32 dataSize = info.SystemChunkCount * info.RawChunkSizeBytes; + NPDisk::TAlignedData dataBefore(dataSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + + Run<TTestLogKeep5Plus1>(&tc, 1, chunkSize); + + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + DestroyLastSectors(&tc, 
dataBefore, dataAfter, dataSize, 3); + + Run<TTestLogReadRecords2To5>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestUpsAndDownsAtTheBoundary) { + ui32 chunkSize = 8 << 20; + { + TTestContext tc(false, true); + Run<TTestLogWrite<7009 << 10, 1>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 2>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 3>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + Run<TTestLogWrite<7014 << 10, 4>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 5>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 6>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + Run<TTestLogWrite<7019 << 10, 7>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 8>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 9>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + Run<TTestLogWrite<7024 << 10, 10>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 11>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 12>>(&tc, 1, chunkSize); + } +} + +YARD_UNIT_TEST(TestDamageAtTheBoundary) { + ui32 chunkSize = 8 << 20; + ui32 dataSize = 3 * chunkSize; + NPDisk::TAlignedData dataBefore(dataSize); + NPDisk::TAlignedData dataAfter(dataSize); + { + TTestContext tc(false, true); + Run<TTestLogWrite<4000, 1>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + Run<TTestLogWrite<7009 << 10, 2>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); + Run<TTestLogWrite<4000, 3>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 4>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + Run<TTestLogWrite<4000, 5>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + Run<TTestLogWrite<7014 << 10, 6>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); + Run<TTestLogWrite<4000, 7>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 8>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + 
Run<TTestLogWrite<4000, 9>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + Run<TTestLogWrite<7019 << 10, 10>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); + Run<TTestLogWrite<4000, 11>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 12>>(&tc, 1, chunkSize); + } + { + TTestContext tc(false, true); + Run<TTestLogWrite<4000, 13>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + Run<TTestLogWrite<7024 << 10, 14>>(&tc, 1, chunkSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + DestroyLastSectors(&tc, dataBefore, dataAfter, dataSize, 3); + Run<TTestLogWrite<4000, 15>>(&tc, 1, chunkSize); + Run<TTestLogWrite<4000, 16>>(&tc, 1, chunkSize); + } +} + +YARD_UNIT_TEST(TestUnflushedChunk) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestWriteAndReleaseChunk2A>(&tc, 1, chunkSize); + + ui32 dataSize = 6 * chunkSize; + NPDisk::TAlignedData dataBefore(dataSize); + ReadPdiskFile(&tc, dataSize, dataBefore); + + Run<TTestWriteAndCheckChunk2B>(&tc, 1, chunkSize); + + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + RestoreLastSectors(&tc, dataBefore, dataAfter, dataSize, 100); + + Run<TTestCheckErrorChunk2B>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestLogOverwriteRestarts) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestWriteAndCutLogChunk>(&tc, 1, chunkSize); + for (ui32 i = 0; i < 15; ++i) { + Run<TTestLogMoreSectors<1000>>(&tc, 1, chunkSize); + } + for (ui32 i = 0; i < 15; ++i) { + Run<TTestLogMoreSectors<5000>>(&tc, 1, chunkSize); + } + for (ui32 i = 0; i < 15; ++i) { + Run<TTestLogMoreSectors<9000>>(&tc, 1, chunkSize); + } + for (ui32 i = 0; i < 15; ++i) { + Run<TTestLogMoreSectors<13000>>(&tc, 1, chunkSize); + } + for (ui32 i = 0; i < 15; ++i) { + Run<TTestLogMoreSectors<17000>>(&tc, 1, chunkSize); + } +} + +YARD_UNIT_TEST(TestChunkFlushReboot) { + TTestContext 
tc(false, true); + Run<TTestChunkFlush>(&tc); + Run<TTestChunkUnavailable>(&tc); +} + +YARD_UNIT_TEST(TestRedZoneSurvivability) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Ctest << "TestRedZoneSurvivability chunkSize# " << chunkSize << Endl; + Run<TTestRedZoneSurvivability>(&tc, 1, chunkSize); +} + /* -YARD_UNIT_TEST(TestNonceJumpRewriteMin) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - ui32 dataSize = 6 * chunkSize; - NPDisk::TAlignedData data0(dataSize); - NPDisk::TAlignedData data1(dataSize); - NPDisk::TAlignedData data2(dataSize); - - Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data0); - - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data1); - i64 lastDifferenceA = FindLastDifferingBytes(data0, data1, dataSize); - - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data2); - i64 lastDifferenceB = FindLastDifferingBytes(data1, data2, dataSize); - - i64 distance = lastDifferenceB - lastDifferenceA; - - if (Abs(distance) >= 8) { - Sleep(TDuration::Seconds(5)); - } - - ASSERT_YTHROW(Abs(distance) <= 8, - "Log length changed while it wasnt expected to. 
lastDifferenceA# " << lastDifferenceA - << " lastDifferenceB# " << lastDifferenceB); +YARD_UNIT_TEST(TestNonceJumpRewriteMin) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + ui32 dataSize = 6 * chunkSize; + NPDisk::TAlignedData data0(dataSize); + NPDisk::TAlignedData data1(dataSize); + NPDisk::TAlignedData data2(dataSize); + + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data0); + + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data1); + i64 lastDifferenceA = FindLastDifferingBytes(data0, data1, dataSize); + + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data2); + i64 lastDifferenceB = FindLastDifferingBytes(data1, data2, dataSize); + + i64 distance = lastDifferenceB - lastDifferenceA; + + if (Abs(distance) >= 8) { + Sleep(TDuration::Seconds(5)); + } + + ASSERT_YTHROW(Abs(distance) <= 8, + "Log length changed while it wasnt expected to. 
lastDifferenceA# " << lastDifferenceA + << " lastDifferenceB# " << lastDifferenceB); }*/ - + /* -YARD_UNIT_TEST(TestNonceJumpRewrite) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - ui32 dataSize = 6 * chunkSize; - NPDisk::TAlignedData data0(dataSize); - NPDisk::TAlignedData data1(dataSize); - - Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestLogWrite<2000, 123>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestLogWrite<2000, 124>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data0); - - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data1); - i64 lastDifferenceA = FindLastDifferingBytes(data0, data1, dataSize); - - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data0); - i64 lastDifferenceB = FindLastDifferingBytes(data0, data1, dataSize); - - i64 distance = lastDifferenceB - lastDifferenceA; - - ASSERT_YTHROW(Abs(distance) <= 8, - "Log length changed while it wasnt expected to. 
lastDifferenceA# " << lastDifferenceA - << " lastDifferenceB# " << lastDifferenceB); - Run<TTestLogWrite<2000, 125>>(&tc, 1, chunkSize, false, true); - Run<TTestLog3Read<2000, 2000, 2000>>(&tc, 1, chunkSize, false, true); -} +YARD_UNIT_TEST(TestNonceJumpRewrite) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + ui32 dataSize = 6 * chunkSize; + NPDisk::TAlignedData data0(dataSize); + NPDisk::TAlignedData data1(dataSize); + + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestLogWrite<2000, 123>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestLogWrite<2000, 124>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data0); + + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data1); + i64 lastDifferenceA = FindLastDifferingBytes(data0, data1, dataSize); + + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data0); + i64 lastDifferenceB = FindLastDifferingBytes(data0, data1, dataSize); + + i64 distance = lastDifferenceB - lastDifferenceA; + + ASSERT_YTHROW(Abs(distance) <= 8, + "Log length changed while it wasnt expected to. 
lastDifferenceA# " << lastDifferenceA + << " lastDifferenceB# " << lastDifferenceB); + Run<TTestLogWrite<2000, 125>>(&tc, 1, chunkSize, false, true); + Run<TTestLog3Read<2000, 2000, 2000>>(&tc, 1, chunkSize, false, true); +} */ - -YARD_UNIT_TEST(TestSlay) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - FillDeviceWithZeroes(&tc, chunkSize); - Run<TTestSlay>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestSlayRace) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestSlayRace>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestSlayRecreate) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestSlayRecreate>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestSlayLogWriteRaceActor) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TActorTestSlayLogWriteRace>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestMultiYardHarakiri) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Ctest << Endl << "Fill with zeroes" << Endl; - FillDeviceWithZeroes(&tc, chunkSize * 2); - Ctest << Endl << "Phase 1" << Endl; - Run<TTestFillDiskPhase1>(&tc, 4, chunkSize); - Ctest << Endl << "Phase 2" << Endl; - Run<TTestFillDiskPhase2>(&tc, 4, chunkSize); - Ctest << Endl << "TestHarakiri" << Endl; - Run<TTestHarakiri>(&tc, 1, chunkSize); - Ctest << Endl << "TestLogWrite" << Endl; - Run<TTestLogWrite<1000, 1>>(&tc, 1, chunkSize); - Ctest << Endl << "TestSimpleHarakiri" << Endl; - Run<TTestSimpleHarakiri>(&tc, 4, chunkSize); - Ctest << Endl << "TestLogWrite 2" << Endl; - Run<TTestLogWrite<1000, 2>>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestDestroySystem) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestInit<true, 1>>(&tc, 1, chunkSize); - - ui32 dataSize = chunkSize; - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - DestroySectors(&tc, dataAfter, dataSize, 0, 1); - Run<TTestInitCorruptedError>(&tc, 1, chunkSize); - - 
DestroySectors(&tc, dataAfter, dataSize, 24, 1); - Run<TTestInitCorruptedError>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestAllocateAllChunks) { - TTestContext tc(false, true); - Run<TTestAllocateAllChunks>(&tc, 1, MIN_CHUNK_SIZE); -} - -YARD_UNIT_TEST(TestChunkDeletionWhileWriting) { - TTestContext tc(false, true); - ui32 chunkSize = 16 << 20; - Run<TTestChunkDeletionWhileWritingIt>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestCutMultipleLogChunks) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - FillDeviceWithZeroes(&tc, chunkSize); - Run<TTestCutMultipleLogChunks1>(&tc, 1, chunkSize); - Run<TTestCutMultipleLogChunks2>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestLogOwerwrite) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - Run<TTestLogOwerwrite1>(&tc, 1, chunkSize); - Run<TTestLogOwerwrite2>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestDestructionWhileWritingChunk) { - TTestContext tc(false, true); - ui32 chunkSize = 8 << 20; - FillDeviceWithZeroes(&tc, chunkSize); - Run<TTestDestructionWhileWritingChunk>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestDestructionWhileReadingChunk) { - TTestContext tc(false, true); - ui32 chunkSize = 8 << 20; - Run<TTestDestructionWhileReadingChunk>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestDestructionWhileReadingLog) { - TTestContext tc(false, true); - ui32 chunkSize = 8 << 20; - Run<TTestDestructionWhileReadingLog>(&tc, 1, chunkSize); -} - -YARD_UNIT_TEST(TestChunkPriorityBlock) { - TTestContext tc(false, true); - Run<TTestChunkPriorityBlock>(&tc); -} - -YARD_UNIT_TEST(TestFormatInfo) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - if (!NFs::Exists(databaseDirectory.c_str())) { - MakeDirIfNotExist(databaseDirectory.c_str()); - } - } - EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); - 
FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 1 << 30, false, tc.SectorMap); - - TPDiskInfo info; + +YARD_UNIT_TEST(TestSlay) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + FillDeviceWithZeroes(&tc, chunkSize); + Run<TTestSlay>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestSlayRace) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestSlayRace>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestSlayRecreate) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestSlayRecreate>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestSlayLogWriteRaceActor) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TActorTestSlayLogWriteRace>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestMultiYardHarakiri) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Ctest << Endl << "Fill with zeroes" << Endl; + FillDeviceWithZeroes(&tc, chunkSize * 2); + Ctest << Endl << "Phase 1" << Endl; + Run<TTestFillDiskPhase1>(&tc, 4, chunkSize); + Ctest << Endl << "Phase 2" << Endl; + Run<TTestFillDiskPhase2>(&tc, 4, chunkSize); + Ctest << Endl << "TestHarakiri" << Endl; + Run<TTestHarakiri>(&tc, 1, chunkSize); + Ctest << Endl << "TestLogWrite" << Endl; + Run<TTestLogWrite<1000, 1>>(&tc, 1, chunkSize); + Ctest << Endl << "TestSimpleHarakiri" << Endl; + Run<TTestSimpleHarakiri>(&tc, 4, chunkSize); + Ctest << Endl << "TestLogWrite 2" << Endl; + Run<TTestLogWrite<1000, 2>>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestDestroySystem) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestInit<true, 1>>(&tc, 1, chunkSize); + + ui32 dataSize = chunkSize; + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + DestroySectors(&tc, dataAfter, dataSize, 0, 1); + Run<TTestInitCorruptedError>(&tc, 1, chunkSize); + + DestroySectors(&tc, dataAfter, dataSize, 24, 1); + Run<TTestInitCorruptedError>(&tc, 1, chunkSize); +} + 
+YARD_UNIT_TEST(TestAllocateAllChunks) { + TTestContext tc(false, true); + Run<TTestAllocateAllChunks>(&tc, 1, MIN_CHUNK_SIZE); +} + +YARD_UNIT_TEST(TestChunkDeletionWhileWriting) { + TTestContext tc(false, true); + ui32 chunkSize = 16 << 20; + Run<TTestChunkDeletionWhileWritingIt>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestCutMultipleLogChunks) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + FillDeviceWithZeroes(&tc, chunkSize); + Run<TTestCutMultipleLogChunks1>(&tc, 1, chunkSize); + Run<TTestCutMultipleLogChunks2>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestLogOwerwrite) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + Run<TTestLogOwerwrite1>(&tc, 1, chunkSize); + Run<TTestLogOwerwrite2>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestDestructionWhileWritingChunk) { + TTestContext tc(false, true); + ui32 chunkSize = 8 << 20; + FillDeviceWithZeroes(&tc, chunkSize); + Run<TTestDestructionWhileWritingChunk>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestDestructionWhileReadingChunk) { + TTestContext tc(false, true); + ui32 chunkSize = 8 << 20; + Run<TTestDestructionWhileReadingChunk>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestDestructionWhileReadingLog) { + TTestContext tc(false, true); + ui32 chunkSize = 8 << 20; + Run<TTestDestructionWhileReadingLog>(&tc, 1, chunkSize); +} + +YARD_UNIT_TEST(TestChunkPriorityBlock) { + TTestContext tc(false, true); + Run<TTestChunkPriorityBlock>(&tc); +} + +YARD_UNIT_TEST(TestFormatInfo) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + if (!NFs::Exists(databaseDirectory.c_str())) { + MakeDirIfNotExist(databaseDirectory.c_str()); + } + } + EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); + FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 1 << 30, false, tc.SectorMap); + + TPDiskInfo info; 
bool isOk = ReadPDiskFormatInfo(dataPath, NPDisk::YdbDefaultPDiskSequence, info, false, tc.SectorMap); - UNIT_ASSERT_VALUES_EQUAL(isOk, true); - UNIT_ASSERT_VALUES_EQUAL(info.TextMessage, "Info"); -} - -YARD_UNIT_TEST(TestStartingPointReboots) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - MakeDirIfNotExist(databaseDirectory.c_str()); - } - EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); - FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 1 << 30, false, tc.SectorMap); - for (ui32 i = 0; i < 32; ++i) { - Run<TTestStartingPointRebootsIteration>(&tc, 1, chunkSize); - } -} - -YARD_UNIT_TEST(TestRestartAtNonceJump) { - TTestContext tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - // Write a full chunk of logs (assume it's chunk# SystemChunkCount) + UNIT_ASSERT_VALUES_EQUAL(isOk, true); + UNIT_ASSERT_VALUES_EQUAL(info.TextMessage, "Info"); +} + +YARD_UNIT_TEST(TestStartingPointReboots) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + MakeDirIfNotExist(databaseDirectory.c_str()); + } + EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); + FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, 1 << 30, false, tc.SectorMap); + for (ui32 i = 0; i < 32; ++i) { + Run<TTestStartingPointRebootsIteration>(&tc, 1, chunkSize); + } +} + +YARD_UNIT_TEST(TestRestartAtNonceJump) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + // Write a full chunk of logs (assume it's chunk# SystemChunkCount) Run<TTestContinueWriteLogChunk>(&tc, 1, chunkSize, false); - // Read format info to get raw chunk size - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = 
MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - MakeDirIfNotExist(databaseDirectory.c_str()); - } - TPDiskInfo info; + // Read format info to get raw chunk size + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + MakeDirIfNotExist(databaseDirectory.c_str()); + } + TPDiskInfo info; bool isOk = ReadPDiskFormatInfo(dataPath, NPDisk::YdbDefaultPDiskSequence, info, false, tc.SectorMap); - UNIT_ASSERT_VALUES_EQUAL(isOk, true); - // Destroy data in chunks starting at# SystemChunkCount + 1 - ui32 dataSize = 8 * chunkSize; - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - ui64 firstSector = (info.SystemChunkCount + 1) * info.RawChunkSizeBytes / info.SectorSizeBytes - - 3; // to get into the situation where we have filled the chunks but did not write the next chunk reference - DestroySectors(&tc, dataAfter, dataSize, firstSector, 1); - // Write another full chunk of logs (assume it's chunk# SystemChunkCount + 1) + UNIT_ASSERT_VALUES_EQUAL(isOk, true); + // Destroy data in chunks starting at# SystemChunkCount + 1 + ui32 dataSize = 8 * chunkSize; + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + ui64 firstSector = (info.SystemChunkCount + 1) * info.RawChunkSizeBytes / info.SectorSizeBytes + - 3; // to get into the situation where we have filled the chunks but did not write the next chunk reference + DestroySectors(&tc, dataAfter, dataSize, firstSector, 1); + // Write another full chunk of logs (assume it's chunk# SystemChunkCount + 1) Run<TTestContinueWriteLogChunk>(&tc, 1, chunkSize, false); - // Check that last log Lsn is somewhere out of the first log chunk + // Check that last log Lsn is somewhere out of the first log chunk Run<TTestLastLsn>(&tc, 1, chunkSize, false); -} - -YARD_UNIT_TEST(TestRestartAtChunkEnd) { - TTestContext 
tc(false, true); - ui32 chunkSize = MIN_CHUNK_SIZE; - // Write a full chunk of logs (assume it's chunk# SystemChunkCount) +} + +YARD_UNIT_TEST(TestRestartAtChunkEnd) { + TTestContext tc(false, true); + ui32 chunkSize = MIN_CHUNK_SIZE; + // Write a full chunk of logs (assume it's chunk# SystemChunkCount) Run<TTestContinueWriteLogChunk>(&tc, 1, chunkSize, false); - // Read format info to get raw chunk size - TString dataPath; - if (tc.TempDir) { - TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); - dataPath = MakePDiskPath((*tc.TempDir)().c_str()); - MakeDirIfNotExist(databaseDirectory.c_str()); - } - TPDiskInfo info; + // Read format info to get raw chunk size + TString dataPath; + if (tc.TempDir) { + TString databaseDirectory = MakeDatabasePath((*tc.TempDir)().c_str()); + dataPath = MakePDiskPath((*tc.TempDir)().c_str()); + MakeDirIfNotExist(databaseDirectory.c_str()); + } + TPDiskInfo info; bool isOk = ReadPDiskFormatInfo(dataPath, NPDisk::YdbDefaultPDiskSequence, info, false, tc.SectorMap); - UNIT_ASSERT_VALUES_EQUAL(isOk, true); - // Destroy data in chunks starting at# SystemChunkCount + 1 - ui32 dataSize = 8 * chunkSize; - NPDisk::TAlignedData dataAfter(dataSize); - ReadPdiskFile(&tc, dataSize, dataAfter); - - ui64 firstSector = (info.SystemChunkCount + 1) * info.RawChunkSizeBytes / info.SectorSizeBytes; - DestroySectors(&tc, dataAfter, dataSize, firstSector, 1); - // Write another full chunk of logs (assume it's chunk# SystemChunkCount + 1) + UNIT_ASSERT_VALUES_EQUAL(isOk, true); + // Destroy data in chunks starting at# SystemChunkCount + 1 + ui32 dataSize = 8 * chunkSize; + NPDisk::TAlignedData dataAfter(dataSize); + ReadPdiskFile(&tc, dataSize, dataAfter); + + ui64 firstSector = (info.SystemChunkCount + 1) * info.RawChunkSizeBytes / info.SectorSizeBytes; + DestroySectors(&tc, dataAfter, dataSize, firstSector, 1); + // Write another full chunk of logs (assume it's chunk# SystemChunkCount + 1) Run<TTestContinueWriteLogChunk>(&tc, 1, 
chunkSize, false); - // Check that last log Lsn is somewhere out of the first log chunk + // Check that last log Lsn is somewhere out of the first log chunk Run<TTestLastLsn>(&tc, 1, chunkSize, false); -} - -YARD_UNIT_TEST(TestEnormousDisk) { - TTestContext tc(false, true); - ui32 chunkSize = 512 << 20; - ui64 diskSize = 100ull << 40; - - TString dataPath; - EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); - FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, diskSize, false, tc.SectorMap); - +} + +YARD_UNIT_TEST(TestEnormousDisk) { + TTestContext tc(false, true); + ui32 chunkSize = 512 << 20; + ui64 diskSize = 100ull << 40; + + TString dataPath; + EntropyPool().Read(&tc.PDiskGuid, sizeof(tc.PDiskGuid)); + FormatPDiskForTest(dataPath, tc.PDiskGuid, chunkSize, diskSize, false, tc.SectorMap); + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false); Run<TTestCommitChunks<(31998)>>(&tc, 1, chunkSize, false); - Run<TTestLogWrite<512000000, 16>>(&tc, 1, chunkSize); - Run<TTestLogWrite<512000000, 17>>(&tc, 1, chunkSize); - Run<TTestLogWrite<128000000, 18>>(&tc, 1, chunkSize); - Run<TTestChunkWriteRead<30000, 2 << 20>>(&tc, 1, 5 << 20); -} - -/* -// TODO(cthulhu): Shorten test data, move it to a proper place -YARD_UNIT_TEST(TestInitOnOldDisk) { - TTestContext tc(false, true); - ui32 chunkSize = 134217728; - ui32 dataSize = 8 * chunkSize; - NPDisk::TAlignedData data0(dataSize); - - Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); - - ReadPdiskFile(&tc, dataSize, data0); - Cerr << Endl << Endl << Endl; - tc.PDiskGuid = 8308644718352142590ull; - - TString path = "/place/home/cthulhu/tmp_hdd2"; - ASSERT_YTHROW(NFs::Exists(path), "File " << path << " does not exist."); - { - TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; - TPDiskMon mon(counters); - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateSyncBlockDevice(path, 999, mon)); - VERBOSE_COUT(" Performing Pread of " << dataSize); - device->PreadAsync(data0.Get(), 
dataSize, 0, nullptr, 9999, {}); - } - - WriteSectors(&tc, data0, 0, dataSize/4096); - Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); -} - -*/ -} -} // namespace NKikimr + Run<TTestLogWrite<512000000, 16>>(&tc, 1, chunkSize); + Run<TTestLogWrite<512000000, 17>>(&tc, 1, chunkSize); + Run<TTestLogWrite<128000000, 18>>(&tc, 1, chunkSize); + Run<TTestChunkWriteRead<30000, 2 << 20>>(&tc, 1, 5 << 20); +} + +/* +// TODO(cthulhu): Shorten test data, move it to a proper place +YARD_UNIT_TEST(TestInitOnOldDisk) { + TTestContext tc(false, true); + ui32 chunkSize = 134217728; + ui32 dataSize = 8 * chunkSize; + NPDisk::TAlignedData data0(dataSize); + + Run<TTestInit<true, 1>>(&tc, 1, chunkSize, false, true); + + ReadPdiskFile(&tc, dataSize, data0); + Cerr << Endl << Endl << Endl; + tc.PDiskGuid = 8308644718352142590ull; + + TString path = "/place/home/cthulhu/tmp_hdd2"; + ASSERT_YTHROW(NFs::Exists(path), "File " << path << " does not exist."); + { + TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; + TPDiskMon mon(counters); + THolder<NPDisk::IBlockDevice> device(NPDisk::CreateSyncBlockDevice(path, 999, mon)); + VERBOSE_COUT(" Performing Pread of " << dataSize); + device->PreadAsync(data0.Get(), dataSize, 0, nullptr, 9999, {}); + } + + WriteSectors(&tc, data0, 0, dataSize/4096); + Run<TTestInit<false, 1>>(&tc, 1, chunkSize, false, true); +} + +*/ +} +} // namespace NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_atomicblockcounter.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_atomicblockcounter.cpp index 46afd940a2..3ea3033b19 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_atomicblockcounter.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_atomicblockcounter.cpp @@ -41,7 +41,7 @@ void TAtomicBlockCounter::Unblock(ui64 flag, TResult& res) noexcept { } ui64 TAtomicBlockCounter::Add(ui64 value) noexcept { - Y_VERIFY_S(value > 0, "zero value# " << value); + 
Y_VERIFY_S(value > 0, "zero value# " << value); while (true) { ui64 prevData = AtomicGet(Data); if (GetBlocked(prevData)) { @@ -55,7 +55,7 @@ ui64 TAtomicBlockCounter::Add(ui64 value) noexcept { } ui64 TAtomicBlockCounter::Sub(ui64 value) noexcept { - Y_VERIFY_S(value > 0, "zero value# " << value); + Y_VERIFY_S(value > 0, "zero value# " << value); while (true) { ui64 prevData = AtomicGet(Data); ui64 data = NextSeqno(CheckedSubCounter(prevData, value)); @@ -66,7 +66,7 @@ ui64 TAtomicBlockCounter::Sub(ui64 value) noexcept { } ui64 TAtomicBlockCounter::ThresholdAdd(ui64 value, ui64 threshold, TAtomicBlockCounter::TResult& res) noexcept { - Y_VERIFY_S(value > 0, "zero value# " << value); + Y_VERIFY_S(value > 0, "zero value# " << value); while (true) { ui64 prevData = AtomicGet(Data); if (GetBlocked(prevData)) { // Add is forbidden iff blocked @@ -82,7 +82,7 @@ ui64 TAtomicBlockCounter::ThresholdAdd(ui64 value, ui64 threshold, TAtomicBlockC } ui64 TAtomicBlockCounter::ThresholdSub(ui64 value, ui64 threshold, TAtomicBlockCounter::TResult& res) noexcept { - Y_VERIFY_S(value > 0, "zero value# " << value); + Y_VERIFY_S(value > 0, "zero value# " << value); while (true) { ui64 prevData = AtomicGet(Data); ui64 data = NextSeqno(ThresholdBlock(CheckedSubCounter(prevData, value), threshold)); @@ -109,16 +109,16 @@ ui64 TAtomicBlockCounter::Get() const noexcept { } ui64 TAtomicBlockCounter::CheckedAddCounter(ui64 prevData, ui64 value) noexcept { - Y_VERIFY_S(!(value & ~CounterMask), "invalid value# " << value); - Y_VERIFY_S(!((GetCounter(prevData) + value) & ~CounterMask), - "overflow value# " << value << " prevData# " << GetCounter(prevData)); + Y_VERIFY_S(!(value & ~CounterMask), "invalid value# " << value); + Y_VERIFY_S(!((GetCounter(prevData) + value) & ~CounterMask), + "overflow value# " << value << " prevData# " << GetCounter(prevData)); return prevData + value; // No overflow, so higher bits are untouched } ui64 TAtomicBlockCounter::CheckedSubCounter(ui64 prevData, ui64 
value) noexcept { - Y_VERIFY_S(!(value & ~CounterMask), "invalid value# " << value); - Y_VERIFY_S(!((GetCounter(prevData) - value) & ~CounterMask), - "underflow value# " << value << " prevData# " << GetCounter(prevData)); + Y_VERIFY_S(!(value & ~CounterMask), "invalid value# " << value); + Y_VERIFY_S(!((GetCounter(prevData) - value) & ~CounterMask), + "underflow value# " << value << " prevData# " << GetCounter(prevData)); return prevData - value; // No underflow, so higher bits are untouched } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueuemanyone.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueuemanyone.h index 2f75932ede..a0ac88b250 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueuemanyone.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueuemanyone.h @@ -1,67 +1,67 @@ -#pragma once -#include "defs.h" +#pragma once +#include "defs.h" #include <ydb/core/util/queue_oneone_inplace.h> -#include <util/system/condvar.h> +#include <util/system/condvar.h> #include <library/cpp/threading/queue/mpsc_vinfarr_obstructive.h> #include <library/cpp/threading/queue/mpsc_read_as_filled.h> - -namespace NKikimr { -namespace NPDisk { - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TCountedQueueManyOne -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -template<typename T, ui32 TSize> -class TCountedQueueManyOne { + +namespace NKikimr { +namespace NPDisk { + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TCountedQueueManyOne +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template<typename T, ui32 TSize> +class TCountedQueueManyOne { NThreading::TObstructiveConsumerAuxQueue<T> Queue; - TAtomic 
SizeLowerEstimate; - TMutex ProducedMutex; - TCondVar ProducedCondVar; -public: - TCountedQueueManyOne() - : SizeLowerEstimate(0) - {} - - virtual ~TCountedQueueManyOne() { - Y_VERIFY_S(AtomicGet(SizeLowerEstimate) == 0, "Unexpected SizeLowerEstimate# " << AtomicGet(SizeLowerEstimate)); - } - + TAtomic SizeLowerEstimate; + TMutex ProducedMutex; + TCondVar ProducedCondVar; +public: + TCountedQueueManyOne() + : SizeLowerEstimate(0) + {} + + virtual ~TCountedQueueManyOne() { + Y_VERIFY_S(AtomicGet(SizeLowerEstimate) == 0, "Unexpected SizeLowerEstimate# " << AtomicGet(SizeLowerEstimate)); + } + void Push(T *x) noexcept { - Queue.Push(x); - AtomicIncrement(SizeLowerEstimate); - WakeUp(); - } - - T *Pop() { - AtomicDecrement(SizeLowerEstimate); - return Queue.Pop(); - } - - TAtomicBase GetWaitingSize() { - return AtomicLoad(&SizeLowerEstimate); - } - - void ProducedWaitI() { - TGuard<TMutex> guard(ProducedMutex); - if (AtomicGet(SizeLowerEstimate)) { - return; - } - return ProducedCondVar.WaitI(ProducedMutex); - } - - bool ProducedWait(TDuration duration) { - TGuard<TMutex> guard(ProducedMutex); - if (AtomicGet(SizeLowerEstimate)) { - return true; - } - return ProducedCondVar.WaitT(ProducedMutex, duration); - } - - void WakeUp() { - TGuard<TMutex> guard(ProducedMutex); - ProducedCondVar.Signal(); - } -}; - -} // NPDisk -} // NKikimr + Queue.Push(x); + AtomicIncrement(SizeLowerEstimate); + WakeUp(); + } + + T *Pop() { + AtomicDecrement(SizeLowerEstimate); + return Queue.Pop(); + } + + TAtomicBase GetWaitingSize() { + return AtomicLoad(&SizeLowerEstimate); + } + + void ProducedWaitI() { + TGuard<TMutex> guard(ProducedMutex); + if (AtomicGet(SizeLowerEstimate)) { + return; + } + return ProducedCondVar.WaitI(ProducedMutex); + } + + bool ProducedWait(TDuration duration) { + TGuard<TMutex> guard(ProducedMutex); + if (AtomicGet(SizeLowerEstimate)) { + return true; + } + return ProducedCondVar.WaitT(ProducedMutex, duration); + } + + void WakeUp() { + TGuard<TMutex> 
guard(ProducedMutex); + ProducedCondVar.Signal(); + } +}; + +} // NPDisk +} // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h index 6b54807eaa..fc2951466c 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h @@ -1,7 +1,7 @@ #pragma once #include <ydb/core/util/queue_oneone_inplace.h> #include <util/system/condvar.h> -#include <util/system/mutex.h> +#include <util/system/mutex.h> namespace NKikimr { namespace NPDisk { @@ -21,7 +21,7 @@ public: {} virtual ~TCountedQueueOneOne() { - Y_VERIFY_S(AtomicGet(SizeLowerEstimate) == 0, "Unexpected SizeLowerEstimate# " << AtomicGet(SizeLowerEstimate)); + Y_VERIFY_S(AtomicGet(SizeLowerEstimate) == 0, "Unexpected SizeLowerEstimate# " << AtomicGet(SizeLowerEstimate)); } void Push(T x) noexcept { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_devicemode.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_devicemode.h index c9e95b0fd1..721e0a5f15 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_devicemode.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_devicemode.h @@ -1,29 +1,29 @@ -#pragma once - -#include <util/generic/string.h> - -namespace NKikimr::NPDisk { - -struct TDeviceMode { - using TFlags = ui32; - - enum EFlags : TFlags { - None = 0, - LockFile = 1 << 0, - UseSpdk = 1 << 1, - UseShmem = 1 << 2, - UseSubmitGetThread = 1 << 3, - }; - - static TString Validate(TFlags flags) { - if ((flags & UseSpdk) && (flags & UseShmem)) { - return "Spdk cannot be used on top of shmem device"; - } else if (flags & UseShmem) { - return "PDisk on shared memory is not supported now"; - } else { - return ""; - } - } -}; - -} // NKikimr::NPDisk +#pragma once + +#include <util/generic/string.h> + +namespace NKikimr::NPDisk { + +struct TDeviceMode { + using TFlags = ui32; + + enum 
EFlags : TFlags { + None = 0, + LockFile = 1 << 0, + UseSpdk = 1 << 1, + UseShmem = 1 << 2, + UseSubmitGetThread = 1 << 3, + }; + + static TString Validate(TFlags flags) { + if ((flags & UseSpdk) && (flags & UseShmem)) { + return "Spdk cannot be used on top of shmem device"; + } else if (flags & UseShmem) { + return "PDisk on shared memory is not supported now"; + } else { + return ""; + } + } +}; + +} // NKikimr::NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_idlecounter.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_idlecounter.h index be3ddc3a3e..0aadc3f207 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_idlecounter.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_idlecounter.h @@ -1,40 +1,40 @@ -#pragma once - -#include "defs.h" -#include "blobstorage_pdisk_util_atomicblockcounter.h" -#include "blobstorage_pdisk_mon.h" - -namespace NKikimr { - -namespace NPDisk { - -class TIdleCounter { - static constexpr ui32 InFlightThreshold = 1024; - - TAtomicBlockCounter ReversedInFlight; - TLight &IdleLight; - -public: - - TIdleCounter(TLight &light) - : IdleLight(light) - { - ReversedInFlight.Add(InFlightThreshold + 1); - } - - void Increment() { - TAtomicBlockCounter::TResult res; - ReversedInFlight.ThresholdSub(1, InFlightThreshold, res); - IdleLight.Set(res.A, res.Seqno); - } - - void Decrement() { - TAtomicBlockCounter::TResult res; - ReversedInFlight.ThresholdAdd(1, InFlightThreshold, res); - IdleLight.Set(res.A, res.Seqno); - } -}; - -} // namespace NKikimr - -} // namespace NPDisk +#pragma once + +#include "defs.h" +#include "blobstorage_pdisk_util_atomicblockcounter.h" +#include "blobstorage_pdisk_mon.h" + +namespace NKikimr { + +namespace NPDisk { + +class TIdleCounter { + static constexpr ui32 InFlightThreshold = 1024; + + TAtomicBlockCounter ReversedInFlight; + TLight &IdleLight; + +public: + + TIdleCounter(TLight &light) + : IdleLight(light) + { + ReversedInFlight.Add(InFlightThreshold + 1); + } + + void 
Increment() { + TAtomicBlockCounter::TResult res; + ReversedInFlight.ThresholdSub(1, InFlightThreshold, res); + IdleLight.Set(res.A, res.Seqno); + } + + void Decrement() { + TAtomicBlockCounter::TResult res; + ReversedInFlight.ThresholdAdd(1, InFlightThreshold, res); + IdleLight.Set(res.A, res.Seqno); + } +}; + +} // namespace NKikimr + +} // namespace NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_sector.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_sector.h index 5e1b70c7aa..45369a7a34 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_sector.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_sector.h @@ -1,132 +1,132 @@ -#pragma once - -#include "defs.h" -#include "blobstorage_pdisk_data.h" - -#include <util/generic/strbuf.h> +#pragma once + +#include "defs.h" +#include "blobstorage_pdisk_data.h" + +#include <util/generic/strbuf.h> #include <util/stream/format.h> - -namespace NKikimr::NPDisk { - -class TSector { - TStringBuf Buf; - -public: - TSector(const ui8* data, ui32 size) - : Buf(reinterpret_cast<const char*>(data), size) - {} - - TSector(const char* data, ui32 size) - : Buf(data, size) - {} - - ui8* Begin() { - return (ui8*)Buf.Data(); - } - - const ui8* Begin() const { - return (const ui8*)Buf.Data(); - } - - ui8* End() { - return Begin() + Size(); - } - - const ui8* End() const { - return Begin() + Size(); - } - - ui8 &operator[](ui32 idx) { - return *(Begin() + idx); - } - - const ui8 &operator[](ui32 idx) const { - return *(Begin() + idx); - } - - size_t Size() const { - return Buf.Size(); - } - - TDataSectorFooter *GetDataFooter() { - Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter)); - return (TDataSectorFooter*) (End() - sizeof(TDataSectorFooter)); - } - - ui64 GetCanary() const { - Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter) + CanarySize); - return ReadUnaligned<ui64>(End() - sizeof(TDataSectorFooter) - CanarySize); - } - - void SetCanary(ui64 canary = NPDisk::Canary) { - 
Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter) + CanarySize); - WriteUnaligned<ui64>(End() - sizeof(TDataSectorFooter) - CanarySize, canary); - } - - TString ToString(size_t widthBytes = 16) { - TStringStream out; - for (ui32 row = 0; row < (Buf.size() + widthBytes - 1) / widthBytes; ++row) { - out << LeftPad(row * widthBytes, 6) << ": "; - for (ui32 col = 0; col < widthBytes; ++col) { - const ui32 idx = row * widthBytes + col; - if (col) { - out << ' '; - if (col % (widthBytes / 2) == 0) { - out << ' '; - } - } - if (idx < Buf.size()) { - out << Hex(Buf[idx], HF_FULL); - } else { - out << " "; - } - } - out << '\n'; - } - return out.Str(); - } - - bool CheckCanary() { - return GetCanary() == NPDisk::Canary; - } -}; - -class TSectorsWithData { - const ui32 SectorSize; - const ui32 SectorCount; - TString Buf; - -public: - TSectorsWithData(ui32 sectorSize, ui32 sectorCount) - : SectorSize(sectorSize) - , SectorCount(sectorCount) - , Buf(TString::Uninitialized(sectorSize * sectorCount)) - {} - - ui8* Data() { - return reinterpret_cast<ui8*>(Buf.Detach()); - } - - size_t Size() const { - return SectorCount; - } - - TSector Begin() { - return (*this)[0]; - } - - TSector End() { - return (*this)[SectorCount - 1]; - } - - TSector operator[](ui32 idx) { - return {Buf.Data() + idx * SectorSize, SectorSize}; - } - - const TSector operator[](ui32 idx) const { - return {Buf.Data() + idx * SectorSize, SectorSize}; - } -}; - - -} // namespace NKikimr::NPDisk + +namespace NKikimr::NPDisk { + +class TSector { + TStringBuf Buf; + +public: + TSector(const ui8* data, ui32 size) + : Buf(reinterpret_cast<const char*>(data), size) + {} + + TSector(const char* data, ui32 size) + : Buf(data, size) + {} + + ui8* Begin() { + return (ui8*)Buf.Data(); + } + + const ui8* Begin() const { + return (const ui8*)Buf.Data(); + } + + ui8* End() { + return Begin() + Size(); + } + + const ui8* End() const { + return Begin() + Size(); + } + + ui8 &operator[](ui32 idx) { + return *(Begin() + idx); + 
} + + const ui8 &operator[](ui32 idx) const { + return *(Begin() + idx); + } + + size_t Size() const { + return Buf.Size(); + } + + TDataSectorFooter *GetDataFooter() { + Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter)); + return (TDataSectorFooter*) (End() - sizeof(TDataSectorFooter)); + } + + ui64 GetCanary() const { + Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter) + CanarySize); + return ReadUnaligned<ui64>(End() - sizeof(TDataSectorFooter) - CanarySize); + } + + void SetCanary(ui64 canary = NPDisk::Canary) { + Y_VERIFY_DEBUG(Size() >= sizeof(TDataSectorFooter) + CanarySize); + WriteUnaligned<ui64>(End() - sizeof(TDataSectorFooter) - CanarySize, canary); + } + + TString ToString(size_t widthBytes = 16) { + TStringStream out; + for (ui32 row = 0; row < (Buf.size() + widthBytes - 1) / widthBytes; ++row) { + out << LeftPad(row * widthBytes, 6) << ": "; + for (ui32 col = 0; col < widthBytes; ++col) { + const ui32 idx = row * widthBytes + col; + if (col) { + out << ' '; + if (col % (widthBytes / 2) == 0) { + out << ' '; + } + } + if (idx < Buf.size()) { + out << Hex(Buf[idx], HF_FULL); + } else { + out << " "; + } + } + out << '\n'; + } + return out.Str(); + } + + bool CheckCanary() { + return GetCanary() == NPDisk::Canary; + } +}; + +class TSectorsWithData { + const ui32 SectorSize; + const ui32 SectorCount; + TString Buf; + +public: + TSectorsWithData(ui32 sectorSize, ui32 sectorCount) + : SectorSize(sectorSize) + , SectorCount(sectorCount) + , Buf(TString::Uninitialized(sectorSize * sectorCount)) + {} + + ui8* Data() { + return reinterpret_cast<ui8*>(Buf.Detach()); + } + + size_t Size() const { + return SectorCount; + } + + TSector Begin() { + return (*this)[0]; + } + + TSector End() { + return (*this)[SectorCount - 1]; + } + + TSector operator[](ui32 idx) { + return {Buf.Data() + idx * SectorSize, SectorSize}; + } + + const TSector operator[](ui32 idx) const { + return {Buf.Data() + idx * SectorSize, SectorSize}; + } +}; + + +} // namespace NKikimr::NPDisk 
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.cpp index 5dafeff3e2..62e0a71b27 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.cpp @@ -1,116 +1,116 @@ -#include "blobstorage_pdisk_util_signal_event.h" - -#include <util/system/datetime.h> -#include <util/system/defaults.h> - -#include <cstdio> - -#include <util/system/atomic.h> -#include <util/system/event.h> -#include <util/system/mutex.h> -#include <util/system/condvar.h> - -class TSignalEvent::TEvImpl: public TThrRefBase { -public: -#ifdef _win_ - inline TEvImpl() { - cond = CreateEvent(nullptr, false, false, nullptr); - } - - inline ~TEvImpl() { - CloseHandle(cond); - } - - inline void Reset() noexcept { - ResetEvent(cond); - } - - inline void Signal() noexcept { - SetEvent(cond); - } - - inline bool WaitD(TInstant deadLine) noexcept { - if (deadLine == TInstant::Max()) { - return WaitForSingleObject(cond, INFINITE) == WAIT_OBJECT_0; - } - - const TInstant now = Now(); - - if (now < deadLine) { - //TODO - return WaitForSingleObject(cond, (deadLine - now).MilliSeconds()) == WAIT_OBJECT_0; - } - - return (WaitForSingleObject(cond, 0) == WAIT_OBJECT_0); - } - -#else - - inline void Signal() noexcept { - with_lock (Mutex) { - AtomicSet(Signaled, 1); - Cond.BroadCast(); - } - } - - inline void Reset() noexcept { - AtomicSet(Signaled, 0); - } - - inline bool WaitD(TInstant deadLine) noexcept { - bool resSignaled = true; - - with_lock (Mutex) { - while (!AtomicGet(Signaled)) { - if (!Cond.WaitD(Mutex, deadLine)) { - resSignaled = AtomicGet(Signaled); // timed out, but Signaled could have been set - - break; - } - } - - AtomicSet(Signaled, 0); - } - - return resSignaled; - } -#endif - -private: -#ifdef _win_ - HANDLE cond; -#else - TCondVar Cond; - TMutex Mutex; - TAtomic Signaled = 0; -#endif -}; - 
-TSignalEvent::TSignalEvent() - : EvImpl_(new TEvImpl()) -{ -} - -TSignalEvent::TSignalEvent(const TSignalEvent& other) noexcept - : EvImpl_(other.EvImpl_) -{ -} - -TSignalEvent& TSignalEvent::operator=(const TSignalEvent& other) noexcept { - EvImpl_ = other.EvImpl_; - return *this; -} - -TSignalEvent::~TSignalEvent() = default; - -void TSignalEvent::Reset() noexcept { - EvImpl_->Reset(); -} - -void TSignalEvent::Signal() noexcept { - EvImpl_->Signal(); -} - -bool TSignalEvent::WaitD(TInstant deadLine) noexcept { - return EvImpl_->WaitD(deadLine); -} +#include "blobstorage_pdisk_util_signal_event.h" + +#include <util/system/datetime.h> +#include <util/system/defaults.h> + +#include <cstdio> + +#include <util/system/atomic.h> +#include <util/system/event.h> +#include <util/system/mutex.h> +#include <util/system/condvar.h> + +class TSignalEvent::TEvImpl: public TThrRefBase { +public: +#ifdef _win_ + inline TEvImpl() { + cond = CreateEvent(nullptr, false, false, nullptr); + } + + inline ~TEvImpl() { + CloseHandle(cond); + } + + inline void Reset() noexcept { + ResetEvent(cond); + } + + inline void Signal() noexcept { + SetEvent(cond); + } + + inline bool WaitD(TInstant deadLine) noexcept { + if (deadLine == TInstant::Max()) { + return WaitForSingleObject(cond, INFINITE) == WAIT_OBJECT_0; + } + + const TInstant now = Now(); + + if (now < deadLine) { + //TODO + return WaitForSingleObject(cond, (deadLine - now).MilliSeconds()) == WAIT_OBJECT_0; + } + + return (WaitForSingleObject(cond, 0) == WAIT_OBJECT_0); + } + +#else + + inline void Signal() noexcept { + with_lock (Mutex) { + AtomicSet(Signaled, 1); + Cond.BroadCast(); + } + } + + inline void Reset() noexcept { + AtomicSet(Signaled, 0); + } + + inline bool WaitD(TInstant deadLine) noexcept { + bool resSignaled = true; + + with_lock (Mutex) { + while (!AtomicGet(Signaled)) { + if (!Cond.WaitD(Mutex, deadLine)) { + resSignaled = AtomicGet(Signaled); // timed out, but Signaled could have been set + + break; + } + } + + 
AtomicSet(Signaled, 0); + } + + return resSignaled; + } +#endif + +private: +#ifdef _win_ + HANDLE cond; +#else + TCondVar Cond; + TMutex Mutex; + TAtomic Signaled = 0; +#endif +}; + +TSignalEvent::TSignalEvent() + : EvImpl_(new TEvImpl()) +{ +} + +TSignalEvent::TSignalEvent(const TSignalEvent& other) noexcept + : EvImpl_(other.EvImpl_) +{ +} + +TSignalEvent& TSignalEvent::operator=(const TSignalEvent& other) noexcept { + EvImpl_ = other.EvImpl_; + return *this; +} + +TSignalEvent::~TSignalEvent() = default; + +void TSignalEvent::Reset() noexcept { + EvImpl_->Reset(); +} + +void TSignalEvent::Signal() noexcept { + EvImpl_->Signal(); +} + +bool TSignalEvent::WaitD(TInstant deadLine) noexcept { + return EvImpl_->WaitD(deadLine); +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.h index 3beda21232..6dc07a04a0 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_signal_event.h @@ -1,38 +1,38 @@ -#pragma once - -#include "defs.h" - -#include <util/generic/ptr.h> -#include <util/datetime/base.h> - -class TSignalEvent { -public: - TSignalEvent(); - TSignalEvent(const TSignalEvent& other) noexcept; - TSignalEvent& operator=(const TSignalEvent& other) noexcept; - - ~TSignalEvent(); - - void Reset() noexcept; - void Signal() noexcept; - - /* - * return true if signaled, false if timed out. 
- */ - bool WaitD(TInstant deadLine) noexcept; - - inline bool WaitT(TDuration timeOut) noexcept { - return WaitD(timeOut.ToDeadLine()); - } - - /* - * wait infinite time - */ - inline void WaitI() noexcept { - WaitD(TInstant::Max()); - } - -private: - class TEvImpl; - TIntrusivePtr<TEvImpl> EvImpl_; -}; +#pragma once + +#include "defs.h" + +#include <util/generic/ptr.h> +#include <util/datetime/base.h> + +class TSignalEvent { +public: + TSignalEvent(); + TSignalEvent(const TSignalEvent& other) noexcept; + TSignalEvent& operator=(const TSignalEvent& other) noexcept; + + ~TSignalEvent(); + + void Reset() noexcept; + void Signal() noexcept; + + /* + * return true if signaled, false if timed out. + */ + bool WaitD(TInstant deadLine) noexcept; + + inline bool WaitT(TDuration timeOut) noexcept { + return WaitD(timeOut.ToDeadLine()); + } + + /* + * wait infinite time + */ + inline void WaitI() noexcept { + WaitD(TInstant::Max()); + } + +private: + class TEvImpl; + TIntrusivePtr<TEvImpl> EvImpl_; +}; diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_space_color.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_space_color.h index 1857884673..bdb0e0f152 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_space_color.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_space_color.h @@ -1,67 +1,67 @@ #pragma once -#include "defs.h" - +#include "defs.h" + #include <ydb/core/protos/blobstorage_config.pb.h> #include <ydb/core/protos/blobstorage.pb.h> - -namespace NKikimr { - -inline NKikimrBlobStorage::TPDiskSpaceColor::E StatusFlagToSpaceColor(NPDisk::TStatusFlags flags) { - using TColor = NKikimrBlobStorage::TPDiskSpaceColor; - - if (flags & NKikimrBlobStorage::StatusDiskSpaceBlack) { - return TColor::BLACK; - } else if (flags & NKikimrBlobStorage::StatusDiskSpaceRed) { - return TColor::RED; - } else if (flags & NKikimrBlobStorage::StatusDiskSpaceOrange) { - return TColor::ORANGE; - } else if (flags & 
NKikimrBlobStorage::StatusDiskSpaceLightOrange) { - return TColor::LIGHT_ORANGE; + +namespace NKikimr { + +inline NKikimrBlobStorage::TPDiskSpaceColor::E StatusFlagToSpaceColor(NPDisk::TStatusFlags flags) { + using TColor = NKikimrBlobStorage::TPDiskSpaceColor; + + if (flags & NKikimrBlobStorage::StatusDiskSpaceBlack) { + return TColor::BLACK; + } else if (flags & NKikimrBlobStorage::StatusDiskSpaceRed) { + return TColor::RED; + } else if (flags & NKikimrBlobStorage::StatusDiskSpaceOrange) { + return TColor::ORANGE; + } else if (flags & NKikimrBlobStorage::StatusDiskSpaceLightOrange) { + return TColor::LIGHT_ORANGE; } else if (flags & NKikimrBlobStorage::StatusDiskSpaceYellowStop) { - return TColor::YELLOW; + return TColor::YELLOW; } else if (flags & NKikimrBlobStorage::StatusDiskSpaceLightYellowMove) { return TColor::LIGHT_YELLOW; - } else if (flags & NKikimrBlobStorage::StatusDiskSpaceCyan) { - return TColor::CYAN; - } else { - return TColor::GREEN; - } -} - -inline NPDisk::TStatusFlags SpaceColorToStatusFlag(NKikimrBlobStorage::TPDiskSpaceColor::E color) { - using TColor = NKikimrBlobStorage::TPDiskSpaceColor; - - NPDisk::TStatusFlags flags = NKikimrBlobStorage::StatusIsValid; - switch (color) { - case TColor::BLACK: - flags |= NKikimrBlobStorage::StatusDiskSpaceBlack; + } else if (flags & NKikimrBlobStorage::StatusDiskSpaceCyan) { + return TColor::CYAN; + } else { + return TColor::GREEN; + } +} + +inline NPDisk::TStatusFlags SpaceColorToStatusFlag(NKikimrBlobStorage::TPDiskSpaceColor::E color) { + using TColor = NKikimrBlobStorage::TPDiskSpaceColor; + + NPDisk::TStatusFlags flags = NKikimrBlobStorage::StatusIsValid; + switch (color) { + case TColor::BLACK: + flags |= NKikimrBlobStorage::StatusDiskSpaceBlack; [[fallthrough]]; - case TColor::RED: - flags |= NKikimrBlobStorage::StatusDiskSpaceRed; + case TColor::RED: + flags |= NKikimrBlobStorage::StatusDiskSpaceRed; [[fallthrough]]; - case TColor::ORANGE: - flags |= NKikimrBlobStorage::StatusDiskSpaceOrange; + 
case TColor::ORANGE: + flags |= NKikimrBlobStorage::StatusDiskSpaceOrange; [[fallthrough]]; - case TColor::LIGHT_ORANGE: - flags |= NKikimrBlobStorage::StatusDiskSpaceLightOrange; + case TColor::LIGHT_ORANGE: + flags |= NKikimrBlobStorage::StatusDiskSpaceLightOrange; [[fallthrough]]; - case TColor::YELLOW: + case TColor::YELLOW: flags |= NKikimrBlobStorage::StatusDiskSpaceYellowStop; [[fallthrough]]; case TColor::LIGHT_YELLOW: flags |= NKikimrBlobStorage::StatusDiskSpaceLightYellowMove; [[fallthrough]]; - case TColor::CYAN: - flags |= NKikimrBlobStorage::StatusDiskSpaceCyan; + case TColor::CYAN: + flags |= NKikimrBlobStorage::StatusDiskSpaceCyan; [[fallthrough]]; - case TColor::GREEN: + case TColor::GREEN: [[fallthrough]]; - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: [[fallthrough]]; - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: - break; - } - return flags; -} - -} + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: + break; + } + return flags; +} + +} diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_ut.cpp index 5a0595fcd5..97938cfcdc 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_ut.cpp @@ -1,25 +1,25 @@ #include "defs.h" -#include "blobstorage_pdisk_chunk_id_formatter.h" -#include "blobstorage_pdisk_data.h" -#include "blobstorage_pdisk_driveestimator.h" -#include "blobstorage_pdisk_impl.h" -#include "blobstorage_pdisk_mon.h" -#include "blobstorage_pdisk_sectorrestorator.h" -#include "blobstorage_pdisk_state.h" -#include "blobstorage_pdisk_tools.h" -#include "blobstorage_pdisk_ut_defs.h" -#include "blobstorage_pdisk_util_atomicblockcounter.h" -#include 
"blobstorage_pdisk_util_sector.h" -#include "blobstorage_pdisk_util_wcache.h" +#include "blobstorage_pdisk_chunk_id_formatter.h" +#include "blobstorage_pdisk_data.h" +#include "blobstorage_pdisk_driveestimator.h" +#include "blobstorage_pdisk_impl.h" +#include "blobstorage_pdisk_mon.h" +#include "blobstorage_pdisk_sectorrestorator.h" +#include "blobstorage_pdisk_state.h" +#include "blobstorage_pdisk_tools.h" +#include "blobstorage_pdisk_ut_defs.h" +#include "blobstorage_pdisk_util_atomicblockcounter.h" +#include "blobstorage_pdisk_util_sector.h" +#include "blobstorage_pdisk_util_wcache.h" #include <ydb/core/blobstorage/crypto/default.h> #include <ydb/library/pdisk_io/aio.h> - + #include <library/cpp/testing/unittest/registar.h> -#include <util/stream/null.h> -#include <util/system/tempfile.h> -#include <cstring> +#include <util/stream/null.h> +#include <util/system/tempfile.h> +#include <cstring> namespace NKikimr { namespace NPDisk { @@ -213,7 +213,7 @@ Y_UNIT_TEST_SUITE(TPDiskUtil) { UNIT_ASSERT_EQUAL(count->Val(), cntStart); } l.Set(false, missedSeqno); // place missed one - UNIT_ASSERT_EQUAL((bool)state->Val(), st); + UNIT_ASSERT_EQUAL((bool)state->Val(), st); UNIT_ASSERT_EQUAL(count->Val(), cnt); } } @@ -235,290 +235,290 @@ Y_UNIT_TEST_SUITE(TPDiskUtil) { UNIT_ASSERT_EQUAL(state->Val(), 0); } } - + Y_UNIT_TEST(DriveEstimator) { - TTempFileHandle file; - file.Resize(1 << 30); - TDriveEstimator estimator(file.Name()); - TDriveModel model = estimator.EstimateDriveModel(); - UNIT_ASSERT_UNEQUAL(model.Speed(TDriveModel::OP_TYPE_AVG), 0); - UNIT_ASSERT_UNEQUAL(model.SeekTimeNs(), 0); - } - -void TestOffset(ui64 offset, ui64 size, ui64 expectedFirstSector, ui64 expectedLastSector, - ui64 expectedSectorOffset) { - TDiskFormat format; - format.Clear(); - format.SectorSize = 4096; - format.FormatFlags &= ~EFormatFlags::FormatFlagErasureEncodeUserChunks; - - ui64 firstSector; - ui64 lastSector; - ui64 sectorOffset; - bool isOk = ParseSectorOffset(format, nullptr, 0, 
offset, size, firstSector, lastSector, sectorOffset); - UNIT_ASSERT_C(isOk && firstSector == expectedFirstSector && lastSector == expectedLastSector && - sectorOffset == expectedSectorOffset, - "isOk# " << isOk << "\n" - "offset# " << offset << " size# " << size << "\n" - "firstSector# " << firstSector << " expectedFirstSector# " << expectedFirstSector << "\n" - "lastSector# " << lastSector << " expectedLastSector# " << expectedLastSector << "\n" - "sectorOffset# " << sectorOffset << " expectedSectorOffset# " << expectedSectorOffset << "\n" - ); + TTempFileHandle file; + file.Resize(1 << 30); + TDriveEstimator estimator(file.Name()); + TDriveModel model = estimator.EstimateDriveModel(); + UNIT_ASSERT_UNEQUAL(model.Speed(TDriveModel::OP_TYPE_AVG), 0); + UNIT_ASSERT_UNEQUAL(model.SeekTimeNs(), 0); + } + +void TestOffset(ui64 offset, ui64 size, ui64 expectedFirstSector, ui64 expectedLastSector, + ui64 expectedSectorOffset) { + TDiskFormat format; + format.Clear(); + format.SectorSize = 4096; + format.FormatFlags &= ~EFormatFlags::FormatFlagErasureEncodeUserChunks; + + ui64 firstSector; + ui64 lastSector; + ui64 sectorOffset; + bool isOk = ParseSectorOffset(format, nullptr, 0, offset, size, firstSector, lastSector, sectorOffset); + UNIT_ASSERT_C(isOk && firstSector == expectedFirstSector && lastSector == expectedLastSector && + sectorOffset == expectedSectorOffset, + "isOk# " << isOk << "\n" + "offset# " << offset << " size# " << size << "\n" + "firstSector# " << firstSector << " expectedFirstSector# " << expectedFirstSector << "\n" + "lastSector# " << lastSector << " expectedLastSector# " << expectedLastSector << "\n" + "sectorOffset# " << sectorOffset << " expectedSectorOffset# " << expectedSectorOffset << "\n" + ); +} + + Y_UNIT_TEST(OffsetParsingCorrectness) { + TDiskFormat format; + format.Clear(); + format.SectorSize = 4096; + const ui64 sectorPayload = format.SectorPayloadSize(); + + TestOffset(0, sectorPayload*15, 0, 14, 0); + + TestOffset(0, sectorPayload*15, 
0, 14, 0); + + const ui64 size = sectorPayload * LogErasureDataParts; + for (ui64 offset = 1; offset < size; ++offset) { + const ui64 lastSector = (offset + size + sectorPayload - 1) / sectorPayload - 1; + TestOffset(offset, size, offset / sectorPayload, lastSector, offset % sectorPayload); + } + + TestOffset(4123, 4012*13, 1, 13, 59); + + TestOffset(4123, sectorPayload*14, 1, 15, 59); + + TestOffset(4123, 4063*13, 1, 14, 59); + } + +void TestPayloadOffset(ui64 firstSector, ui64 lastSector, ui64 currentSector, ui64 expectedPayloadSize, + ui64 expectedPayloadOffset) { + TDiskFormat format; + format.Clear(); + format.SectorSize = 4096; + format.FormatFlags &= ~EFormatFlags::FormatFlagErasureEncodeUserChunks; + + ui64 payloadSize; + ui64 payloadOffset; + ParsePayloadFromSectorOffset(format, firstSector, lastSector, currentSector, &payloadSize, &payloadOffset); + UNIT_ASSERT_C(payloadSize == expectedPayloadSize && payloadOffset == expectedPayloadOffset, + "firstSector# " << firstSector << " lastSector# " << lastSector << " currentSector# " << currentSector << "\n" + "payloadSize# " << payloadSize << " expectedPayloadSize# " << expectedPayloadSize << "\n" + "payloadOffset# " << payloadOffset << " expectedPayloadOffset# " << expectedPayloadOffset << "\n" + ); } - Y_UNIT_TEST(OffsetParsingCorrectness) { - TDiskFormat format; - format.Clear(); - format.SectorSize = 4096; - const ui64 sectorPayload = format.SectorPayloadSize(); - - TestOffset(0, sectorPayload*15, 0, 14, 0); - - TestOffset(0, sectorPayload*15, 0, 14, 0); - - const ui64 size = sectorPayload * LogErasureDataParts; - for (ui64 offset = 1; offset < size; ++offset) { - const ui64 lastSector = (offset + size + sectorPayload - 1) / sectorPayload - 1; - TestOffset(offset, size, offset / sectorPayload, lastSector, offset % sectorPayload); - } - - TestOffset(4123, 4012*13, 1, 13, 59); - - TestOffset(4123, sectorPayload*14, 1, 15, 59); - - TestOffset(4123, 4063*13, 1, 14, 59); - } - -void TestPayloadOffset(ui64 
firstSector, ui64 lastSector, ui64 currentSector, ui64 expectedPayloadSize, - ui64 expectedPayloadOffset) { - TDiskFormat format; - format.Clear(); - format.SectorSize = 4096; - format.FormatFlags &= ~EFormatFlags::FormatFlagErasureEncodeUserChunks; - - ui64 payloadSize; - ui64 payloadOffset; - ParsePayloadFromSectorOffset(format, firstSector, lastSector, currentSector, &payloadSize, &payloadOffset); - UNIT_ASSERT_C(payloadSize == expectedPayloadSize && payloadOffset == expectedPayloadOffset, - "firstSector# " << firstSector << " lastSector# " << lastSector << " currentSector# " << currentSector << "\n" - "payloadSize# " << payloadSize << " expectedPayloadSize# " << expectedPayloadSize << "\n" - "payloadOffset# " << payloadOffset << " expectedPayloadOffset# " << expectedPayloadOffset << "\n" - ); -} - - Y_UNIT_TEST(PayloadParsingTest) { - TDiskFormat format; - format.Clear(); - format.SectorSize = 4096; - const ui64 sectorPayload = format.SectorPayloadSize(); - - TestPayloadOffset(1, 1, 1, sectorPayload, 0); - TestPayloadOffset(1, 2, 2, sectorPayload, sectorPayload); - - TestPayloadOffset(0, 15, 0, 16 * sectorPayload, 0); - TestPayloadOffset(0, 15, 0, 16 * sectorPayload, 0); - - TestPayloadOffset(0, 15, 14, 2 * sectorPayload, 14 * sectorPayload); - TestPayloadOffset(0, 15, 15, sectorPayload, 15 * sectorPayload); - - TestPayloadOffset(13, 15, 13, 3 * sectorPayload, 0); - TestPayloadOffset(13, 15, 14, 2 * sectorPayload, sectorPayload); - } - - Y_UNIT_TEST(SectorRestorator) { - TDiskFormat format; - format.Clear(); - TSectorsWithData sectors(format.SectorSize, LogErasureDataParts + 1); - constexpr ui64 magic = 0x123951924; - ui64 nonce = 1; - for (ui32 useT1haHash = 0; useT1haHash < 2; ++useT1haHash) { - for (ui32 i = 0; i < LogErasureDataParts + 1; ++i) { - memset(sectors[i].Begin(), 0, sectors[i].Size()); - sectors[i].SetCanary(); - auto *footer = sectors[i].GetDataFooter(); - footer->Version = PDISK_DATA_VERSION; - footer->Nonce = nonce++; - 
NPDisk::TPDiskHashCalculator hasher(useT1haHash); - if (i < LogErasureDataParts) { - ui64 offset = format.SectorSize * i; - footer->Hash = hasher.HashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); - } - } - TSectorRestorator restorator(false, LogErasureDataParts, true, format); - restorator.Restore(sectors.Data(), 0, magic, 0, useT1haHash); - UNIT_ASSERT_C(restorator.GoodSectorCount == LogErasureDataParts + 1, - "restorator.GoodSectorCount# " << restorator.GoodSectorCount); - } - } - - Y_UNIT_TEST(SectorRestoratorOldNewHash) { - TDiskFormat format; - format.Clear(); - TSectorsWithData sectors(format.SectorSize, 3); - const ui64 magic = 0x123951924; - const ui64 offset = format.SectorSize * 17; - ui64 nonce = 1; - for (ui32 useT1haHash = 0; useT1haHash < 2; ++useT1haHash) { - for (ui32 i = 0; i < sectors.Size(); ++i) { - memset(sectors[i].Begin(), 13, sectors[i].Size()); - sectors[i].SetCanary(); - auto *footer = sectors[i].GetDataFooter(); - footer->Version = PDISK_DATA_VERSION; - footer->Nonce = nonce++; - NPDisk::TPDiskHashCalculator hasher(useT1haHash); - switch (i) { - case 0: - footer->Hash = hasher.OldHashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); - break; - case 1: - footer->Hash = hasher.T1ha0HashSector<TT1ha0NoAvxHasher>(offset, magic, sectors[i].Begin(), sectors[i].Size()); - break; - case 2: - footer->Hash = hasher.HashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); - break; - default: - UNIT_ASSERT(false); - } - TSectorRestorator restorator(false, 1, false, format); - restorator.Restore(sectors[i].Begin(), offset, magic, 0, useT1haHash); - UNIT_ASSERT_C(restorator.GoodSectorCount == 1, "i# " << i << " useT1haHash# " << useT1haHash - << " GoodSectorCount# " << restorator.GoodSectorCount); - } - } - } - - Y_UNIT_TEST(SectorPrint) { - TSectorsWithData sectors(97, 1); + Y_UNIT_TEST(PayloadParsingTest) { + TDiskFormat format; + format.Clear(); + format.SectorSize = 4096; + const ui64 sectorPayload = 
format.SectorPayloadSize(); + + TestPayloadOffset(1, 1, 1, sectorPayload, 0); + TestPayloadOffset(1, 2, 2, sectorPayload, sectorPayload); + + TestPayloadOffset(0, 15, 0, 16 * sectorPayload, 0); + TestPayloadOffset(0, 15, 0, 16 * sectorPayload, 0); + + TestPayloadOffset(0, 15, 14, 2 * sectorPayload, 14 * sectorPayload); + TestPayloadOffset(0, 15, 15, sectorPayload, 15 * sectorPayload); + + TestPayloadOffset(13, 15, 13, 3 * sectorPayload, 0); + TestPayloadOffset(13, 15, 14, 2 * sectorPayload, sectorPayload); + } + + Y_UNIT_TEST(SectorRestorator) { + TDiskFormat format; + format.Clear(); + TSectorsWithData sectors(format.SectorSize, LogErasureDataParts + 1); + constexpr ui64 magic = 0x123951924; + ui64 nonce = 1; + for (ui32 useT1haHash = 0; useT1haHash < 2; ++useT1haHash) { + for (ui32 i = 0; i < LogErasureDataParts + 1; ++i) { + memset(sectors[i].Begin(), 0, sectors[i].Size()); + sectors[i].SetCanary(); + auto *footer = sectors[i].GetDataFooter(); + footer->Version = PDISK_DATA_VERSION; + footer->Nonce = nonce++; + NPDisk::TPDiskHashCalculator hasher(useT1haHash); + if (i < LogErasureDataParts) { + ui64 offset = format.SectorSize * i; + footer->Hash = hasher.HashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); + } + } + TSectorRestorator restorator(false, LogErasureDataParts, true, format); + restorator.Restore(sectors.Data(), 0, magic, 0, useT1haHash); + UNIT_ASSERT_C(restorator.GoodSectorCount == LogErasureDataParts + 1, + "restorator.GoodSectorCount# " << restorator.GoodSectorCount); + } + } + + Y_UNIT_TEST(SectorRestoratorOldNewHash) { + TDiskFormat format; + format.Clear(); + TSectorsWithData sectors(format.SectorSize, 3); + const ui64 magic = 0x123951924; + const ui64 offset = format.SectorSize * 17; + ui64 nonce = 1; + for (ui32 useT1haHash = 0; useT1haHash < 2; ++useT1haHash) { + for (ui32 i = 0; i < sectors.Size(); ++i) { + memset(sectors[i].Begin(), 13, sectors[i].Size()); + sectors[i].SetCanary(); + auto *footer = sectors[i].GetDataFooter(); 
+ footer->Version = PDISK_DATA_VERSION; + footer->Nonce = nonce++; + NPDisk::TPDiskHashCalculator hasher(useT1haHash); + switch (i) { + case 0: + footer->Hash = hasher.OldHashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); + break; + case 1: + footer->Hash = hasher.T1ha0HashSector<TT1ha0NoAvxHasher>(offset, magic, sectors[i].Begin(), sectors[i].Size()); + break; + case 2: + footer->Hash = hasher.HashSector(offset, magic, sectors[i].Begin(), sectors[i].Size()); + break; + default: + UNIT_ASSERT(false); + } + TSectorRestorator restorator(false, 1, false, format); + restorator.Restore(sectors[i].Begin(), offset, magic, 0, useT1haHash); + UNIT_ASSERT_C(restorator.GoodSectorCount == 1, "i# " << i << " useT1haHash# " << useT1haHash + << " GoodSectorCount# " << restorator.GoodSectorCount); + } + } + } + + Y_UNIT_TEST(SectorPrint) { + TSectorsWithData sectors(97, 1); memset(sectors[0].Begin(), 0, sectors[0].Size()); - sectors[0][0] = 12; - sectors[0][1] = 9; - sectors[0].SetCanary(); - Cnull << sectors[0].ToString(); - } - - Y_UNIT_TEST(TChunkIdFormatter) { - auto test = [] (const TDeque<ui32>& in, const TString& expect) { - TStringStream ss; - TChunkIdFormatter(ss).PrintBracedChunksList(in); - UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); - }; - - test({1}, "{1}"); - test({1, 2}, "{1, 2}"); - test({1, 2, 3}, "{1..3}"); - test({1, 3, 5}, "{1, 3, 5}"); - test({1, 2, 3, 5, 6, 7}, "{1..3, 5..7}"); - } - - Y_UNIT_TEST(TOwnerPrintTest) { - TStringStream ss; - ss << TOwner(0) << " " << TOwner(3) << " " << TOwner(124) << " " << TOwner(231); - TString expect = "0 3 124 231"; - UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); - } - - Y_UNIT_TEST(TChunkStateEnumPrintTest) { - TStringStream ss; - ss << TChunkState::DATA_RESERVED; - TString expect = "DATA_RESERVED"; - UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << 
expect.Quote()); - } - - Y_UNIT_TEST(TIoResultEnumPrintTest) { - TStringStream ss; - ss << EIoResult::Ok << " "; - ss << EIoResult::TryAgain << " "; - ss << EIoResult::FileLockError; - TString expect = "Ok TryAgain FileLockError"; - UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); - } - - Y_UNIT_TEST(TIoTypeEnumPrintTest) { - TStringStream ss; - ss << IAsyncIoOperation::EType::PRead << " "; - ss << IAsyncIoOperation::EType::PWrite << " "; - ss << IAsyncIoOperation::EType::PTrim; - TString expect = "PRead PWrite PTrim"; - UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); - } - - Y_UNIT_TEST(TestNVMeSerial) { - TString path = "/dev/disk/by-partlabel/kikimr_nvme_01"; - TStringStream details; - if (std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(path, &details)) { - Cout << "data# " << data->ToString(false) << Endl; - } else { - Cout << "error, details# " << details.Str() << Endl; - } - } - - Y_UNIT_TEST(TestDeviceList) { - for(const NPDisk::TDriveData& data : ListDevicesWithPartlabel()) { - Cout << "data# " << data.ToString(false) << Endl; - } - } - - Y_UNIT_TEST(TestBufferPool) { - TBufferPoolCommon pool(512, 10, TBufferPool::TPDiskParams{}); - - for (ui32 i = 0; i < 100; ++i) { + sectors[0][0] = 12; + sectors[0][1] = 9; + sectors[0].SetCanary(); + Cnull << sectors[0].ToString(); + } + + Y_UNIT_TEST(TChunkIdFormatter) { + auto test = [] (const TDeque<ui32>& in, const TString& expect) { + TStringStream ss; + TChunkIdFormatter(ss).PrintBracedChunksList(in); + UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); + }; + + test({1}, "{1}"); + test({1, 2}, "{1, 2}"); + test({1, 2, 3}, "{1..3}"); + test({1, 3, 5}, "{1, 3, 5}"); + test({1, 2, 3, 5, 6, 7}, "{1..3, 5..7}"); + } + + Y_UNIT_TEST(TOwnerPrintTest) { + TStringStream ss; + ss << TOwner(0) << " " << TOwner(3) << " " << TOwner(124) << " " << 
TOwner(231); + TString expect = "0 3 124 231"; + UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); + } + + Y_UNIT_TEST(TChunkStateEnumPrintTest) { + TStringStream ss; + ss << TChunkState::DATA_RESERVED; + TString expect = "DATA_RESERVED"; + UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); + } + + Y_UNIT_TEST(TIoResultEnumPrintTest) { + TStringStream ss; + ss << EIoResult::Ok << " "; + ss << EIoResult::TryAgain << " "; + ss << EIoResult::FileLockError; + TString expect = "Ok TryAgain FileLockError"; + UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); + } + + Y_UNIT_TEST(TIoTypeEnumPrintTest) { + TStringStream ss; + ss << IAsyncIoOperation::EType::PRead << " "; + ss << IAsyncIoOperation::EType::PWrite << " "; + ss << IAsyncIoOperation::EType::PTrim; + TString expect = "PRead PWrite PTrim"; + UNIT_ASSERT_EQUAL_C(ss.Str(), expect, " got# " << ss.Str().Quote() << " expect# " << expect.Quote()); + } + + Y_UNIT_TEST(TestNVMeSerial) { + TString path = "/dev/disk/by-partlabel/kikimr_nvme_01"; + TStringStream details; + if (std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(path, &details)) { + Cout << "data# " << data->ToString(false) << Endl; + } else { + Cout << "error, details# " << details.Str() << Endl; + } + } + + Y_UNIT_TEST(TestDeviceList) { + for(const NPDisk::TDriveData& data : ListDevicesWithPartlabel()) { + Cout << "data# " << data.ToString(false) << Endl; + } + } + + Y_UNIT_TEST(TestBufferPool) { + TBufferPoolCommon pool(512, 10, TBufferPool::TPDiskParams{}); + + for (ui32 i = 0; i < 100; ++i) { TBuffer::TPtr buffers{pool.Pop()}; - } - - std::vector<TBuffer::TPtr> buffers; - for (ui32 i = 0; i < 20; ++i) { - buffers.emplace_back(pool.Pop()); - } - } - - Y_UNIT_TEST(SectorMap) { - TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024)); - TIntrusivePtr<NMonitoring::TDynamicCounters> 
counters = new NMonitoring::TDynamicCounters; - THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); - TActorSystemCreator creator; - THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults("SectorMap:123", *mon, - NPDisk::TDeviceMode::LockFile, sectorMap, creator.GetActorSystem())); - ui32 size = 4096; - TAlignedData data(size); - TAlignedData readData(size); - memset(data.Get(), 1, size); - device->PwriteSync(data.Get(), size, 0, {}, {}); - device->PreadSync(readData.Get(), size, 0, {}, {}); - UNIT_ASSERT(memcmp(data.Get(), readData.Get(), size) == 0); - } - - Y_UNIT_TEST(FormatSectorMap) { - TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024*1024)); - TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; - - NPDisk::TKey chunkKey{}; - NPDisk::TKey logKey{}; - NPDisk::TKey sysLogKey{}; - - TPDiskConfig cfg("SectorMap:1024042", 12345, 0, {}); - FormatPDisk(cfg.Path, 0, 4096, MIN_CHUNK_SIZE, cfg.PDiskGuid, chunkKey, logKey, sysLogKey, + } + + std::vector<TBuffer::TPtr> buffers; + for (ui32 i = 0; i < 20; ++i) { + buffers.emplace_back(pool.Pop()); + } + } + + Y_UNIT_TEST(SectorMap) { + TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024)); + TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; + THolder<TPDiskMon> mon(new TPDiskMon(counters, 0, nullptr)); + TActorSystemCreator creator; + THolder<NPDisk::IBlockDevice> device(NPDisk::CreateRealBlockDeviceWithDefaults("SectorMap:123", *mon, + NPDisk::TDeviceMode::LockFile, sectorMap, creator.GetActorSystem())); + ui32 size = 4096; + TAlignedData data(size); + TAlignedData readData(size); + memset(data.Get(), 1, size); + device->PwriteSync(data.Get(), size, 0, {}, {}); + device->PreadSync(readData.Get(), size, 0, {}, {}); + UNIT_ASSERT(memcmp(data.Get(), readData.Get(), size) == 0); + } + + Y_UNIT_TEST(FormatSectorMap) { + TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024*1024)); + 
TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; + + NPDisk::TKey chunkKey{}; + NPDisk::TKey logKey{}; + NPDisk::TKey sysLogKey{}; + + TPDiskConfig cfg("SectorMap:1024042", 12345, 0, {}); + FormatPDisk(cfg.Path, 0, 4096, MIN_CHUNK_SIZE, cfg.PDiskGuid, chunkKey, logKey, sysLogKey, YdbDefaultPDiskSequence, TString(), false, false, sectorMap); - } - - Y_UNIT_TEST(SectorMapStoreLoadFromFile) { - TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024)); - TTempFileHandle tmp; - - ui32 size = 1024*1024; - TAlignedData data(size); - for (ui32 i = 0; i < size; ++i) { - data.Get()[i] = i % 139; - } - memset(data.Get(), 0x23, size); - sectorMap->Write(data.Get(), size, 4096); - - sectorMap->StoreToFile(tmp.Name()); - sectorMap->LoadFromFile(tmp.Name()); - - TAlignedData readData(size); - sectorMap->Read(readData.Get(), size, 4096); - UNIT_ASSERT(memcmp(data.Get(), readData.Get(), size) == 0); - } -} - + } + + Y_UNIT_TEST(SectorMapStoreLoadFromFile) { + TIntrusivePtr<TSectorMap> sectorMap(new TSectorMap(1024*1024)); + TTempFileHandle tmp; + + ui32 size = 1024*1024; + TAlignedData data(size); + for (ui32 i = 0; i < size; ++i) { + data.Get()[i] = i % 139; + } + memset(data.Get(), 0x23, size); + sectorMap->Write(data.Get(), size, 4096); + + sectorMap->StoreToFile(tmp.Name()); + sectorMap->LoadFromFile(tmp.Name()); + + TAlignedData readData(size); + sectorMap->Read(readData.Get(), size, 4096); + UNIT_ASSERT(memcmp(data.Get(), readData.Get(), size) == 0); + } +} + }} // namespace NKikimr // namespace NPDisk diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.cpp index 4f327f12c3..f613c70bee 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.cpp @@ -2,64 +2,64 @@ #include <library/cpp/actors/core/log.h> #include <ydb/core/protos/services.pb.h> -#include 
<util/stream/file.h> -#include <util/string/strip.h> +#include <util/stream/file.h> +#include <util/string/strip.h> #include <util/system/file.h> #ifdef _linux_ -#include <libgen.h> -#include <limits.h> -#include <linux/fs.h> -#include <linux/nvme_ioctl.h> -#include <stdlib.h> +#include <libgen.h> +#include <limits.h> +#include <linux/fs.h> +#include <linux/nvme_ioctl.h> +#include <stdlib.h> #include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/types.h> +#include <sys/stat.h> +#include <sys/types.h> //#include <linux/hdreg.h> #define HDIO_GET_WCACHE 0x030e /* get write cache mode on|off */ #define HDIO_SET_WCACHE 0x032b /* change write cache enable-disable */ #endif -#include <regex> - +#include <regex> + namespace NKikimr { namespace NPDisk { #ifndef _linux_ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Compatibility version -EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails) { +EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails) { Y_UNUSED(file); if (outDetails) { (*outDetails) << "FlushWriteCache is not implemented, path# \"" << path << "\""; } - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; +} + +std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails) { + if (outDetails) { + (*outDetails) << "GetDriveData is not implemented, path# \"" << path << "\""; + } + return std::nullopt; } -std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails) { - if (outDetails) { - (*outDetails) << "GetDriveData is not implemented, path# \"" << path << "\""; - } - return std::nullopt; -} - -EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *outDriveData, TStringStream *outDetails) { +EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *outDriveData, TStringStream 
*outDetails) { Y_VERIFY(outDriveData); Y_UNUSED(file); if (outDetails) { (*outDetails) << "GetWriteCache is not implemented, path# \"" << path << "\""; } - *outDriveData = TDriveData{}; - - return WriteCacheResultErrorPersistent; + *outDriveData = TDriveData{}; + + return WriteCacheResultErrorPersistent; } -EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails) { +EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails) { Y_UNUSED(file); if (outDetails) { (*outDetails) << "SetWriteCache is not implemented, path# \"" << path << "\" isEnable# " << isEnable; } - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -297,7 +297,7 @@ enum EAtaOperationCode { AOC_ATA_16 = 0x85 }; -EWriteCacheResult AtaPassThrough(FHANDLE file, ui8 command, ui8 features, ui8 dataCount, ui8 *data, bool preferAta12, +EWriteCacheResult AtaPassThrough(FHANDLE file, ui8 command, ui8 features, ui8 dataCount, ui8 *data, bool preferAta12, ui32 timeoutMs, TStringStream *outDetails) { Y_VERIFY(!dataCount || data); ui32 dataBytes = dataCount * 512; @@ -351,30 +351,30 @@ EWriteCacheResult AtaPassThrough(FHANDLE file, ui8 command, ui8 features, ui8 da ioHdr.sbp = sb.Raw; ioHdr.timeout = timeoutMs; - - if (ioctl(file, SG_IO, &ioHdr) == -1) { + + if (ioctl(file, SG_IO, &ioHdr) == -1) { if (outDetails) { (*outDetails) << "SG_IO not supported (?)"; } - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; } if (ioHdr.status && ioHdr.status != SG_CHECK_CONDITION) { if (outDetails) { (*outDetails) << "SG_IO: bad status# " << (ui32)ioHdr.status; } - return WriteCacheResultErrorTemporary; + return WriteCacheResultErrorTemporary; } if (ioHdr.host_status) { if (outDetails) { (*outDetails) << "SG_IO: bad host status# " << 
(ui32)ioHdr.host_status; } - return WriteCacheResultErrorTemporary; + return WriteCacheResultErrorTemporary; } if (ioHdr.driver_status && (ioHdr.driver_status != SG_DRIVER_SENSE)) { if (outDetails) { (*outDetails) << "SG_IO: bad driver_status# " << (ui32)ioHdr.driver_status; } - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; } if (sb.Desc.Status.ErrorCheckCondition || sb.Desc.Status.DataRequest) { if (ioHdr.driver_status != SG_DRIVER_SENSE) { @@ -383,7 +383,7 @@ EWriteCacheResult AtaPassThrough(FHANDLE file, ui8 command, ui8 features, ui8 da if (outDetails) { (*outDetails) << "SG_IO: questionable sense data, results may be incorrect."; } - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; } } else if (sb.Raw[0] != 0x72 || sb.Raw[7] < 14 || sb.Desc.DescriptorCode != 0x09 || sb.Desc.AdditionalDescriptorLength < 0x0c) { @@ -395,9 +395,9 @@ EWriteCacheResult AtaPassThrough(FHANDLE file, ui8 command, ui8 features, ui8 da (*outDetails) << "I/O error, command# " << (ui32)command << " status# " << (ui32)sb.Desc.Status.Raw << "error# " << (ui32)sb.Desc.Error; } - return WriteCacheResultErrorTemporary; + return WriteCacheResultErrorTemporary; } - return WriteCacheResultOk; + return WriteCacheResultOk; } enum EThreeValuedLogic { @@ -436,15 +436,15 @@ struct TIdentifyData { TString GetString(ui32 offsetWords, ui32 sizeBytes) const { Y_VERIFY(Id); TString string; - string.resize(sizeBytes); - char *dst = string.Detach(); + string.resize(sizeBytes); + char *dst = string.Detach(); char *src = reinterpret_cast<char*>(&Id[offsetWords]); Y_VERIFY((sizeBytes & 1) == 0); for (ui32 i = 0; i < sizeBytes; i += 2) { dst[i + 0] = src[i + 1]; dst[i + 1] = src[i + 0]; } - return StripString(string); + return StripString(string); } TString GetSerialNumber() const { @@ -459,17 +459,17 @@ struct TIdentifyData { return GetString(27, 40); } - TPDiskCategory::EDeviceType GetDeviceType() const { - // "nominal media rotation rate" of 
HDD devices, equals to 1 for SSD - if (Id[217] > 0x401) { - return TPDiskCategory::DEVICE_TYPE_ROT; - } else if (Id[217] == 1) { - return TPDiskCategory::DEVICE_TYPE_SSD; - } else { - return TPDiskCategory::DEVICE_TYPE_UNKNOWN; - } - } - + TPDiskCategory::EDeviceType GetDeviceType() const { + // "nominal media rotation rate" of HDD devices, equals to 1 for SSD + if (Id[217] > 0x401) { + return TPDiskCategory::DEVICE_TYPE_ROT; + } else if (Id[217] == 1) { + return TPDiskCategory::DEVICE_TYPE_SSD; + } else { + return TPDiskCategory::DEVICE_TYPE_UNKNOWN; + } + } + EThreeValuedLogic Is3WriteCacheSuppored() const { if (IsKnowable(83)) { return ((Id[82] & (1 << 5)) ? TVL_TRUE : TVL_FALSE); @@ -506,36 +506,36 @@ struct TIdentifyData { } } - EWriteCacheResult Gather(FHANDLE file, TStringStream *outDetails) { + EWriteCacheResult Gather(FHANDLE file, TStringStream *outDetails) { if (IsGathered) { - return WriteCacheResultOk; + return WriteCacheResultOk; } - EWriteCacheResult res1 = AtaPassThrough(file, ATA_OP_IDENTIFY, 0, 1, Data, true, 60000, outDetails); - if (res1 != WriteCacheResultOk) { + EWriteCacheResult res1 = AtaPassThrough(file, ATA_OP_IDENTIFY, 0, 1, Data, true, 60000, outDetails); + if (res1 != WriteCacheResultOk) { IsAta12 = false; - EWriteCacheResult res2 = AtaPassThrough(file, ATA_OP_PIDENTIFY, 0, 1, Data, false, 60000, outDetails); - if (res2 != WriteCacheResultOk) { + EWriteCacheResult res2 = AtaPassThrough(file, ATA_OP_PIDENTIFY, 0, 1, Data, false, 60000, outDetails); + if (res2 != WriteCacheResultOk) { if (outDetails) { (*outDetails) << "GetIdentifyData failed both ATA_OP_IDENTIFY and ATA_OP_PIDENTIFY."; } - if (res1 == WriteCacheResultErrorPersistent && res2 == WriteCacheResultErrorPersistent) { - return WriteCacheResultErrorPersistent; + if (res1 == WriteCacheResultErrorPersistent && res2 == WriteCacheResultErrorPersistent) { + return WriteCacheResultErrorPersistent; } else { - return WriteCacheResultErrorTemporary; + return 
WriteCacheResultErrorTemporary; } } } ToHostByteOrder(); IsGathered = true; Id = (ui16*)(void*)Data; - return WriteCacheResultOk; + return WriteCacheResultOk; } }; -EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails) { +EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails) { TIdentifyData identify; - EWriteCacheResult res = identify.Gather(file, outDetails); - if (res != WriteCacheResultOk) { + EWriteCacheResult res = identify.Gather(file, outDetails); + if (res != WriteCacheResultOk) { if (outDetails) { (*outDetails) << "FlushWriteCache failed, path# \"" << path << "\""; } @@ -545,7 +545,7 @@ EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStre bool useExt = (identify.Is3FlushCacheExtSuppored() == TVL_TRUE); res = AtaPassThrough(file, useExt ? ATA_OP_FLUSHCACHE_EXT : ATA_OP_FLUSHCACHE, 0, 0, nullptr, useExt, 60000, outDetails); - if (res != WriteCacheResultOk) { + if (res != WriteCacheResultOk) { if (outDetails) { (*outDetails) << "FlushWriteCache failed, path# \"" << path << "\""; (*outDetails) << " op# " << (useExt ? 
"ATA_OP_FLUSHCACHE_EXT" : "ATA_OP_FLUSHCACHE"); @@ -553,147 +553,147 @@ EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStre } return res; } - return WriteCacheResultOk; + return WriteCacheResultOk; } -//////////////////////////////////////////////////////////////////////////////// -// NVMe admin command -//////////////////////////////////////////////////////////////////////////////// - -static constexpr ui64 NVME_ADMIN_IDENTIFY_OPCODE = 0x06; -static constexpr ui64 NVME_IDENTIFY_DATA_SIZE = 4096; - -static constexpr ui64 NVME_ID_MODEL_NUMBER_OFFSET = 4; -static constexpr ui64 NVME_ID_MODEL_NUMBER_SIZE = 20; -static constexpr ui64 NVME_ID_SERIAL_NUMBER_OFFSET = 24; -static constexpr ui64 NVME_ID_SERIAL_NUMBER_SIZE = 40; -static constexpr ui64 NVME_ID_FIRMWARE_REVISION_OFFSET = 64; -static constexpr ui64 NVME_ID_FIRMWARE_REVISION_SIZE = 8; - -static TArrayHolder<char> GetNvmeIdentifyStruct(int fd, TStringStream *outDetails) -{ +//////////////////////////////////////////////////////////////////////////////// +// NVMe admin command +//////////////////////////////////////////////////////////////////////////////// + +static constexpr ui64 NVME_ADMIN_IDENTIFY_OPCODE = 0x06; +static constexpr ui64 NVME_IDENTIFY_DATA_SIZE = 4096; + +static constexpr ui64 NVME_ID_MODEL_NUMBER_OFFSET = 4; +static constexpr ui64 NVME_ID_MODEL_NUMBER_SIZE = 20; +static constexpr ui64 NVME_ID_SERIAL_NUMBER_OFFSET = 24; +static constexpr ui64 NVME_ID_SERIAL_NUMBER_SIZE = 40; +static constexpr ui64 NVME_ID_FIRMWARE_REVISION_OFFSET = 64; +static constexpr ui64 NVME_ID_FIRMWARE_REVISION_SIZE = 8; + +static TArrayHolder<char> GetNvmeIdentifyStruct(int fd, TStringStream *outDetails) +{ TArrayHolder<char> id_ctrl_buffer{new char[NVME_IDENTIFY_DATA_SIZE]}; - memset(id_ctrl_buffer.Get(), 0, NVME_IDENTIFY_DATA_SIZE); - struct nvme_admin_cmd cmd; - memset(&cmd, 0, sizeof(struct nvme_admin_cmd)); - - cmd.opcode = NVME_ADMIN_IDENTIFY_OPCODE; - cmd.addr = 
reinterpret_cast<__u64>(id_ctrl_buffer.Get()); - cmd.data_len = NVME_IDENTIFY_DATA_SIZE; - cmd.nsid = 0; - // bits 31:16 -- Controller Identifier (CNTID) - // bits 15:08 -- Reserved - // bits 07:00 -- Controller or Namespace Structure (CNS). - cmd.cdw10 = 0x01; // CNS == 01h -- Identify Controller data structure for the controller processing the command. - // bits 31:16 -- Reserved - // bits 15:00 -- NVM Set Identifier (NVMSETID). This field is used for Identify operations with a CNS value of 04h - cmd.cdw11 = 0; - - if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0) { - return id_ctrl_buffer; - } else { - if (outDetails) { - *outDetails << "error in NVME_IOCTL_ADMIN_CMD ioctl, errno# " << errno << " strerror# " << strerror(errno); - } - return nullptr; - } -} - -static TString RenderCharField(const char *s, size_t n) { - TString str(s, n); - if (size_t end = str.find('\0'); end != TString::npos) { - str.resize(end); - } - return StripString(str); -} - -static std::optional<TDriveData> GetNvmeDriveData(int fd, TStringStream *outDetails) { - if (TArrayHolder<char> id_data = GetNvmeIdentifyStruct(fd, outDetails)) { - TDriveData data; - data.IsWriteCacheValid = false; - const char *id = id_data.Get(); - data.SerialNumber = RenderCharField(id + NVME_ID_SERIAL_NUMBER_OFFSET, NVME_ID_SERIAL_NUMBER_SIZE); - data.ModelNumber = RenderCharField(id + NVME_ID_MODEL_NUMBER_OFFSET, NVME_ID_MODEL_NUMBER_SIZE); - data.FirmwareRevision = RenderCharField(id + NVME_ID_FIRMWARE_REVISION_OFFSET, NVME_ID_FIRMWARE_REVISION_SIZE); - data.DeviceType = TPDiskCategory::DEVICE_TYPE_NVME; - return data; - } else { - return std::nullopt; - } -} - -static std::optional<TDriveData> GetSysfsDriveData(const TString &path, TStringStream *outDetails) { - char realPath[PATH_MAX]; - char *res = realpath(path.Data(), realPath); - if (res == NULL) { - if (errno == ENOENT) { - ythrow TFileError() << "no such file# " << path; - } - *outDetails << "erron in realpath(), details# " << strerror(errno); - return 
std::nullopt; - } - std::regex name_regex(R"__(nvme\d+n\d+)__"); - std::cmatch match; - if (!std::regex_search(realPath, match, name_regex)) { - *outDetails << "regex_search failed, realPath# " << realPath; - return std::nullopt; - } - if (match.size() != 1) { - *outDetails << "regex_match size not equals to 1, realPath# " << realPath; - return std::nullopt; - } - TString nvme_sysfs = TStringBuilder() << "/sys/block/" << match[0].str() << "/"; - - auto readField = [&] (TStringBuf subpath) { - return StripString(TFileInput(nvme_sysfs + subpath).ReadAll()); - }; - - try { - TDriveData data; - data.IsWriteCacheValid = false; - data.SerialNumber = readField("device/serial"); - data.ModelNumber = readField("device/model"); - data.FirmwareRevision = readField("device/firmware_rev"); - data.DeviceType = TPDiskCategory::DEVICE_TYPE_NVME; - return data; - } catch (const TFileError& err) { - *outDetails << "can't open sysfs files, caught TFileError, what# " << err.what(); - return std::nullopt; - } -} - -std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails) { - try { - TFile f(path, OpenExisting | RdOnly); - TDriveData data; - EWriteCacheResult res = GetWriteCache(f.GetHandle(), path, &data, outDetails); - if (res == EWriteCacheResult::WriteCacheResultOk) { - data.Path = path; - return data; - } - *outDetails << "; "; - if (std::optional<TDriveData> nvmeData = GetSysfsDriveData(path, outDetails)) { - nvmeData->Path = path; - return nvmeData; - } - *outDetails << "; "; - if (std::optional<TDriveData> nvmeData = GetNvmeDriveData(f.GetHandle(), outDetails)) { - nvmeData->Path = path; - return nvmeData; - } - return std::nullopt; - } catch (const TFileError& err) { - *outDetails << "caught TFileError, what# " << err.what(); - return std::nullopt; - } -} - -EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *outDriveData, + memset(id_ctrl_buffer.Get(), 0, NVME_IDENTIFY_DATA_SIZE); + struct nvme_admin_cmd cmd; + 
memset(&cmd, 0, sizeof(struct nvme_admin_cmd)); + + cmd.opcode = NVME_ADMIN_IDENTIFY_OPCODE; + cmd.addr = reinterpret_cast<__u64>(id_ctrl_buffer.Get()); + cmd.data_len = NVME_IDENTIFY_DATA_SIZE; + cmd.nsid = 0; + // bits 31:16 -- Controller Identifier (CNTID) + // bits 15:08 -- Reserved + // bits 07:00 -- Controller or Namespace Structure (CNS). + cmd.cdw10 = 0x01; // CNS == 01h -- Identify Controller data structure for the controller processing the command. + // bits 31:16 -- Reserved + // bits 15:00 -- NVM Set Identifier (NVMSETID). This field is used for Identify operations with a CNS value of 04h + cmd.cdw11 = 0; + + if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0) { + return id_ctrl_buffer; + } else { + if (outDetails) { + *outDetails << "error in NVME_IOCTL_ADMIN_CMD ioctl, errno# " << errno << " strerror# " << strerror(errno); + } + return nullptr; + } +} + +static TString RenderCharField(const char *s, size_t n) { + TString str(s, n); + if (size_t end = str.find('\0'); end != TString::npos) { + str.resize(end); + } + return StripString(str); +} + +static std::optional<TDriveData> GetNvmeDriveData(int fd, TStringStream *outDetails) { + if (TArrayHolder<char> id_data = GetNvmeIdentifyStruct(fd, outDetails)) { + TDriveData data; + data.IsWriteCacheValid = false; + const char *id = id_data.Get(); + data.SerialNumber = RenderCharField(id + NVME_ID_SERIAL_NUMBER_OFFSET, NVME_ID_SERIAL_NUMBER_SIZE); + data.ModelNumber = RenderCharField(id + NVME_ID_MODEL_NUMBER_OFFSET, NVME_ID_MODEL_NUMBER_SIZE); + data.FirmwareRevision = RenderCharField(id + NVME_ID_FIRMWARE_REVISION_OFFSET, NVME_ID_FIRMWARE_REVISION_SIZE); + data.DeviceType = TPDiskCategory::DEVICE_TYPE_NVME; + return data; + } else { + return std::nullopt; + } +} + +static std::optional<TDriveData> GetSysfsDriveData(const TString &path, TStringStream *outDetails) { + char realPath[PATH_MAX]; + char *res = realpath(path.Data(), realPath); + if (res == NULL) { + if (errno == ENOENT) { + ythrow TFileError() << "no 
such file# " << path; + } + *outDetails << "erron in realpath(), details# " << strerror(errno); + return std::nullopt; + } + std::regex name_regex(R"__(nvme\d+n\d+)__"); + std::cmatch match; + if (!std::regex_search(realPath, match, name_regex)) { + *outDetails << "regex_search failed, realPath# " << realPath; + return std::nullopt; + } + if (match.size() != 1) { + *outDetails << "regex_match size not equals to 1, realPath# " << realPath; + return std::nullopt; + } + TString nvme_sysfs = TStringBuilder() << "/sys/block/" << match[0].str() << "/"; + + auto readField = [&] (TStringBuf subpath) { + return StripString(TFileInput(nvme_sysfs + subpath).ReadAll()); + }; + + try { + TDriveData data; + data.IsWriteCacheValid = false; + data.SerialNumber = readField("device/serial"); + data.ModelNumber = readField("device/model"); + data.FirmwareRevision = readField("device/firmware_rev"); + data.DeviceType = TPDiskCategory::DEVICE_TYPE_NVME; + return data; + } catch (const TFileError& err) { + *outDetails << "can't open sysfs files, caught TFileError, what# " << err.what(); + return std::nullopt; + } +} + +std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails) { + try { + TFile f(path, OpenExisting | RdOnly); + TDriveData data; + EWriteCacheResult res = GetWriteCache(f.GetHandle(), path, &data, outDetails); + if (res == EWriteCacheResult::WriteCacheResultOk) { + data.Path = path; + return data; + } + *outDetails << "; "; + if (std::optional<TDriveData> nvmeData = GetSysfsDriveData(path, outDetails)) { + nvmeData->Path = path; + return nvmeData; + } + *outDetails << "; "; + if (std::optional<TDriveData> nvmeData = GetNvmeDriveData(f.GetHandle(), outDetails)) { + nvmeData->Path = path; + return nvmeData; + } + return std::nullopt; + } catch (const TFileError& err) { + *outDetails << "caught TFileError, what# " << err.what(); + return std::nullopt; + } +} + +EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData 
*outDriveData, TStringStream *outDetails) { Y_VERIFY(outDriveData); TIdentifyData identify; - EWriteCacheResult res = identify.Gather(file, outDetails); - if (res != WriteCacheResultOk) { + EWriteCacheResult res = identify.Gather(file, outDetails); + if (res != WriteCacheResultOk) { if (outDetails) { (*outDetails) << "GetWriteCache failed, path# \"" << path << "\""; } @@ -704,26 +704,26 @@ EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *o if (wcache == TVL_UNKNOWABLE) { (*outDetails) << "GetWriteCache failed, write cache state is unknowable, path# \"" << path << "\""; - return WriteCacheResultErrorPersistent; + return WriteCacheResultErrorPersistent; } - outDriveData->Path = path; + outDriveData->Path = path; outDriveData->IsWriteCacheValid = true; outDriveData->IsWriteCacheEnabled = (wcache == TVL_TRUE); outDriveData->SerialNumber = identify.GetSerialNumber(); outDriveData->FirmwareRevision = identify.GetFirmwareRevision(); outDriveData->ModelNumber = identify.GetModelNumber(); - outDriveData->DeviceType = identify.GetDeviceType(); - return WriteCacheResultOk; + outDriveData->DeviceType = identify.GetDeviceType(); + return WriteCacheResultOk; } -EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails) { +EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails) { ui8 features = (isEnable ? 
0x02 : 0x82); // 0x02 and 0x82 are magic numbers from ATA specificaton 6.49.8 - EWriteCacheResult res = AtaPassThrough(file, ATA_OP_SETFEATURES, features, 0, nullptr, false, 60000, outDetails); - if (res != WriteCacheResultOk) { + EWriteCacheResult res = AtaPassThrough(file, ATA_OP_SETFEATURES, features, 0, nullptr, false, 60000, outDetails); + if (res != WriteCacheResultOk) { (*outDetails) << "SetWriteCache failed, path# \"" << path << "\" isEnable# " << isEnable; return res; } - return WriteCacheResultOk; + return WriteCacheResultOk; } #endif diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h index f9a7922624..86129e68f2 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h @@ -1,36 +1,36 @@ #pragma once #include "blobstorage_pdisk_drivedata.h" -#include <util/system/file.h> +#include <util/system/file.h> + +#include <optional> -#include <optional> - namespace NKikimr { namespace NPDisk { -// FHANDLE file is used for the operation. +// FHANDLE file is used for the operation. // const TString &path is used for debugging and messages in outDetails. // TStringSteram *outDetails can be nullptr or a pointer to a TStringStream that will receive error details. // Return value is true in case of success. 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -enum EWriteCacheResult { - WriteCacheResultOk = 0, - WriteCacheResultErrorTemporary = 1, - WriteCacheResultErrorPersistent = 2 +enum EWriteCacheResult { + WriteCacheResultOk = 0, + WriteCacheResultErrorTemporary = 1, + WriteCacheResultErrorPersistent = 2 }; -EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails); +EWriteCacheResult FlushWriteCache(FHANDLE file, const TString &path, TStringStream *outDetails); // outIsEnabled must be a valid bool pointer. -EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *outDriveData, +EWriteCacheResult GetWriteCache(FHANDLE file, const TString &path, TDriveData *outDriveData, TStringStream *outDetails); -std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails); - - +std::optional<TDriveData> GetDriveData(const TString &path, TStringStream *outDetails); + + // Attention! You should flush the cache before disabling it. 
-EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails); +EWriteCacheResult SetWriteCache(FHANDLE file, const TString &path, bool isEnable, TStringStream *outDetails); } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.cpp index 85b2ae417d..b638c55ed8 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.cpp @@ -11,7 +11,7 @@ namespace NPDisk { //////////////////////////////////////////////////////////////////////////// void TBufferedWriter::WriteBufferWithFlush(TReqId reqId, NWilson::TTraceId *traceId, - TCompletionAction *flushAction, ui32 chunkIdx) { + TCompletionAction *flushAction, ui32 chunkIdx) { static NWilson::TTraceId noTrace; if (DirtyFrom != DirtyTo) { ui8 *source = CurrentBuffer->Data() + DirtyFrom - StartOffset; @@ -25,9 +25,9 @@ void TBufferedWriter::WriteBufferWithFlush(TReqId reqId, NWilson::TTraceId *trac CurrentSector = CurrentBuffer->Data(); StartOffset = DirtyTo; DirtyFrom = DirtyTo; - } else if (flushAction) { + } else if (flushAction) { flushAction->CostNs = 1; - BlockDevice.FlushAsync(flushAction, reqId); + BlockDevice.FlushAsync(flushAction, reqId); } } @@ -42,7 +42,7 @@ TBufferedWriter::TBufferedWriter(ui64 sectorSize, IBlockDevice &blockDevice, TDi , DirtyTo(0) , CurrentSector(nullptr) , Pool(pool) - , CurrentBuffer(Pool->Pop()) + , CurrentBuffer(Pool->Pop()) , ActorSystem(actorSystem) , LastReqId(TReqId::InitialTSectorWriterReqId, 0) , DriveModel(driveModel) @@ -50,7 +50,7 @@ TBufferedWriter::TBufferedWriter(ui64 sectorSize, IBlockDevice &blockDevice, TDi } void TBufferedWriter::SetupWithBuffer(ui64 startOffset, ui64 currentOffset, TBuffer *buffer, ui32 count, TReqId reqId) { - CurrentBuffer.Reset(buffer); + CurrentBuffer.Reset(buffer); CurrentSector = CurrentBuffer->Data() + (currentOffset - startOffset); StartOffset 
= startOffset; @@ -88,7 +88,7 @@ ui8* TBufferedWriter::RawData() const { } void TBufferedWriter::Flush(TReqId reqId, NWilson::TTraceId *traceId, - TCompletionAction *flushAction, ui32 chunkIdx) { + TCompletionAction *flushAction, ui32 chunkIdx) { WriteBufferWithFlush(reqId, traceId, flushAction, chunkIdx); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.h index 802b256f9a..6b818645dd 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_writer.h @@ -37,14 +37,14 @@ protected: ui8* CurrentSector; TBufferPool *Pool; - TBuffer::TPtr CurrentBuffer; + TBuffer::TPtr CurrentBuffer; TActorSystem *ActorSystem; TReqId LastReqId; TDriveModel *DriveModel; void WriteBufferWithFlush(TReqId reqId, NWilson::TTraceId *traceId, - TCompletionAction *flushAction, ui32 chunkIdx); + TCompletionAction *flushAction, ui32 chunkIdx); public: TBufferedWriter(ui64 sectorSize, IBlockDevice &blockDevice, TDiskFormat &format, TBufferPool *pool, TActorSystem *actorSystem, TDriveModel *driveModel); @@ -59,15 +59,15 @@ public: ~TBufferedWriter(); }; -struct TChunkIdxWithInfo { - TChunkIdx Idx; - TLogChunkInfo *Info; -}; - +struct TChunkIdxWithInfo { + TChunkIdx Idx; + TLogChunkInfo *Info; +}; + //////////////////////////////////////////////////////////////////////////// // TSectorWriter //////////////////////////////////////////////////////////////////////////// -template <bool IsLog, bool IsSysLog> +template <bool IsLog, bool IsSysLog> class TSectorWriter { public: TPDiskMon &Mon; @@ -83,26 +83,26 @@ public: ui32 ChunkIdx; ui64 RecordBytesLeft; ui64 DataMagic; - TDeque<TChunkIdxWithInfo> NextChunks; - TLogChunkInfo *LogChunkInfo = nullptr; + TDeque<TChunkIdxWithInfo> NextChunks; + TLogChunkInfo *LogChunkInfo = nullptr; - TPDiskHashCalculator Hash; - TControlWrapper UseT1ha0Hasher; - TPDiskStreamCypher Cypher; + TPDiskHashCalculator Hash; + TControlWrapper 
UseT1ha0Hasher; + TPDiskStreamCypher Cypher; TActorSystem *ActorSystem; ui32 PDiskId; TDriveModel *DriveModel; - bool OnNewChunk; - + bool OnNewChunk; + bool IsEmptySector() const { return (SectorBytesFree == Format.SectorPayloadSize()); } - TSectorWriter(TPDiskMon &mon, IBlockDevice &blockDevice, TDiskFormat &format, ui64 &nonce, - const TKey &key, TBufferPool *pool, ui64 firstSectorIdx, ui64 endSectorIdx, ui64 dataMagic, ui32 chunkIdx, - TLogChunkInfo *logChunkInfo, ui64 sectorIdx, TBuffer *buffer, TActorSystem *actorSystem, ui32 pDiskId, - TDriveModel *driveModel, const TControlWrapper& useT1ha0Hasher, bool enableEncrytion) + TSectorWriter(TPDiskMon &mon, IBlockDevice &blockDevice, TDiskFormat &format, ui64 &nonce, + const TKey &key, TBufferPool *pool, ui64 firstSectorIdx, ui64 endSectorIdx, ui64 dataMagic, ui32 chunkIdx, + TLogChunkInfo *logChunkInfo, ui64 sectorIdx, TBuffer *buffer, TActorSystem *actorSystem, ui32 pDiskId, + TDriveModel *driveModel, const TControlWrapper& useT1ha0Hasher, bool enableEncrytion) : Mon(mon) , BlockDevice(blockDevice) , Format(format) @@ -115,16 +115,16 @@ public: , ChunkIdx(chunkIdx) , RecordBytesLeft(0) , DataMagic(dataMagic) - , LogChunkInfo(logChunkInfo) - , Hash(useT1ha0Hasher) - , UseT1ha0Hasher(useT1ha0Hasher) - , Cypher(enableEncrytion) + , LogChunkInfo(logChunkInfo) + , Hash(useT1ha0Hasher) + , UseT1ha0Hasher(useT1ha0Hasher) + , Cypher(enableEncrytion) , ActorSystem(actorSystem) , PDiskId(pDiskId) , DriveModel(driveModel) - , OnNewChunk(true) + , OnNewChunk(true) { - Y_VERIFY(!LogChunkInfo || LogChunkInfo->ChunkIdx == ChunkIdx); + Y_VERIFY(!LogChunkInfo || LogChunkInfo->ChunkIdx == ChunkIdx); BufferedWriter.Reset(new TBufferedWriter(Format.SectorSize, BlockDevice, Format, pool, actorSystem, DriveModel)); @@ -134,47 +134,47 @@ public: ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); if (buffer) { Y_VERIFY(IsLog); - Y_VERIFY(!IsSysLog); + Y_VERIFY(!IsSysLog); ui64 startOffset = Format.Offset(ChunkIdx, SectorIdx); 
BufferedWriter->SetupWithBuffer(startOffset, sectorOffset, buffer, 1, TReqId(TReqId::CreateTSectorWriterWithBuffer, 0)); } else { - BufferedWriter->Seek(sectorOffset, IsSysLog ? ReplicationFactor : 1, + BufferedWriter->Seek(sectorOffset, IsSysLog ? ReplicationFactor : 1, IsSysLog ? ReplicationFactor : 1, TReqId(TReqId::CreateTSectorWriterSeek, 0), nullptr, ChunkIdx); } - if (SectorIdx == 0) { - if (LogChunkInfo) { - LogChunkInfo->FirstNonce = Nonce; - } - } - - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " is created at " - << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + if (SectorIdx == 0) { + if (LogChunkInfo) { + LogChunkInfo->FirstNonce = Nonce; + } + } + + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() << " is created at " + << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx << (buffer ? " WithBuffer" : " NoBuffer")); } - } - - const TString SelfInfo() { - TStringStream ss; - ss << "PDiskId# " << PDiskId - << " TSectorWriter<" - << (IsLog ? "Log" : "!Log") << "," - << (IsSysLog ? "SysLog" : "!SysLog") - << "> "; - return ss.Str(); } - void WriteNextChunkReference(TChunkIdx nextChunk, ui64 nextChunkNonce, TCompletionAction *action, - TReqId reqId, NWilson::TTraceId *traceId) { + const TString SelfInfo() { + TStringStream ss; + ss << "PDiskId# " << PDiskId + << " TSectorWriter<" + << (IsLog ? "Log" : "!Log") << "," + << (IsSysLog ? 
"SysLog" : "!SysLog") + << "> "; + return ss.Str(); + } + + void WriteNextChunkReference(TChunkIdx nextChunk, ui64 nextChunkNonce, TCompletionAction *action, + TReqId reqId, NWilson::TTraceId *traceId) { - if (ActorSystem) { - LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " WriteNextChunkReference, currentChunkIdx# " << ChunkIdx - << " nextChunkIdx# " << nextChunk << " Nonce# " << Nonce); - } + if (ActorSystem) { + LOG_INFO_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " WriteNextChunkReference, currentChunkIdx# " << ChunkIdx + << " nextChunkIdx# " << nextChunk << " Nonce# " << Nonce); + } ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); ui8* sector = BufferedWriter->Seek(sectorOffset, ReplicationFactor, ReplicationFactor, reqId, traceId, @@ -185,13 +185,13 @@ public: nextLogChunkReference->Version = PDISK_DATA_VERSION_3; nextLogChunkReference->NextChunk = nextChunk; nextLogChunkReference->CreatedAt = TInstant::Now(); - // zero in typical case, non-zero only in case of log splicing + // zero in typical case, non-zero only in case of log splicing nextLogChunkReference->NextChunkFirstNonce = nextChunkNonce; nextLogChunkReference->IsNotCompatible = 0; - + memcpy(sector + Format.SectorSize - CanarySize - sizeof(TDataSectorFooter), &Canary, CanarySize); Cypher.InplaceEncrypt(sector, Format.SectorSize - (ui32)sizeof(TDataSectorFooter)); - + PrepareDataSectorFooter(sector, Format.MagicNextLogChunkReference, sectorOffset); for (ui32 replica = 1; replica < ReplicationFactor; ++replica) { @@ -200,36 +200,36 @@ public: BufferedWriter->MarkDirty(); SectorIdx += ReplicationFactor; - if (!IsSysLog) { + if (!IsSysLog) { *Mon.BandwidthPLogChunkFooter += Format.SectorSize * ReplicationFactor; *Mon.BandwidthPLogChunkPadding += Format.ChunkSize - SectorIdx * Format.SectorSize; } BufferedWriter->Flush(reqId, traceId, action, ChunkIdx); - } - - void SwitchToNewChunk(TReqId reqId, NWilson::TTraceId *traceId) { - // Allocate next log chunk, write 
next log chunk pointer sectors, switch to that log chunk. - Y_VERIFY(IsLog); - Y_VERIFY(!NextChunks.empty()); - ui32 nextChunk = NextChunks.front().Idx; - TLogChunkInfo *nextLogChunkInfo = NextChunks.front().Info; - NextChunks.pop_front(); - - WriteNextChunkReference(nextChunk, 0, nullptr, reqId, traceId); - - // Start working with new chunk - OnNewChunk = true; + } + + void SwitchToNewChunk(TReqId reqId, NWilson::TTraceId *traceId) { + // Allocate next log chunk, write next log chunk pointer sectors, switch to that log chunk. + Y_VERIFY(IsLog); + Y_VERIFY(!NextChunks.empty()); + ui32 nextChunk = NextChunks.front().Idx; + TLogChunkInfo *nextLogChunkInfo = NextChunks.front().Info; + NextChunks.pop_front(); + + WriteNextChunkReference(nextChunk, 0, nullptr, reqId, traceId); + + // Start working with new chunk + OnNewChunk = true; ChunkIdx = nextChunk; - LogChunkInfo = nextLogChunkInfo; - LogChunkInfo->FirstNonce = Nonce; - LogChunkInfo->PrevChunkLastNonce = Nonce - 1; + LogChunkInfo = nextLogChunkInfo; + LogChunkInfo->FirstNonce = Nonce; + LogChunkInfo->PrevChunkLastNonce = Nonce - 1; SectorIdx = 0; FirstSectorIdx = 0; - ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); + ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); ui32 seekCount = IsSysLog ? ReplicationFactor : 1; - BufferedWriter->Seek(sectorOffset, IsSysLog ? ReplicationFactor : 1, seekCount, reqId, traceId, + BufferedWriter->Seek(sectorOffset, IsSysLog ? 
ReplicationFactor : 1, seekCount, reqId, traceId, ChunkIdx); } @@ -239,42 +239,42 @@ public: } void NextSector(const ui64 dataMagic, TReqId reqId, NWilson::TTraceId *traceId) { - if (OnNewChunk) { - OnNewChunk = false; - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " NextSector on new chunk, currentChunkIdx# " << ChunkIdx - << " SectorIdx# " << SectorIdx << " Nonce# " << Nonce); - } - } + if (OnNewChunk) { + OnNewChunk = false; + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " NextSector on new chunk, currentChunkIdx# " << ChunkIdx + << " SectorIdx# " << SectorIdx << " Nonce# " << Nonce); + } + } ui32 reserve = 1; - if (IsSysLog) { - const ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); + if (IsSysLog) { + const ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); for (ui32 replica = 1; replica < ReplicationFactor; ++replica) { ui8 *sectorData = BufferedWriter->Get() + Format.SectorSize * replica; memcpy(sectorData, BufferedWriter->Get(), Format.SectorSize); // Check sector CRC - const ui64 sectorHash = *(ui64*)(void*)(sectorData + Format.SectorSize - sizeof(ui64)); - Y_VERIFY(Hash.CheckSectorHash(sectorOffset, dataMagic, sectorData, Format.SectorSize, sectorHash), - "Sector hash corruption detected!"); + const ui64 sectorHash = *(ui64*)(void*)(sectorData + Format.SectorSize - sizeof(ui64)); + Y_VERIFY(Hash.CheckSectorHash(sectorOffset, dataMagic, sectorData, Format.SectorSize, sectorHash), + "Sector hash corruption detected!"); } BufferedWriter->MarkDirty(); reserve = ReplicationFactor; - *Mon.BandwidthPSysLogErasure += Format.SectorSize * (ReplicationFactor - 1); + *Mon.BandwidthPSysLogErasure += Format.SectorSize * (ReplicationFactor - 1); } - SectorIdx += (IsSysLog ? ReplicationFactor : 1); + SectorIdx += (IsSysLog ? 
ReplicationFactor : 1); if (SectorIdx < EndSectorIdx) { ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); - BufferedWriter->Seek(sectorOffset, IsSysLog ? ReplicationFactor : 1, reserve, reqId, traceId, + BufferedWriter->Seek(sectorOffset, IsSysLog ? ReplicationFactor : 1, reserve, reqId, traceId, ChunkIdx); return; } - if (IsSysLog) { + if (IsSysLog) { SectorIdx = FirstSectorIdx; ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); - BufferedWriter->Seek(sectorOffset, ReplicationFactor, reserve, reqId, traceId, + BufferedWriter->Seek(sectorOffset, ReplicationFactor, reserve, reqId, traceId, ChunkIdx); return; } @@ -284,26 +284,26 @@ public: } } - bool OnFirstSectorInChunk() const { - return SectorIdx == 0; - } - - bool OnLastSectorInChunk() const { - return SectorIdx + 1 == EndSectorIdx; - } - + bool OnFirstSectorInChunk() const { + return SectorIdx == 0; + } + + bool OnLastSectorInChunk() const { + return SectorIdx + 1 == EndSectorIdx; + } + void PrepareDataSectorFooter(ui8 *sector, ui64 magic, ui64 sectorOffset) { - if (IsLog && LogChunkInfo) { - // Nonce is incremented in next lines, so save it now - LogChunkInfo->LastNonce = Nonce; - } - + if (IsLog && LogChunkInfo) { + // Nonce is incremented in next lines, so save it now + LogChunkInfo->LastNonce = Nonce; + } + TDataSectorFooter §orFooter = *(TDataSectorFooter*)(sector + Format.SectorSize - sizeof(TDataSectorFooter)); sectorFooter.Version = PDISK_DATA_VERSION; sectorFooter.Nonce = Nonce; - Hash.SetUseT1ha0Hasher(UseT1ha0Hasher); - sectorFooter.Hash = Hash.HashSector(sectorOffset, magic, sector, Format.SectorSize); - + Hash.SetUseT1ha0Hasher(UseT1ha0Hasher); + sectorFooter.Hash = Hash.HashSector(sectorOffset, magic, sector, Format.SectorSize); + BufferedWriter->MarkDirty(); ++Nonce; Cypher.StartMessage(Nonce); @@ -313,13 +313,13 @@ public: TParitySectorFooter §orFooter = *(TParitySectorFooter*) (sector + Format.SectorSize - sizeof(TParitySectorFooter)); sectorFooter.Nonce = Nonce; - 
Hash.SetUseT1ha0Hasher(UseT1ha0Hasher); - sectorFooter.Hash = Hash.HashSector(sectorOffset, magic, sector, Format.SectorSize); - if (!IsLog && ActorSystem) { - LOG_TRACE_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " PrepareParitySectorFooter, sectorOffset#" << sectorOffset - << " Nonce# " << Nonce << " hash# " << sectorFooter.Hash); - } + Hash.SetUseT1ha0Hasher(UseT1ha0Hasher); + sectorFooter.Hash = Hash.HashSector(sectorOffset, magic, sector, Format.SectorSize); + if (!IsLog && ActorSystem) { + LOG_TRACE_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " PrepareParitySectorFooter, sectorOffset#" << sectorOffset + << " Nonce# " << Nonce << " hash# " << sectorFooter.Hash); + } BufferedWriter->MarkDirty(); ++Nonce; Cypher.StartMessage(Nonce); @@ -361,132 +361,132 @@ public: BufferedWriter->Flush(reqId, traceId, flushAction, ChunkIdx); } - void TerminateLog(TReqId reqId, NWilson::TTraceId *traceId) { - Y_VERIFY(IsLog); - if (SectorBytesFree == 0 || SectorBytesFree == Format.SectorPayloadSize()) { - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " TerminateLog Sector is full or free " - << " SectorBytesFree# " << SectorBytesFree - << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) - << " Marker# BPD63"); - } - } else if (SectorBytesFree <= sizeof(TFirstLogPageHeader)) { - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " TerminateLog small SectorBytesFree# " << SectorBytesFree - << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) - << " Marker# BPD65"); - } - TFirstLogPageHeader terminator(LogPageTerminator, 0, 0, 0, 0, 0); - if (IsSysLog) { - *Mon.BandwidthPSysLogPadding += SectorBytesFree; - } else { - *Mon.BandwidthPLogPadding += SectorBytesFree; - } - RecordBytesLeft += SectorBytesFree; - Write(&terminator, 
SectorBytesFree, reqId, traceId); - } else { - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " TerminateLog large SectorBytesFree# " << SectorBytesFree - << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) - << " Marker# BPD66"); - } - ui32 availableSize = SectorBytesFree - sizeof(TFirstLogPageHeader); - TFirstLogPageHeader header(LogPageTerminator, availableSize, availableSize, 0, 0, 0); - if (IsSysLog) { - *Mon.BandwidthPSysLogPadding += SectorBytesFree; - } else { - *Mon.BandwidthPLogPadding += SectorBytesFree; - } - RecordBytesLeft += sizeof(TFirstLogPageHeader) + availableSize; - Write(&header, sizeof(TFirstLogPageHeader), reqId, traceId); - WriteZeroes(availableSize, reqId, traceId); - } - } - - void LogHeader(TOwner owner, TLogSignature signature, ui64 ownerLsn, ui64 dataSize, TReqId reqId, - NWilson::TTraceId *traceId) { - Y_VERIFY(IsLog); - Y_VERIFY(SectorBytesFree >= sizeof(TFirstLogPageHeader)); - ui64 availableSize = SectorBytesFree - sizeof(TFirstLogPageHeader); - bool isWhole = availableSize >= dataSize; - bool isTornOffHeader = false; - { - ui64 sizeNeeded = sizeof(TFirstLogPageHeader) + dataSize; - ui8 flags = LogPageFirst | (isWhole ? LogPageLast : 0); - ui32 payloadSize = isWhole ? dataSize : availableSize; - TFirstLogPageHeader header(flags, payloadSize, dataSize, owner, signature, ownerLsn); - RecordBytesLeft = sizeNeeded; - isTornOffHeader = (SectorBytesFree == sizeof(TFirstLogPageHeader) && !isWhole); - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " LogPageHeader, chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " nonce# " << Nonce << " Marker# BPD60"); - } - Write(&header, sizeof(TFirstLogPageHeader), reqId, traceId); - } - if (isTornOffHeader) { - bool isLast = RecordBytesLeft <= SectorBytesFree - sizeof(TLogPageHeader); - ui8 flags = isLast ? 
LogPageLast : 0; - ui32 payloadSize = isLast ? RecordBytesLeft : SectorBytesFree - sizeof(TLogPageHeader); - TLogPageHeader header(flags, payloadSize); - RecordBytesLeft += sizeof(TLogPageHeader); - if (IsSysLog) { - *Mon.BandwidthPSysLogRecordHeader += sizeof(TLogPageHeader); - } else { - *Mon.BandwidthPLogRecordHeader += sizeof(TLogPageHeader); - } - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() - << " LogPageHeader, chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " nonce# " << Nonce << " Marker# BPD61"); - } - Write(&header, sizeof(TLogPageHeader), reqId, traceId); - } - } - - void LogDataPart(const void* data, ui64 size, TReqId reqId, NWilson::TTraceId *traceId) { - Y_VERIFY(IsLog); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size); - Y_VERIFY(data); - Y_VERIFY(size > 0); - while (RecordBytesLeft > SectorBytesFree && size >= SectorBytesFree) { - const ui64 bytesToWrite = SectorBytesFree; - Write(data, bytesToWrite, reqId, traceId); - size -= bytesToWrite; - data = (ui8*)data + bytesToWrite; - - bool isLast = (RecordBytesLeft <= SectorBytesFree - sizeof(TLogPageHeader)); - ui8 flags = isLast ? LogPageLast : 0; - ui32 payloadSize = isLast ? 
RecordBytesLeft : SectorBytesFree - sizeof(TLogPageHeader); - TLogPageHeader header(flags, payloadSize); - RecordBytesLeft += sizeof(TLogPageHeader); - if (IsSysLog) { - *Mon.BandwidthPSysLogRecordHeader += sizeof(TLogPageHeader); - } else { - *Mon.BandwidthPLogRecordHeader += sizeof(TLogPageHeader); - } - if (ActorSystem) { - LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId - << " Line# " << __LINE__ << " LogPageHeader writing" - << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx - << " Marker# BPD62"); - } - Write(&header, sizeof(TLogPageHeader), reqId, traceId); - } - Write(data, size, reqId, traceId); - if (RecordBytesLeft == 0 && SectorBytesFree > 0) { - if (SectorBytesFree < sizeof(TFirstLogPageHeader)) { - TerminateLog(reqId, traceId); - } - } - } - + void TerminateLog(TReqId reqId, NWilson::TTraceId *traceId) { + Y_VERIFY(IsLog); + if (SectorBytesFree == 0 || SectorBytesFree == Format.SectorPayloadSize()) { + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " TerminateLog Sector is full or free " + << " SectorBytesFree# " << SectorBytesFree + << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) + << " Marker# BPD63"); + } + } else if (SectorBytesFree <= sizeof(TFirstLogPageHeader)) { + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " TerminateLog small SectorBytesFree# " << SectorBytesFree + << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) + << " Marker# BPD65"); + } + TFirstLogPageHeader terminator(LogPageTerminator, 0, 0, 0, 0, 0); + if (IsSysLog) { + *Mon.BandwidthPSysLogPadding += SectorBytesFree; + } else { + *Mon.BandwidthPLogPadding += SectorBytesFree; + } + RecordBytesLeft += SectorBytesFree; + Write(&terminator, SectorBytesFree, reqId, traceId); + } else { + if (ActorSystem) { + 
LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " TerminateLog large SectorBytesFree# " << SectorBytesFree + << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " sectorOffset# " << Format.Offset(ChunkIdx, SectorIdx) + << " Marker# BPD66"); + } + ui32 availableSize = SectorBytesFree - sizeof(TFirstLogPageHeader); + TFirstLogPageHeader header(LogPageTerminator, availableSize, availableSize, 0, 0, 0); + if (IsSysLog) { + *Mon.BandwidthPSysLogPadding += SectorBytesFree; + } else { + *Mon.BandwidthPLogPadding += SectorBytesFree; + } + RecordBytesLeft += sizeof(TFirstLogPageHeader) + availableSize; + Write(&header, sizeof(TFirstLogPageHeader), reqId, traceId); + WriteZeroes(availableSize, reqId, traceId); + } + } + + void LogHeader(TOwner owner, TLogSignature signature, ui64 ownerLsn, ui64 dataSize, TReqId reqId, + NWilson::TTraceId *traceId) { + Y_VERIFY(IsLog); + Y_VERIFY(SectorBytesFree >= sizeof(TFirstLogPageHeader)); + ui64 availableSize = SectorBytesFree - sizeof(TFirstLogPageHeader); + bool isWhole = availableSize >= dataSize; + bool isTornOffHeader = false; + { + ui64 sizeNeeded = sizeof(TFirstLogPageHeader) + dataSize; + ui8 flags = LogPageFirst | (isWhole ? LogPageLast : 0); + ui32 payloadSize = isWhole ? dataSize : availableSize; + TFirstLogPageHeader header(flags, payloadSize, dataSize, owner, signature, ownerLsn); + RecordBytesLeft = sizeNeeded; + isTornOffHeader = (SectorBytesFree == sizeof(TFirstLogPageHeader) && !isWhole); + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " LogPageHeader, chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " nonce# " << Nonce << " Marker# BPD60"); + } + Write(&header, sizeof(TFirstLogPageHeader), reqId, traceId); + } + if (isTornOffHeader) { + bool isLast = RecordBytesLeft <= SectorBytesFree - sizeof(TLogPageHeader); + ui8 flags = isLast ? LogPageLast : 0; + ui32 payloadSize = isLast ? 
RecordBytesLeft : SectorBytesFree - sizeof(TLogPageHeader); + TLogPageHeader header(flags, payloadSize); + RecordBytesLeft += sizeof(TLogPageHeader); + if (IsSysLog) { + *Mon.BandwidthPSysLogRecordHeader += sizeof(TLogPageHeader); + } else { + *Mon.BandwidthPLogRecordHeader += sizeof(TLogPageHeader); + } + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, SelfInfo() + << " LogPageHeader, chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " nonce# " << Nonce << " Marker# BPD61"); + } + Write(&header, sizeof(TLogPageHeader), reqId, traceId); + } + } + + void LogDataPart(const void* data, ui64 size, TReqId reqId, NWilson::TTraceId *traceId) { + Y_VERIFY(IsLog); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size); + Y_VERIFY(data); + Y_VERIFY(size > 0); + while (RecordBytesLeft > SectorBytesFree && size >= SectorBytesFree) { + const ui64 bytesToWrite = SectorBytesFree; + Write(data, bytesToWrite, reqId, traceId); + size -= bytesToWrite; + data = (ui8*)data + bytesToWrite; + + bool isLast = (RecordBytesLeft <= SectorBytesFree - sizeof(TLogPageHeader)); + ui8 flags = isLast ? LogPageLast : 0; + ui32 payloadSize = isLast ? 
RecordBytesLeft : SectorBytesFree - sizeof(TLogPageHeader); + TLogPageHeader header(flags, payloadSize); + RecordBytesLeft += sizeof(TLogPageHeader); + if (IsSysLog) { + *Mon.BandwidthPSysLogRecordHeader += sizeof(TLogPageHeader); + } else { + *Mon.BandwidthPLogRecordHeader += sizeof(TLogPageHeader); + } + if (ActorSystem) { + LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " Line# " << __LINE__ << " LogPageHeader writing" + << " chunkIdx# " << ChunkIdx << " sectorIdx# " << SectorIdx + << " Marker# BPD62"); + } + Write(&header, sizeof(TLogPageHeader), reqId, traceId); + } + Write(data, size, reqId, traceId); + if (RecordBytesLeft == 0 && SectorBytesFree > 0) { + if (SectorBytesFree < sizeof(TFirstLogPageHeader)) { + TerminateLog(reqId, traceId); + } + } + } + protected: void FinalizeWrite(ui64 size, TReqId reqId, NWilson::TTraceId *traceId) { CurrentPosition += size; @@ -501,7 +501,7 @@ protected: ui64 sectorOffset = Format.Offset(ChunkIdx, SectorIdx); PrepareDataSectorFooter(BufferedWriter->Get(), DataMagic, sectorOffset); if (IsLog) { - if (IsSysLog) { + if (IsSysLog) { *Mon.BandwidthPSysLogSectorFooter += sizeof(TDataSectorFooter); } else { *Mon.BandwidthPLogSectorFooter += sizeof(TDataSectorFooter); @@ -518,9 +518,9 @@ protected: } }; -using TChunkWriter = TSectorWriter<false, false>; -using TLogWriter = TSectorWriter<true, false>; -using TSysLogWriter = TSectorWriter<true, true>; - +using TChunkWriter = TSectorWriter<false, false>; +using TLogWriter = TSectorWriter<true, false>; +using TSysLogWriter = TSectorWriter<true, true>; + } // NPDisk } // NKikimr diff --git a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp index b74d850507..1e84f5d8f6 100644 --- a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp +++ b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp @@ -22,7 +22,7 @@ struct TPDiskMockState::TImpl { NPDisk::TOwnerRound OwnerRound = 0; TActorId CutLogId; 
std::deque<NPDisk::TLogRecord> Log; - TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; + TMap<TLogSignature, NPDisk::TLogRecord> StartingPoints; ui64 LogDataSize = 0; bool Slain = false; ui64 LastLsn = 0; @@ -404,7 +404,7 @@ public: // process the log entry bool isStartingPoint = false; - if (msg->Signature.HasCommitRecord()) { + if (msg->Signature.HasCommitRecord()) { const auto& cr = msg->CommitRecord; if (cr.FirstLsnToKeep) { // trim log std::deque<NPDisk::TLogRecord>::iterator it; @@ -425,10 +425,10 @@ public: } isStartingPoint = cr.IsStartingPoint; } - owner.Log.emplace_back(msg->Signature.GetUnmasked(), msg->Data, msg->Lsn); + owner.Log.emplace_back(msg->Signature.GetUnmasked(), msg->Data, msg->Lsn); owner.LogDataSize += msg->Data.size(); if (isStartingPoint) { - owner.StartingPoints[msg->Signature.GetUnmasked()] = owner.Log.back(); + owner.StartingPoints[msg->Signature.GetUnmasked()] = owner.Log.back(); } } Y_VERIFY(res); diff --git a/ydb/core/blobstorage/pdisk/ya.make b/ydb/core/blobstorage/pdisk/ya.make index a4b1ad99f2..464191b8c0 100644 --- a/ydb/core/blobstorage/pdisk/ya.make +++ b/ydb/core/blobstorage/pdisk/ya.make @@ -2,7 +2,7 @@ LIBRARY() OWNER( cthulhu - va-kuznecov + va-kuznecov g:kikimr ) @@ -15,7 +15,7 @@ PEERDIR( library/cpp/lwtrace library/cpp/monlib/dynamic_counters/percentile library/cpp/monlib/service/pages - util + util ydb/core/base ydb/core/base/services ydb/core/blobstorage/base @@ -32,31 +32,31 @@ PEERDIR( ydb/library/wilson ) -GENERATE_ENUM_SERIALIZATION(blobstorage_pdisk_state.h) - +GENERATE_ENUM_SERIALIZATION(blobstorage_pdisk_state.h) + SRCS( blobstorage_pdisk.cpp blobstorage_pdisk_actor.cpp blobstorage_pdisk_blockdevice_async.cpp blobstorage_pdisk_completion_impl.cpp blobstorage_pdisk_delayed_cost_loop.cpp - blobstorage_pdisk_driveestimator.cpp - blobstorage_pdisk_drivedata.cpp + blobstorage_pdisk_driveestimator.cpp + blobstorage_pdisk_drivedata.cpp blobstorage_pdisk_drivemodel_db.cpp blobstorage_pdisk_impl.cpp - 
blobstorage_pdisk_impl_http.cpp - blobstorage_pdisk_impl_log.cpp - blobstorage_pdisk_internal_interface.cpp - blobstorage_pdisk_logreader.cpp + blobstorage_pdisk_impl_http.cpp + blobstorage_pdisk_impl_log.cpp + blobstorage_pdisk_internal_interface.cpp + blobstorage_pdisk_logreader.cpp blobstorage_pdisk_mon.cpp blobstorage_pdisk_params.cpp blobstorage_pdisk_requestimpl.cpp blobstorage_pdisk_syslogreader.cpp - blobstorage_pdisk_sectorrestorator.cpp + blobstorage_pdisk_sectorrestorator.cpp blobstorage_pdisk_tools.cpp blobstorage_pdisk_util_atomicblockcounter.cpp blobstorage_pdisk_util_flightcontrol.cpp - blobstorage_pdisk_util_signal_event.cpp + blobstorage_pdisk_util_signal_event.cpp blobstorage_pdisk_util_wcache.cpp blobstorage_pdisk_writer.cpp blobstorage_pdisk_ut_helpers.cpp diff --git a/ydb/core/blobstorage/storagepoolmon/storagepool_counters.h b/ydb/core/blobstorage/storagepoolmon/storagepool_counters.h index 98fd2ee30a..73cc053c0e 100644 --- a/ydb/core/blobstorage/storagepoolmon/storagepool_counters.h +++ b/ydb/core/blobstorage/storagepoolmon/storagepool_counters.h @@ -6,7 +6,7 @@ #include <ydb/core/base/group_stat.h> #include <ydb/core/blobstorage/base/common_latency_hist_bounds.h> #include <ydb/core/mon/mon.h> - + #include <util/generic/bitops.h> #include <util/generic/ptr.h> @@ -22,22 +22,22 @@ struct TRequestMonItem { NMonitoring::TDynamicCounters::TCounterPtr GeneratedSubrequestBytes; NMonitoring::THistogramPtr ResponseTime; - void Init(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TPDiskCategory::EDeviceType type) { + void Init(TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TPDiskCategory::EDeviceType type) { RequestBytes = counters->GetCounter("requestBytes", true); GeneratedSubrequests = counters->GetCounter("generatedSubrequests", true); GeneratedSubrequestBytes = counters->GetCounter("generatedSubrequestBytes", true); - NMonitoring::TBucketBounds bounds = GetCommonLatencyHistBounds(type); + NMonitoring::TBucketBounds bounds = 
GetCommonLatencyHistBounds(type); - ResponseTime = counters->GetNamedHistogram("sensor", "responseTimeMs", - NMonitoring::ExplicitHistogram(std::move(bounds))); + ResponseTime = counters->GetNamedHistogram("sensor", "responseTimeMs", + NMonitoring::ExplicitHistogram(std::move(bounds))); } void Register(ui32 requestBytes, ui32 generatedSubrequests, ui32 generatedSubrequestBytes, double durationSeconds) { *RequestBytes += requestBytes; *GeneratedSubrequests += generatedSubrequests; *GeneratedSubrequestBytes += generatedSubrequestBytes; - ResponseTime->Collect(durationSeconds * 1000.0); + ResponseTime->Collect(durationSeconds * 1000.0); } }; @@ -46,10 +46,10 @@ public: enum EHandleClass { HcPutTabletLog = 0, HcPutUserData = 1, - HcPutAsync = 2, + HcPutAsync = 2, HcGetFast = 3, - HcGetAsync = 4, - HcGetDiscover = 5, + HcGetAsync = 4, + HcGetDiscover = 5, HcGetLow = 6, HcCount = 7 }; @@ -60,14 +60,14 @@ public: return "PutTabletLog"; case HcPutUserData: return "PutUserData"; - case HcPutAsync: - return "PutAsync"; + case HcPutAsync: + return "PutAsync"; case HcGetFast: return "GetFast"; - case HcGetAsync: - return "GetAsync"; - case HcGetDiscover: - return "GetDiscover"; + case HcGetAsync: + return "GetAsync"; + case HcGetDiscover: + return "GetDiscover"; case HcGetLow: return "GetLow"; case HcCount: @@ -119,8 +119,8 @@ public: return RequestMon[(ui32)handleClass][sizeClassIdx]; } - TStoragePoolCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, const TString &storagePoolName, - TPDiskCategory::EDeviceType type) { + TStoragePoolCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> &counters, const TString &storagePoolName, + TPDiskCategory::EDeviceType type) { StoragePoolName = storagePoolName; TIntrusivePtr<NMonitoring::TDynamicCounters> poolGroup = counters->GetSubgroup("storagePool", storagePoolName); for (ui32 handleClass = 0; handleClass < (ui32)HcCount; ++handleClass) { @@ -128,7 +128,7 @@ public: TIntrusivePtr<NMonitoring::TDynamicCounters> hcGroup = 
poolGroup->GetSubgroup("handleClass", handleClassName); for (ui32 sizeClassIdx = 0; sizeClassIdx <= MaxSizeClassBucketIdx; ++sizeClassIdx) { TString sizeClassName = SizeClassName(sizeClassIdx); - RequestMon[handleClass][sizeClassIdx].Init(hcGroup->GetSubgroup("sizeClass", sizeClassName), type); + RequestMon[handleClass][sizeClassIdx].Init(hcGroup->GetSubgroup("sizeClass", sizeClassName), type); } } } @@ -146,13 +146,13 @@ public: Counters = group->GetSubgroup("subsystem", "request"); }; - TIntrusivePtr<TStoragePoolCounters> GetPoolCounters(const TString &storagePoolName, - TPDiskCategory::EDeviceType type = TPDiskCategory::DEVICE_TYPE_UNKNOWN) { + TIntrusivePtr<TStoragePoolCounters> GetPoolCounters(const TString &storagePoolName, + TPDiskCategory::EDeviceType type = TPDiskCategory::DEVICE_TYPE_UNKNOWN) { auto it = StoragePoolCounters.find(storagePoolName); if (it != StoragePoolCounters.end()) { return it->second; } - TIntrusivePtr<TStoragePoolCounters> spc = MakeIntrusive<TStoragePoolCounters>(Counters, storagePoolName, type); + TIntrusivePtr<TStoragePoolCounters> spc = MakeIntrusive<TStoragePoolCounters>(Counters, storagePoolName, type); StoragePoolCounters.emplace(storagePoolName, spc); return spc; } diff --git a/ydb/core/blobstorage/testload/test_load_actor.cpp b/ydb/core/blobstorage/testload/test_load_actor.cpp index 8c48924c0b..c8788b1310 100644 --- a/ydb/core/blobstorage/testload/test_load_actor.cpp +++ b/ydb/core/blobstorage/testload/test_load_actor.cpp @@ -28,15 +28,15 @@ class TLoadActor : public TActorBootstrapped<TLoadActor> { TString ErrorMessage; }; - struct TFinishedTestInfo { - ui64 Tag; - TString ErrorReason; - TInstant FinishTime; - }; - - // info about finished actors - TVector<TFinishedTestInfo> FinishedTests; - + struct TFinishedTestInfo { + ui64 Tag; + TString ErrorReason; + TInstant FinishTime; + }; + + // info about finished actors + TVector<TFinishedTestInfo> FinishedTests; + // currently running load actors TMap<ui64, TActorId> LoadActors; 
@@ -46,16 +46,16 @@ class TLoadActor : public TActorBootstrapped<TLoadActor> { // HTTP info requests being currently executed THashMap<ui32, THttpInfoRequest> InfoRequests; - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::BS_LOAD_ACTOR; } - TLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) + TLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) : NextRequestId(1) - , Counters(counters) + , Counters(counters) {} void Bootstrap(const TActorContext& /*ctx*/) { @@ -69,8 +69,8 @@ public: try { ProcessCmd(record, ctx); } catch (const TLoadActorException& ex) { - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, "Exception while creating load actor, what# " - << ex.what()); + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, "Exception while creating load actor, what# " + << ex.what()); status = NMsgBusProxy::MSTATUS_ERROR; error = ex.what(); } @@ -85,99 +85,99 @@ public: ctx.Send(ev->Sender, response.release()); } - template<typename T> - ui64 GetOrGenerateTag(const T& cmd) { - if (cmd.HasTag()) { - return cmd.GetTag(); - } else { - if (LoadActors.empty()) { - return 1; - } else { - return LoadActors.rbegin()->first + 1; - } - } - } - + template<typename T> + ui64 GetOrGenerateTag(const T& cmd) { + if (cmd.HasTag()) { + return cmd.GetTag(); + } else { + if (LoadActors.empty()) { + return 1; + } else { + return LoadActors.rbegin()->first + 1; + } + } + } + void ProcessCmd(const NKikimrBlobStorage::TEvTestLoadRequest& record, const TActorContext& ctx) { switch (record.Command_case()) { case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kLoadStart: { const auto& cmd = record.GetLoadStart(); - const ui64 tag = GetOrGenerateTag(cmd); - if (LoadActors.count(tag) != 0) { - ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" 
PRIu64, tag); + const ui64 tag = GetOrGenerateTag(cmd); + if (LoadActors.count(tag) != 0) { + ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); - LoadActors.emplace(tag, ctx.Register(CreateWriterTestLoad(cmd, ctx.SelfID, - GetServiceCounters(Counters, "load_actor"), tag))); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); + LoadActors.emplace(tag, ctx.Register(CreateWriterTestLoad(cmd, ctx.SelfID, + GetServiceCounters(Counters, "load_actor"), tag))); break; } case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kLoadStop: { const auto& cmd = record.GetLoadStop(); - if (cmd.HasRemoveAllTags() && cmd.GetRemoveAllTags()) { - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Delete all running load actors"); - for (auto& actorPair : LoadActors) { - ctx.Send(actorPair.second, new TEvents::TEvPoisonPill); - } - } else { - VERIFY_PARAM(Tag); - const ui64 tag = cmd.GetTag(); - auto iter = LoadActors.find(tag); - if (iter == LoadActors.end()) { - ythrow TLoadActorException() - << Sprintf("load actor with Tag# %" PRIu64 " not found", tag); - } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Delete running load actor with tag# " - << tag); - ctx.Send(iter->second, new TEvents::TEvPoisonPill); + if (cmd.HasRemoveAllTags() && cmd.GetRemoveAllTags()) { + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Delete all running load actors"); + for (auto& actorPair : LoadActors) { + ctx.Send(actorPair.second, new TEvents::TEvPoisonPill); + } + } else { + VERIFY_PARAM(Tag); + const ui64 tag = cmd.GetTag(); + auto iter = LoadActors.find(tag); + if (iter == LoadActors.end()) { + ythrow TLoadActorException() + << Sprintf("load actor with Tag# %" PRIu64 " not found", tag); + } + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Delete running load actor with tag# " + << tag); + ctx.Send(iter->second, new 
TEvents::TEvPoisonPill); } break; } case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kPDiskLoadStart: { const auto& cmd = record.GetPDiskLoadStart(); - const ui64 tag = GetOrGenerateTag(cmd); - if (LoadActors.count(tag) != 0) { - ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); + const ui64 tag = GetOrGenerateTag(cmd); + if (LoadActors.count(tag) != 0) { + ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); + } + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); + LoadActors.emplace(tag, ctx.Register(CreatePDiskWriterTestLoad( + cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); + break; + } + + case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kPDiskReadLoadStart: { + const auto& cmd = record.GetPDiskReadLoadStart(); + const ui64 tag = GetOrGenerateTag(cmd); + if (LoadActors.count(tag) != 0) { + ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); + } + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); + LoadActors.emplace(tag, ctx.Register(CreatePDiskReaderTestLoad( + cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); + break; + } + + case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kPDiskLogLoadStart: { + const auto& cmd = record.GetPDiskLogLoadStart(); + const ui64 tag = GetOrGenerateTag(cmd); + if (LoadActors.count(tag) != 0) { + ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); - LoadActors.emplace(tag, ctx.Register(CreatePDiskWriterTestLoad( - cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); + LoadActors.emplace(tag, 
ctx.Register(CreatePDiskLogWriterTestLoad( + cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); break; } - case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kPDiskReadLoadStart: { - const auto& cmd = record.GetPDiskReadLoadStart(); - const ui64 tag = GetOrGenerateTag(cmd); - if (LoadActors.count(tag) != 0) { - ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); - } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); - LoadActors.emplace(tag, ctx.Register(CreatePDiskReaderTestLoad( - cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); - break; - } - - case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kPDiskLogLoadStart: { - const auto& cmd = record.GetPDiskLogLoadStart(); - const ui64 tag = GetOrGenerateTag(cmd); - if (LoadActors.count(tag) != 0) { - ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); - } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); - LoadActors.emplace(tag, ctx.Register(CreatePDiskLogWriterTestLoad( - cmd, ctx.SelfID, GetServiceCounters(Counters, "load_actor"), 0, tag))); - break; - } - case NKikimrBlobStorage::TEvTestLoadRequest::CommandCase::kVDiskLoadStart: { const auto& cmd = record.GetVDiskLoadStart(); - const ui64 tag = GetOrGenerateTag(cmd); - if (LoadActors.count(tag) != 0) { - ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); + const ui64 tag = GetOrGenerateTag(cmd); + if (LoadActors.count(tag) != 0) { + ythrow TLoadActorException() << Sprintf("duplicate load actor with Tag# %" PRIu64, tag); } - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); - LoadActors.emplace(tag, ctx.Register(CreateVDiskWriterTestLoad(cmd, ctx.SelfID, tag))); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Create new load actor with tag# " << tag); + 
LoadActors.emplace(tag, ctx.Register(CreateVDiskWriterTestLoad(cmd, ctx.SelfID, tag))); break; } @@ -220,14 +220,14 @@ public: break; } - default: { - TString protoTxt; - google::protobuf::TextFormat::PrintToString(record, &protoTxt); - ythrow TLoadActorException() << (TStringBuilder() - << "TLoadActor::Handle(TEvBlobStorage::TEvTestLoadRequest): unexpected command case: " - << ui32(record.Command_case()) - << " protoTxt# " << protoTxt.Quote()); - } + default: { + TString protoTxt; + google::protobuf::TextFormat::PrintToString(record, &protoTxt); + ythrow TLoadActorException() << (TStringBuilder() + << "TLoadActor::Handle(TEvBlobStorage::TEvTestLoadRequest): unexpected command case: " + << ui32(record.Command_case()) + << " protoTxt# " << protoTxt.Quote()); + } } } @@ -235,11 +235,11 @@ public: const auto& msg = ev->Get(); auto iter = LoadActors.find(msg->Tag); Y_VERIFY(iter != LoadActors.end()); - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Load actor with tag# " << msg->Tag << " finished"); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Load actor with tag# " << msg->Tag << " finished"); LoadActors.erase(iter); FinishedTests.push_back({msg->Tag, msg->ErrorReason, TAppData::TimeProvider->Now()}); - + auto it = InfoRequests.begin(); while (it != InfoRequests.end()) { auto next = std::next(it); @@ -356,22 +356,22 @@ public: } } } - - COLLAPSED_BUTTON_CONTENT("finished_tests_info", "Finished tests") { - for (const auto& req : FinishedTests) { - DIV_CLASS("panel panel-info") { - DIV_CLASS("panel-heading") { - str << "Tag# " << req.Tag; - } - DIV_CLASS("panel-body") { - str << "<p>"; - str << "Finish reason# " << req.ErrorReason << "<br/>"; - str << "Finish time# " << req.FinishTime << "<br/>"; - str << "</p>"; - } - } - } - } + + COLLAPSED_BUTTON_CONTENT("finished_tests_info", "Finished tests") { + for (const auto& req : FinishedTests) { + DIV_CLASS("panel panel-info") { + DIV_CLASS("panel-heading") { + str << "Tag# " << req.Tag; + } + DIV_CLASS("panel-body") 
{ + str << "<p>"; + str << "Finish reason# " << req.ErrorReason << "<br/>"; + str << "Finish time# " << req.FinishTime << "<br/>"; + str << "</p>"; + } + } + } + } } } @@ -388,8 +388,8 @@ public: ) }; -IActor *CreateTestLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) { - return new TLoadActor(counters); +IActor *CreateTestLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) { + return new TLoadActor(counters); } } // NKikimr diff --git a/ydb/core/blobstorage/testload/test_load_actor.h b/ydb/core/blobstorage/testload/test_load_actor.h index df89fdbd34..38b7fb3d30 100644 --- a/ydb/core/blobstorage/testload/test_load_actor.h +++ b/ydb/core/blobstorage/testload/test_load_actor.h @@ -3,48 +3,48 @@ #include "defs.h" #include <ydb/core/base/blobstorage.h> #include <library/cpp/monlib/dynamic_counters/percentile/percentile_lg.h> -#include <cmath> +#include <cmath> namespace NKikimr { - enum { - EvStopTest = EventSpaceBegin(TKikimrEvents::ES_PRIVATE), - EvUpdateQuantile, - EvUpdateMonitoring, - }; - - struct TEvStopTest : TEventLocal<TEvStopTest, EvStopTest> - {}; - constexpr TDuration DelayBeforeMeasurements = TDuration::Seconds(15); - - struct TEvUpdateQuantile : TEventLocal<TEvUpdateQuantile, EvUpdateQuantile> - {}; - - constexpr ui64 MonitoringUpdateCycleMs = 1000; - - struct TEvUpdateMonitoring : TEventLocal<TEvUpdateMonitoring, EvUpdateMonitoring> - {}; - - + enum { + EvStopTest = EventSpaceBegin(TKikimrEvents::ES_PRIVATE), + EvUpdateQuantile, + EvUpdateMonitoring, + }; + + struct TEvStopTest : TEventLocal<TEvStopTest, EvStopTest> + {}; + constexpr TDuration DelayBeforeMeasurements = TDuration::Seconds(15); + + struct TEvUpdateQuantile : TEventLocal<TEvUpdateQuantile, EvUpdateQuantile> + {}; + + constexpr ui64 MonitoringUpdateCycleMs = 1000; + + struct TEvUpdateMonitoring : TEventLocal<TEvUpdateMonitoring, EvUpdateMonitoring> + {}; + + class TLoadActorException : public yexception { }; - NActors::IActor 
*CreateTestLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters); + NActors::IActor *CreateTestLoadActor(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters); NActors::IActor *CreateWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TLoadStart& cmd, const NActors::TActorId& parent, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, ui64 tag); NActors::IActor *CreatePDiskWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLoadStart& cmd, const NActors::TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - ui64 index, ui64 tag); + ui64 index, ui64 tag); - NActors::IActor *CreatePDiskLogWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, + NActors::IActor *CreatePDiskLogWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, const NActors::TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - ui64 index, ui64 tag); - - NActors::IActor *CreatePDiskReaderTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskReadLoadStart& cmd, + ui64 index, ui64 tag); + + NActors::IActor *CreatePDiskReaderTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskReadLoadStart& cmd, const NActors::TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - ui64 index, ui64 tag); - + ui64 index, ui64 tag); + NActors::IActor *CreateVDiskWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TVDiskLoadStart& cmd, const NActors::TActorId& parent, ui64 tag); @@ -60,58 +60,58 @@ namespace NKikimr { const NActors::TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag); - struct TLoadReport : public TThrRefBase { - enum ELoadType { - LOAD_READ, - LOAD_WRITE, - LOAD_LOG_WRITE, - }; - - TDuration Duration; - ui64 Size; - ui32 InFlight; - TVector<ui64> RwSpeedBps; - ELoadType LoadType; + struct TLoadReport : public TThrRefBase { + enum 
ELoadType { + LOAD_READ, + LOAD_WRITE, + LOAD_LOG_WRITE, + }; + + TDuration Duration; + ui64 Size; + ui32 InFlight; + TVector<ui64> RwSpeedBps; + ELoadType LoadType; NMonitoring::TPercentileTrackerLg<10, 4, 1> LatencyUs; // Upper threshold of this tracker is ~134 seconds, size is 256kB - TMap<double, ui64> DeviceLatency; - - double GetAverageSpeed() const { + TMap<double, ui64> DeviceLatency; + + double GetAverageSpeed() const { if (RwSpeedBps.size() < 1) { - return 0; - } - double avg = 0; - for (const ui64& speed : RwSpeedBps) { - avg += speed; - } + return 0; + } + double avg = 0; + for (const ui64& speed : RwSpeedBps) { + avg += speed; + } avg /= RwSpeedBps.size(); - return avg; - } - - double GetSpeedDeviation() const { + return avg; + } + + double GetSpeedDeviation() const { if (RwSpeedBps.size() <= 1) { - return 0; - } - i64 avg = (i64)GetAverageSpeed(); - double sd = 0; - for (const ui64& speed : RwSpeedBps) { - sd += ((i64)speed - avg) * ((i64)speed - avg); - } + return 0; + } + i64 avg = (i64)GetAverageSpeed(); + double sd = 0; + for (const ui64& speed : RwSpeedBps) { + sd += ((i64)speed - avg) * ((i64)speed - avg); + } sd /= RwSpeedBps.size(); - return std::sqrt(sd); - } - - TString LoadTypeName() const { - switch (LoadType) { - case LOAD_READ: - return "read"; - case LOAD_WRITE: - return "write"; - case LOAD_LOG_WRITE: - return "log_write"; - } - } - }; - + return std::sqrt(sd); + } + + TString LoadTypeName() const { + switch (LoadType) { + case LOAD_READ: + return "read"; + case LOAD_WRITE: + return "write"; + case LOAD_LOG_WRITE: + return "log_write"; + } + } + }; + struct TEvTestLoadFinished : public TEventLocal<TEvTestLoadFinished, TEvBlobStorage::EvTestLoadFinished> { ui64 Tag; TIntrusivePtr<TLoadReport> Report; // nullptr indicates error @@ -119,7 +119,7 @@ namespace NKikimr { TEvTestLoadFinished(ui64 tag, TIntrusivePtr<TLoadReport> report, TString errorReason) : Tag(tag) - , Report(report) + , Report(report) , ErrorReason(errorReason) {} }; diff 
--git a/ydb/core/blobstorage/testload/test_load_pdisk_log.cpp b/ydb/core/blobstorage/testload/test_load_pdisk_log.cpp index 8129e15f41..33fc363c93 100644 --- a/ydb/core/blobstorage/testload/test_load_pdisk_log.cpp +++ b/ydb/core/blobstorage/testload/test_load_pdisk_log.cpp @@ -1,701 +1,701 @@ -#include <util/random/shuffle.h> -#include "test_load_actor.h" +#include <util/random/shuffle.h> +#include "test_load_actor.h" #include <ydb/core/base/counters.h> #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <library/cpp/monlib/service/pages/templates.h> -#include <util/random/fast.h> -#include <util/generic/queue.h> - -namespace NKikimr { -class TPDiskLogWriterTestLoadActor; - -#define VAR_OUT(x) #x "# " << x << "; " - -#define PARAM(NAME, VALUE) \ - TABLER() { \ - TABLED() { str << NAME; } \ - TABLED() { str << VALUE; } \ - } - -class TWorker { - friend class TPDiskLogWriterTestLoadActor; - - TVDiskID VDiskId; - ui32 Idx; - NPDisk::TOwnerRound OwnerRound = 0; - - ui64 Lsn = 1; - - TControlWrapper MaxInFlight; - ui32 LogInFlight = 0; - - ui64 BurstWrittenBytes = 0; - ui64 BytesInFlight = 0; - bool IsDying = false; - bool IsHarakiriSent = false; - bool IsStartingPointWritten = false; - - ui32 SizeMin; - ui32 SizeMax; - - // | <-- BurstSize --> | - // | <-- BurstInterval ------------------> | - // ********************_____________________********************_____________________ - // '*' Data writing - // '_' Idle - ui64 BurstInterval; - ui64 BurstSize; - ui64 BurstIdx; - - ui64 StorageDuration; - ui64 CutLogLsn = Lsn; - ui64 CutLogBytesWritten = 0; - TMaybe<ui64> NextCutLogLsn = Lsn; - ui64 NextCutLogBytesWritten = 0; - - ui64 StartingPoint = 0; - ui64 NextStartingPoint = Lsn; - - TString DataBuffer; - TReallyFastRng32 *Gen; - - TIntrusivePtr<TPDiskParams> PDiskParams; - - NMonitoring::TDynamicCounters::TCounterPtr LogEntriesWritten; - - NPDisk::TLogPosition LogReadPosition{0, 0}; - - ui32 
GenSize() const { - return Gen->Uniform(SizeMin, SizeMax + 1); - } - -public: - - TWorker(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart::TWorkerConfig& cmd, - ui32 idx, TReallyFastRng32 *gen) - : Idx(idx) - , MaxInFlight(1, 0, 65536) - , Gen(gen) - { - - VERIFY_PARAM(MaxInFlight); - MaxInFlight = cmd.GetMaxInFlight(); - - VERIFY_PARAM(SizeIntervalMin); - SizeMin = cmd.GetSizeIntervalMin(); - VERIFY_PARAM(SizeIntervalMax); - SizeMax = cmd.GetSizeIntervalMax(); - DataBuffer = TString::TUninitialized(SizeMax); - ::memset(DataBuffer.Detach(), 0, SizeMax); - - VERIFY_PARAM(VDiskId); - VDiskId = VDiskIDFromVDiskID(cmd.GetVDiskId()); - - // Burst control - VERIFY_PARAM(BurstInterval); - BurstInterval = cmd.GetBurstInterval(); - VERIFY_PARAM(BurstSize); - BurstSize = cmd.GetBurstSize(); - - BurstIdx = 0; - - VERIFY_PARAM(StorageDuration); - StorageDuration = cmd.GetStorageDuration(); - } - +#include <util/random/fast.h> +#include <util/generic/queue.h> + +namespace NKikimr { +class TPDiskLogWriterTestLoadActor; + +#define VAR_OUT(x) #x "# " << x << "; " + +#define PARAM(NAME, VALUE) \ + TABLER() { \ + TABLED() { str << NAME; } \ + TABLED() { str << VALUE; } \ + } + +class TWorker { + friend class TPDiskLogWriterTestLoadActor; + + TVDiskID VDiskId; + ui32 Idx; + NPDisk::TOwnerRound OwnerRound = 0; + + ui64 Lsn = 1; + + TControlWrapper MaxInFlight; + ui32 LogInFlight = 0; + + ui64 BurstWrittenBytes = 0; + ui64 BytesInFlight = 0; + bool IsDying = false; + bool IsHarakiriSent = false; + bool IsStartingPointWritten = false; + + ui32 SizeMin; + ui32 SizeMax; + + // | <-- BurstSize --> | + // | <-- BurstInterval ------------------> | + // ********************_____________________********************_____________________ + // '*' Data writing + // '_' Idle + ui64 BurstInterval; + ui64 BurstSize; + ui64 BurstIdx; + + ui64 StorageDuration; + ui64 CutLogLsn = Lsn; + ui64 CutLogBytesWritten = 0; + TMaybe<ui64> NextCutLogLsn = Lsn; + ui64 NextCutLogBytesWritten = 0; 
+ + ui64 StartingPoint = 0; + ui64 NextStartingPoint = Lsn; + + TString DataBuffer; + TReallyFastRng32 *Gen; + + TIntrusivePtr<TPDiskParams> PDiskParams; + + NMonitoring::TDynamicCounters::TCounterPtr LogEntriesWritten; + + NPDisk::TLogPosition LogReadPosition{0, 0}; + + ui32 GenSize() const { + return Gen->Uniform(SizeMin, SizeMax + 1); + } + +public: + + TWorker(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart::TWorkerConfig& cmd, + ui32 idx, TReallyFastRng32 *gen) + : Idx(idx) + , MaxInFlight(1, 0, 65536) + , Gen(gen) + { + + VERIFY_PARAM(MaxInFlight); + MaxInFlight = cmd.GetMaxInFlight(); + + VERIFY_PARAM(SizeIntervalMin); + SizeMin = cmd.GetSizeIntervalMin(); + VERIFY_PARAM(SizeIntervalMax); + SizeMax = cmd.GetSizeIntervalMax(); + DataBuffer = TString::TUninitialized(SizeMax); + ::memset(DataBuffer.Detach(), 0, SizeMax); + + VERIFY_PARAM(VDiskId); + VDiskId = VDiskIDFromVDiskID(cmd.GetVDiskId()); + + // Burst control + VERIFY_PARAM(BurstInterval); + BurstInterval = cmd.GetBurstInterval(); + VERIFY_PARAM(BurstSize); + BurstSize = cmd.GetBurstSize(); + + BurstIdx = 0; + + VERIFY_PARAM(StorageDuration); + StorageDuration = cmd.GetStorageDuration(); + } + std::unique_ptr<NPDisk::TEvYardInit> GetYardInit(ui64 pDiskGuid) const { return std::make_unique<NPDisk::TEvYardInit>(OwnerRound, VDiskId, pDiskGuid); - } - - + } + + std::unique_ptr<NPDisk::TEvLog> TrySend(const ui64 globalWrittenBytes) { - if (IsDying || IsHarakiriSent) { - return {}; - } - - if (BurstInterval * (BurstIdx + 1) <= globalWrittenBytes && BurstWrittenBytes >= BurstSize) { - BurstWrittenBytes = 0; - ++BurstIdx; - } - + if (IsDying || IsHarakiriSent) { + return {}; + } + + if (BurstInterval * (BurstIdx + 1) <= globalWrittenBytes && BurstWrittenBytes >= BurstSize) { + BurstWrittenBytes = 0; + ++BurstIdx; + } + std::unique_ptr<NPDisk::TEvLog> ev; - - if (BurstWrittenBytes + BytesInFlight < BurstSize - && BurstInterval * BurstIdx <= globalWrittenBytes - && globalWrittenBytes < 
BurstInterval * (BurstIdx + 1) - && LogInFlight < MaxInFlight) { - TLsnSeg seg(Lsn, Lsn); - ++Lsn; - if (NextCutLogLsn) { - NPDisk::TCommitRecord record; - record.FirstLsnToKeep = CutLogLsn; - record.IsStartingPoint = true; - IsStartingPointWritten = true; - NextStartingPoint = Lsn - 1; - - CutLogLsn = *NextCutLogLsn; - CutLogBytesWritten = NextCutLogBytesWritten; + + if (BurstWrittenBytes + BytesInFlight < BurstSize + && BurstInterval * BurstIdx <= globalWrittenBytes + && globalWrittenBytes < BurstInterval * (BurstIdx + 1) + && LogInFlight < MaxInFlight) { + TLsnSeg seg(Lsn, Lsn); + ++Lsn; + if (NextCutLogLsn) { + NPDisk::TCommitRecord record; + record.FirstLsnToKeep = CutLogLsn; + record.IsStartingPoint = true; + IsStartingPointWritten = true; + NextStartingPoint = Lsn - 1; + + CutLogLsn = *NextCutLogLsn; + CutLogBytesWritten = NextCutLogBytesWritten; ev = std::make_unique<NPDisk::TEvLog>(PDiskParams->Owner, OwnerRound, TLogSignature(), - record, DataBuffer, seg, nullptr); - } else { + record, DataBuffer, seg, nullptr); + } else { ev = std::make_unique<NPDisk::TEvLog>(PDiskParams->Owner, OwnerRound, TLogSignature(), - DataBuffer, seg, nullptr); - } - BytesInFlight += DataBuffer.Size(); - ++LogInFlight; - } - return ev; - } - - void OnLogResult(const NPDisk::TEvLogResult::TRecord& rec, ui32 size) { - --LogInFlight; - BytesInFlight -= size; - BurstWrittenBytes += size; - - if (rec.Lsn == NextStartingPoint) { - StartingPoint = NextStartingPoint; - NextCutLogLsn.Clear(); - } - - if (!NextCutLogLsn && GetReallyWrittenBytes() - CutLogBytesWritten >= StorageDuration) { - NextCutLogLsn = Lsn - 1; - NextCutLogBytesWritten = GetReallyWrittenBytes(); - } - } - - void PoisonPill() { - MaxInFlight = 0; - } - - bool CheckDie() { - if (!MaxInFlight && !LogInFlight && !IsDying) { - if (PDiskParams) { - IsDying = true; - } - return true; - } - return false; - } - + DataBuffer, seg, nullptr); + } + BytesInFlight += DataBuffer.Size(); + ++LogInFlight; + } + return ev; + } + + void 
OnLogResult(const NPDisk::TEvLogResult::TRecord& rec, ui32 size) { + --LogInFlight; + BytesInFlight -= size; + BurstWrittenBytes += size; + + if (rec.Lsn == NextStartingPoint) { + StartingPoint = NextStartingPoint; + NextCutLogLsn.Clear(); + } + + if (!NextCutLogLsn && GetReallyWrittenBytes() - CutLogBytesWritten >= StorageDuration) { + NextCutLogLsn = Lsn - 1; + NextCutLogBytesWritten = GetReallyWrittenBytes(); + } + } + + void PoisonPill() { + MaxInFlight = 0; + } + + bool CheckDie() { + if (!MaxInFlight && !LogInFlight && !IsDying) { + if (PDiskParams) { + IsDying = true; + } + return true; + } + return false; + } + std::unique_ptr<NPDisk::TEvHarakiri> GetHarakiri() { - Y_VERIFY(IsDying); - Y_VERIFY(LogReadPosition == NPDisk::TLogPosition::Invalid()); + Y_VERIFY(IsDying); + Y_VERIFY(LogReadPosition == NPDisk::TLogPosition::Invalid()); return std::make_unique<NPDisk::TEvHarakiri>(PDiskParams->Owner, OwnerRound); - } - + } + std::unique_ptr<NPDisk::TEvReadLog> GetLogRead() { - if (LogReadPosition == NPDisk::TLogPosition::Invalid()) { - return {}; - } else { + if (LogReadPosition == NPDisk::TLogPosition::Invalid()) { + return {}; + } else { return std::make_unique<NPDisk::TEvReadLog>(PDiskParams->Owner, OwnerRound, LogReadPosition); - } - } - - void CheckStartingPoints(const TMap<TLogSignature, NPDisk::TLogRecord>& startingPoints) { - if (!IsStartingPointWritten) { - return; - } - - auto it = startingPoints.find(TLogSignature()); - Y_VERIFY_S(it != startingPoints.end(), - VAR_OUT((ui32)PDiskParams->Owner) << - VAR_OUT(StartingPoint) << - VAR_OUT(NextStartingPoint)); - const ui64 realStartingPoint = it->second.Lsn; - Y_VERIFY(realStartingPoint == StartingPoint || realStartingPoint == NextStartingPoint); - // Set StartingPoint to real point to start check from it - StartingPoint = realStartingPoint; - } - - bool FindLastWrittenLsn(const NPDisk::TEvReadLogResult* msg) { - for (const auto& res : msg->Results) { - Lsn = Max(Lsn, res.Lsn + 1); - } - - if 
(msg->IsEndOfLog) { - LogReadPosition = NPDisk::TLogPosition{0, 0}; - } else { - LogReadPosition = msg->NextPosition; - } - return msg->IsEndOfLog; - } - - void CheckLogRecords(const NPDisk::TEvReadLogResult* msg) { - Y_VERIFY(msg->Status == NKikimrProto::OK); - for (const auto& res : msg->Results) { - if (res.Lsn < StartingPoint) { - continue; - } - Y_VERIFY_S(StartingPoint == res.Lsn, - VAR_OUT(StartingPoint) << - VAR_OUT(res.Lsn)); - ++StartingPoint; - } - - if (msg->IsEndOfLog) { - Y_VERIFY_S(!IsStartingPointWritten || StartingPoint == Lsn, - VAR_OUT(StartingPoint) << - VAR_OUT(Lsn)); - LogReadPosition = NPDisk::TLogPosition::Invalid(); - } else { - LogReadPosition = msg->NextPosition; - } - } - - ui64 GetReallyWrittenBytes() const { - return BurstSize * BurstIdx + BurstWrittenBytes; - } - - ui64 GetGlobalWrittenBytes() const { - return BurstInterval * BurstIdx + BurstWrittenBytes; - } - - ~TWorker() { - } -}; - -class TPDiskLogWriterTestLoadActor : public TActorBootstrapped<TPDiskLogWriterTestLoadActor> { - struct TRequestInfo { - ui32 Size; - TInstant LogStartTime; - }; - - struct TRequestStat { - ui64 BytesWrittenTotal; - ui32 Size; - TDuration Latency; - }; - - struct TLogWriteCookie { - ui32 WorkerIdx; - TInstant SentTime; - ui64 Size; - }; - + } + } + + void CheckStartingPoints(const TMap<TLogSignature, NPDisk::TLogRecord>& startingPoints) { + if (!IsStartingPointWritten) { + return; + } + + auto it = startingPoints.find(TLogSignature()); + Y_VERIFY_S(it != startingPoints.end(), + VAR_OUT((ui32)PDiskParams->Owner) << + VAR_OUT(StartingPoint) << + VAR_OUT(NextStartingPoint)); + const ui64 realStartingPoint = it->second.Lsn; + Y_VERIFY(realStartingPoint == StartingPoint || realStartingPoint == NextStartingPoint); + // Set StartingPoint to real point to start check from it + StartingPoint = realStartingPoint; + } + + bool FindLastWrittenLsn(const NPDisk::TEvReadLogResult* msg) { + for (const auto& res : msg->Results) { + Lsn = Max(Lsn, res.Lsn + 1); + } + + 
if (msg->IsEndOfLog) { + LogReadPosition = NPDisk::TLogPosition{0, 0}; + } else { + LogReadPosition = msg->NextPosition; + } + return msg->IsEndOfLog; + } + + void CheckLogRecords(const NPDisk::TEvReadLogResult* msg) { + Y_VERIFY(msg->Status == NKikimrProto::OK); + for (const auto& res : msg->Results) { + if (res.Lsn < StartingPoint) { + continue; + } + Y_VERIFY_S(StartingPoint == res.Lsn, + VAR_OUT(StartingPoint) << + VAR_OUT(res.Lsn)); + ++StartingPoint; + } + + if (msg->IsEndOfLog) { + Y_VERIFY_S(!IsStartingPointWritten || StartingPoint == Lsn, + VAR_OUT(StartingPoint) << + VAR_OUT(Lsn)); + LogReadPosition = NPDisk::TLogPosition::Invalid(); + } else { + LogReadPosition = msg->NextPosition; + } + } + + ui64 GetReallyWrittenBytes() const { + return BurstSize * BurstIdx + BurstWrittenBytes; + } + + ui64 GetGlobalWrittenBytes() const { + return BurstInterval * BurstIdx + BurstWrittenBytes; + } + + ~TWorker() { + } +}; + +class TPDiskLogWriterTestLoadActor : public TActorBootstrapped<TPDiskLogWriterTestLoadActor> { + struct TRequestInfo { + ui32 Size; + TInstant LogStartTime; + }; + + struct TRequestStat { + ui64 BytesWrittenTotal; + ui32 Size; + TDuration Latency; + }; + + struct TLogWriteCookie { + ui32 WorkerIdx; + TInstant SentTime; + ui64 Size; + }; + TVector<std::unique_ptr<TWorker>> Workers; - - ui64 WrittenBytes = 0; - + + ui64 WrittenBytes = 0; + const TActorId Parent; - ui64 Tag; - ui32 DurationSeconds; - i32 OwnerInitInProgress = 0; - ui32 HarakiriInFlight = 0; - - TReallyFastRng32 Rng; - - // Monitoring - TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; - TInstant TestStartTime; - - ui32 PDiskId; - ui64 PDiskGuid; - - bool IsWardenlessTest = false; - bool IsDying = false; - - NMonitoring::TDynamicCounters::TCounterPtr LogBytesWritten; - ui64 ReqIdx = 0; - TMap<ui64, TLogWriteCookie> InFlightLogWrites; - NMonitoring::TPercentileTrackerLg<6, 5, 15> LogResponseTimes; - -public: + ui64 Tag; + ui32 DurationSeconds; + i32 OwnerInitInProgress = 0; + 
ui32 HarakiriInFlight = 0; + + TReallyFastRng32 Rng; + + // Monitoring + TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; + TInstant TestStartTime; + + ui32 PDiskId; + ui64 PDiskGuid; + + bool IsWardenlessTest = false; + bool IsDying = false; + + NMonitoring::TDynamicCounters::TCounterPtr LogBytesWritten; + ui64 ReqIdx = 0; + TMap<ui64, TLogWriteCookie> InFlightLogWrites; + NMonitoring::TPercentileTrackerLg<6, 5, 15> LogResponseTimes; + +public: static constexpr auto ActorActivityType() { - return NKikimrServices::TActivity::BS_LOAD_PDISK_LOG_WRITE; - } - - TPDiskLogWriterTestLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, + return NKikimrServices::TActivity::BS_LOAD_PDISK_LOG_WRITE; + } + + TPDiskLogWriterTestLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, const TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) - : Parent(parent) - , Tag(tag) - , Rng(Now().GetValue()) - // , Report(new TLoadReport()) - { - VERIFY_PARAM(PDiskId); - PDiskId = cmd.GetPDiskId(); - - VERIFY_PARAM(PDiskGuid); - PDiskGuid = cmd.GetPDiskGuid(); - - VERIFY_PARAM(IsWardenlessTest); - IsWardenlessTest = cmd.GetIsWardenlessTest(); - - - ui32 idx = 0; - for (const auto& workerCmd : cmd.GetWorkers()) { + : Parent(parent) + , Tag(tag) + , Rng(Now().GetValue()) + // , Report(new TLoadReport()) + { + VERIFY_PARAM(PDiskId); + PDiskId = cmd.GetPDiskId(); + + VERIFY_PARAM(PDiskGuid); + PDiskGuid = cmd.GetPDiskGuid(); + + VERIFY_PARAM(IsWardenlessTest); + IsWardenlessTest = cmd.GetIsWardenlessTest(); + + + ui32 idx = 0; + for (const auto& workerCmd : cmd.GetWorkers()) { Workers.push_back(std::make_unique<TWorker>(workerCmd, idx, &Rng)); - if (IsWardenlessTest) { - Workers.back()->OwnerRound = 1000 + index + idx; - } - ++idx; - } - - VERIFY_PARAM(DurationSeconds); - DurationSeconds = cmd.GetDurationSeconds(); - Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); - 
// Report->Duration = TDuration::Seconds(DurationSeconds); - - // Monitoring initialization - TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; - LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> - GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); - LogBytesWritten = LoadCounters->GetCounter("LogBytesWritten", true); - LogResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorLogWriteDuration", "Time in microseconds", percentiles); - } - - ~TPDiskLogWriterTestLoadActor() { - LoadCounters->ResetCounters(); - } - - void Bootstrap(const TActorContext& ctx) { - Become(&TPDiskLogWriterTestLoadActor::StateFunc); - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Schedule PoisonPill"); - ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill); - ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Bootstrap, Workers.size# " << Workers.size()); - if (IsWardenlessTest) { - for (auto& worker : Workers) { - AppData(ctx)->Icb->RegisterLocalControl(worker->MaxInFlight, - Sprintf("PDiskWriteLoadActor_MaxInFlight_%04" PRIu64 "_%04" PRIu32, Tag, worker->Idx)); - SendRequest(ctx, worker->GetYardInit(PDiskGuid)); - } - } else { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Send TEvRegisterPDiskLoadActor"); - Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new TEvRegisterPDiskLoadActor()); - } - OwnerInitInProgress = Workers.size(); - } - - void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag - << " TEvRegisterPDiskLoadActorResult recieved, ownerRound# " << (ui32)msg->OwnerRound); - for (auto& worker : Workers) { - worker->OwnerRound = msg->OwnerRound + 1; - SendRequest(ctx, worker->GetYardInit(PDiskGuid)); - } - } - - void 
Handle(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { - TStringStream str; - str << "TEvYardInitResult is not OK, msg.ToString()# " << msg->ToString(); - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); - ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); - Die(ctx); - return; - } - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " TEvYardInitResult, " - << " Owner# " << (ui32)msg->PDiskParams->Owner - << " OwnerRound# " << msg->PDiskParams->OwnerRound); - - for (auto& worker : Workers) { - if (!worker->PDiskParams) { - worker->PDiskParams = std::move(msg->PDiskParams); - worker->OwnerRound = Max(worker->OwnerRound, worker->PDiskParams->OwnerRound); - auto logRead = worker->GetLogRead(); - Y_VERIFY(logRead); - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " owner# " - << (ui32)worker->PDiskParams->Owner << " going to send first TEvLogRead# " << logRead->ToString()); - SendRequest(ctx, std::move(logRead)); - break; - } - } - } - - void Handle(NPDisk::TEvReadLogResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - Y_VERIFY(msg->Status == NKikimrProto::OK); - - for (auto& worker : Workers) { - if (worker->PDiskParams && worker->PDiskParams->Owner == msg->Owner) { - if (worker->FindLastWrittenLsn(msg)) { - // Lsn is found - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " owner# " << (ui32)msg->Owner - << " found first Lsn to write# " << worker->Lsn); - - --OwnerInitInProgress; - } else { - auto logRead = worker->GetLogRead(); - Y_VERIFY(logRead); - SendRequest(ctx, std::move(logRead)); - } - break; - } - } - - // All workers is initialized - if (!OwnerInitInProgress) { - TestStartTime = TAppData::TimeProvider->Now(); - if (IsDying) { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " last TEvReadLogResult, " - << " all workers is initialized, but IsDying# true, so 
starting death process"); - StartDeathProcess(ctx); - } else { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " last TEvReadLogResult, " - << " all workers is initialized, start test"); - SendWriteRequests(ctx); - } - } - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Death management - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void HandlePoisonPill(const TActorContext& ctx) { - IsDying = true; - if (OwnerInitInProgress) { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " HandlePoisonPill, " - << "not all workers is initialized, so wait them to end initialization"); - } else { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " HandlePoisonPill, " - << "all workers is initialized, so starting death process"); - StartDeathProcess(ctx); - } - } - - void StartDeathProcess(const TActorContext& ctx) { - Become(&TPDiskLogWriterTestLoadActor::StateEndOfWork); - if (IsWardenlessTest) { - for (auto& worker : Workers) { - ++worker->OwnerRound; - worker->PoisonPill(); - } - CheckDie(ctx); - } else { - Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new TEvRegisterPDiskLoadActor()); - } - } - - void HandleEnd(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag - << " TEvRegisterPDiskLoadActorResult recieved, ownerRound# " << msg->OwnerRound); - for (auto& worker : Workers) { - worker->OwnerRound = msg->OwnerRound + 1; - worker->PoisonPill(); - } - CheckDie(ctx); - } - - - void CheckDie(const TActorContext& ctx) { - for (auto& worker : Workers) { - if (worker->CheckDie()) { - SendRequest(ctx, worker->GetYardInit(PDiskGuid)); - } - } - } - - - 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // State Dying - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void HandleEnd(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { - TStringStream str; - str << "TEvYardInitResult is not OK, msg.ToString()# " << msg->ToString(); - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); - ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); - Die(ctx); - return; - } - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " end of work, TEvYardInitResult, " - << " Owner# " << (ui32)msg->PDiskParams->Owner - << " OwnerRound# " << msg->PDiskParams->OwnerRound); - - for (auto& worker : Workers) { - if (worker->PDiskParams->Owner == msg->PDiskParams->Owner) { - worker->PDiskParams = std::move(msg->PDiskParams); - worker->OwnerRound = worker->PDiskParams->OwnerRound; - worker->CheckStartingPoints(msg->StartingPoints); - SendRequest(ctx, worker->GetLogRead()); - break; - } - } - } - - void HandleEnd(NPDisk::TEvReadLogResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - Y_VERIFY(msg->Status == NKikimrProto::OK); - - for (auto& worker : Workers) { - if (worker->PDiskParams->Owner == msg->Owner) { - worker->CheckLogRecords(msg); - if (auto logRead = worker->GetLogRead()) { - SendRequest(ctx, std::move(logRead)); - } else if (auto harakiri = worker->GetHarakiri()) { - ++HarakiriInFlight; - SendRequest(ctx, std::move(harakiri)); - } - break; - } - } - } - - void HandleEnd(NPDisk::TEvHarakiriResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { - TStringStream str; - str << "TEvHarakiriResult is not OK, msg.ToString()# " << msg->ToString(); - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); - 
ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); - Die(ctx); - return; - } - - Y_VERIFY(HarakiriInFlight); - - if (!--HarakiriInFlight) { - for (auto& worker : Workers) { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " End of work," - << " owner# " << (ui32)worker->PDiskParams->Owner - << " GetReallyWrittenBytes()# " << worker->GetReallyWrittenBytes() - << " GetGlobalWrittenBytes()# " << worker->GetGlobalWrittenBytes()); - } + if (IsWardenlessTest) { + Workers.back()->OwnerRound = 1000 + index + idx; + } + ++idx; + } + + VERIFY_PARAM(DurationSeconds); + DurationSeconds = cmd.GetDurationSeconds(); + Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); + // Report->Duration = TDuration::Seconds(DurationSeconds); + + // Monitoring initialization + TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; + LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> + GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); + LogBytesWritten = LoadCounters->GetCounter("LogBytesWritten", true); + LogResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorLogWriteDuration", "Time in microseconds", percentiles); + } + + ~TPDiskLogWriterTestLoadActor() { + LoadCounters->ResetCounters(); + } + + void Bootstrap(const TActorContext& ctx) { + Become(&TPDiskLogWriterTestLoadActor::StateFunc); + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Schedule PoisonPill"); + ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Bootstrap, Workers.size# " << Workers.size()); + if (IsWardenlessTest) { + for (auto& worker : Workers) { + AppData(ctx)->Icb->RegisterLocalControl(worker->MaxInFlight, + Sprintf("PDiskWriteLoadActor_MaxInFlight_%04" PRIu64 "_%04" PRIu32, Tag, worker->Idx)); + SendRequest(ctx, 
worker->GetYardInit(PDiskGuid)); + } + } else { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " Send TEvRegisterPDiskLoadActor"); + Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new TEvRegisterPDiskLoadActor()); + } + OwnerInitInProgress = Workers.size(); + } + + void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag + << " TEvRegisterPDiskLoadActorResult recieved, ownerRound# " << (ui32)msg->OwnerRound); + for (auto& worker : Workers) { + worker->OwnerRound = msg->OwnerRound + 1; + SendRequest(ctx, worker->GetYardInit(PDiskGuid)); + } + } + + void Handle(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + TStringStream str; + str << "TEvYardInitResult is not OK, msg.ToString()# " << msg->ToString(); + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); + ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); + Die(ctx); + return; + } + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " TEvYardInitResult, " + << " Owner# " << (ui32)msg->PDiskParams->Owner + << " OwnerRound# " << msg->PDiskParams->OwnerRound); + + for (auto& worker : Workers) { + if (!worker->PDiskParams) { + worker->PDiskParams = std::move(msg->PDiskParams); + worker->OwnerRound = Max(worker->OwnerRound, worker->PDiskParams->OwnerRound); + auto logRead = worker->GetLogRead(); + Y_VERIFY(logRead); + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " owner# " + << (ui32)worker->PDiskParams->Owner << " going to send first TEvLogRead# " << logRead->ToString()); + SendRequest(ctx, std::move(logRead)); + break; + } + } + } + + void Handle(NPDisk::TEvReadLogResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + Y_VERIFY(msg->Status == NKikimrProto::OK); + + for (auto& worker : Workers) { + if 
(worker->PDiskParams && worker->PDiskParams->Owner == msg->Owner) { + if (worker->FindLastWrittenLsn(msg)) { + // Lsn is found + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " owner# " << (ui32)msg->Owner + << " found first Lsn to write# " << worker->Lsn); + + --OwnerInitInProgress; + } else { + auto logRead = worker->GetLogRead(); + Y_VERIFY(logRead); + SendRequest(ctx, std::move(logRead)); + } + break; + } + } + + // All workers is initialized + if (!OwnerInitInProgress) { + TestStartTime = TAppData::TimeProvider->Now(); + if (IsDying) { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " last TEvReadLogResult, " + << " all workers is initialized, but IsDying# true, so starting death process"); + StartDeathProcess(ctx); + } else { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " last TEvReadLogResult, " + << " all workers is initialized, start test"); + SendWriteRequests(ctx); + } + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Death management + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void HandlePoisonPill(const TActorContext& ctx) { + IsDying = true; + if (OwnerInitInProgress) { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " HandlePoisonPill, " + << "not all workers is initialized, so wait them to end initialization"); + } else { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " HandlePoisonPill, " + << "all workers is initialized, so starting death process"); + StartDeathProcess(ctx); + } + } + + void StartDeathProcess(const TActorContext& ctx) { + Become(&TPDiskLogWriterTestLoadActor::StateEndOfWork); + if (IsWardenlessTest) { + for (auto& worker : Workers) { + ++worker->OwnerRound; + worker->PoisonPill(); + } + CheckDie(ctx); + } else { + Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new 
TEvRegisterPDiskLoadActor()); + } + } + + void HandleEnd(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag + << " TEvRegisterPDiskLoadActorResult recieved, ownerRound# " << msg->OwnerRound); + for (auto& worker : Workers) { + worker->OwnerRound = msg->OwnerRound + 1; + worker->PoisonPill(); + } + CheckDie(ctx); + } + + + void CheckDie(const TActorContext& ctx) { + for (auto& worker : Workers) { + if (worker->CheckDie()) { + SendRequest(ctx, worker->GetYardInit(PDiskGuid)); + } + } + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // State Dying + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void HandleEnd(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + TStringStream str; + str << "TEvYardInitResult is not OK, msg.ToString()# " << msg->ToString(); + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); + ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); + Die(ctx); + return; + } + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " end of work, TEvYardInitResult, " + << " Owner# " << (ui32)msg->PDiskParams->Owner + << " OwnerRound# " << msg->PDiskParams->OwnerRound); + + for (auto& worker : Workers) { + if (worker->PDiskParams->Owner == msg->PDiskParams->Owner) { + worker->PDiskParams = std::move(msg->PDiskParams); + worker->OwnerRound = worker->PDiskParams->OwnerRound; + worker->CheckStartingPoints(msg->StartingPoints); + SendRequest(ctx, worker->GetLogRead()); + break; + } + } + } + + void HandleEnd(NPDisk::TEvReadLogResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + Y_VERIFY(msg->Status == NKikimrProto::OK); + + for (auto& worker : Workers) { + if 
(worker->PDiskParams->Owner == msg->Owner) { + worker->CheckLogRecords(msg); + if (auto logRead = worker->GetLogRead()) { + SendRequest(ctx, std::move(logRead)); + } else if (auto harakiri = worker->GetHarakiri()) { + ++HarakiriInFlight; + SendRequest(ctx, std::move(harakiri)); + } + break; + } + } + } + + void HandleEnd(NPDisk::TEvHarakiriResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + TStringStream str; + str << "TEvHarakiriResult is not OK, msg.ToString()# " << msg->ToString(); + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); + ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); + Die(ctx); + return; + } + + Y_VERIFY(HarakiriInFlight); + + if (!--HarakiriInFlight) { + for (auto& worker : Workers) { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " End of work," + << " owner# " << (ui32)worker->PDiskParams->Owner + << " GetReallyWrittenBytes()# " << worker->GetReallyWrittenBytes() + << " GetGlobalWrittenBytes()# " << worker->GetGlobalWrittenBytes()); + } auto report = std::make_unique<TLoadReport>(); - report->LoadType = TLoadReport::LOAD_LOG_WRITE; - report->Duration = TAppData::TimeProvider->Now() - TestStartTime; + report->LoadType = TLoadReport::LOAD_LOG_WRITE; + report->Duration = TAppData::TimeProvider->Now() - TestStartTime; ctx.Send(Parent, new TEvTestLoadFinished(Tag, report.release(), "OK")); - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " End of work, TEvTestLoadFinished is sent"); - Die(ctx); - } - - CheckDie(ctx); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Monitoring - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void Handle(TEvUpdateMonitoring::TPtr& /*ev*/, const TActorContext& ctx) { - LogResponseTimes.Update(); - 
ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Log writing - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void SendWriteRequests(const TActorContext& ctx) { - LOG_TRACE_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " SendWriteRequests"); - for (auto& worker : Workers) { - auto now = TAppData::TimeProvider->Now(); + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " End of work, TEvTestLoadFinished is sent"); + Die(ctx); + } + + CheckDie(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Monitoring + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void Handle(TEvUpdateMonitoring::TPtr& /*ev*/, const TActorContext& ctx) { + LogResponseTimes.Update(); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Log writing + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void SendWriteRequests(const TActorContext& ctx) { + LOG_TRACE_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " SendWriteRequests"); + for (auto& worker : Workers) { + auto now = TAppData::TimeProvider->Now(); while (std::unique_ptr<NPDisk::TEvLog> ev = worker->TrySend(WrittenBytes)) { - *LogBytesWritten += ev->Data.size(); - ev->Cookie = reinterpret_cast<void*>(ReqIdx); - InFlightLogWrites.insert({ReqIdx, {worker->Idx, now, ev->Data.size()}}); - ++ReqIdx; - SendRequest(ctx, std::move(ev)); - } - } - - CheckDie(ctx); - } - - void 
Handle(NPDisk::TEvLogResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { - TStringStream str; - str << " TEvLogResult is not OK, msg.ToString()# " << msg->ToString(); - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); - ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); - Die(ctx); - return; - } - - auto now = TAppData::TimeProvider->Now(); - for (const auto& res : msg->Results) { - auto it = InFlightLogWrites.find(reinterpret_cast<ui64>(res.Cookie)); - Y_VERIFY(it != InFlightLogWrites.end()); - const auto& stats = it->second; - LogResponseTimes.Increment((now - stats.SentTime).MicroSeconds()); - auto& worker = Workers[stats.WorkerIdx]; - - worker->OnLogResult(res, stats.Size); - WrittenBytes = Max(WrittenBytes, worker->GetGlobalWrittenBytes()); - LOG_TRACE_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " TEvLogResult, " - << " WrittenBytes# " << WrittenBytes); - InFlightLogWrites.erase(it); - } - - SendWriteRequests(ctx); - } - - template<typename TRequest> + *LogBytesWritten += ev->Data.size(); + ev->Cookie = reinterpret_cast<void*>(ReqIdx); + InFlightLogWrites.insert({ReqIdx, {worker->Idx, now, ev->Data.size()}}); + ++ReqIdx; + SendRequest(ctx, std::move(ev)); + } + } + + CheckDie(ctx); + } + + void Handle(NPDisk::TEvLogResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { + TStringStream str; + str << " TEvLogResult is not OK, msg.ToString()# " << msg->ToString(); + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, str.Str()); + ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); + Die(ctx); + return; + } + + auto now = TAppData::TimeProvider->Now(); + for (const auto& res : msg->Results) { + auto it = InFlightLogWrites.find(reinterpret_cast<ui64>(res.Cookie)); + Y_VERIFY(it != InFlightLogWrites.end()); + const auto& stats = it->second; + LogResponseTimes.Increment((now - 
stats.SentTime).MicroSeconds()); + auto& worker = Workers[stats.WorkerIdx]; + + worker->OnLogResult(res, stats.Size); + WrittenBytes = Max(WrittenBytes, worker->GetGlobalWrittenBytes()); + LOG_TRACE_S(ctx, NKikimrServices::BS_LOAD_TEST, "Tag# " << Tag << " TEvLogResult, " + << " WrittenBytes# " << WrittenBytes); + InFlightLogWrites.erase(it); + } + + SendWriteRequests(ctx); + } + + template<typename TRequest> void SendRequest(const TActorContext& ctx, std::unique_ptr<TRequest>&& request) { ctx.Send(MakeBlobStoragePDiskID(ctx.ExecutorThread.ActorSystem->NodeId, PDiskId), request.release()); - } - - void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { - TStringStream str; - HTML(str) { - TABLE() { - TABLEHEAD() { - TABLER() { - TABLEH() { str << "Parameter"; } - TABLEH() { str << "Value"; } - } - } - TABLEBODY() { - - PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " - << DurationSeconds << "s"); - for (auto& worker : Workers) { - PARAM("Worker idx", worker->Idx); - PARAM("Worker ReallyWrittenBytes", worker->GetReallyWrittenBytes()); - PARAM("Worker GlobalWrittenBytes", worker->GetGlobalWrittenBytes()); - PARAM("Worker BurstSize", worker->BurstSize); - PARAM("Worker BurstInterval", worker->BurstInterval); - PARAM("Worker BurstIdx", worker->BurstIdx); - PARAM("Worker BurstWrittenBytes", worker->BurstWrittenBytes); - PARAM("Worker next Lsn", worker->Lsn); - PARAM("Worker CutLogLsn", worker->CutLogLsn); - PARAM("Worker StartingPoint", worker->StartingPoint); - - } - } - } - } - - ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); - } - - STRICT_STFUNC(StateFunc, - CFunc(TEvents::TSystem::PoisonPill, HandlePoisonPill) - HFunc(TEvRegisterPDiskLoadActorResult, Handle) - HFunc(NPDisk::TEvYardInitResult, Handle) - HFunc(NPDisk::TEvReadLogResult, Handle) - HFunc(TEvUpdateMonitoring, Handle) - HFunc(NPDisk::TEvLogResult, Handle) - HFunc(NMon::TEvHttpInfo, Handle) - ) - - 
STRICT_STFUNC(StateEndOfWork, - HFunc(NPDisk::TEvYardInitResult, HandleEnd) - HFunc(NPDisk::TEvHarakiriResult, HandleEnd) - HFunc(NPDisk::TEvReadLogResult, HandleEnd) - HFunc(TEvRegisterPDiskLoadActorResult, HandleEnd) - - HFunc(TEvUpdateMonitoring, Handle) - HFunc(NPDisk::TEvLogResult, Handle) - HFunc(NMon::TEvHttpInfo, Handle) - ) -}; - -IActor *CreatePDiskLogWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, + } + + void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + TStringStream str; + HTML(str) { + TABLE() { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Parameter"; } + TABLEH() { str << "Value"; } + } + } + TABLEBODY() { + + PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " + << DurationSeconds << "s"); + for (auto& worker : Workers) { + PARAM("Worker idx", worker->Idx); + PARAM("Worker ReallyWrittenBytes", worker->GetReallyWrittenBytes()); + PARAM("Worker GlobalWrittenBytes", worker->GetGlobalWrittenBytes()); + PARAM("Worker BurstSize", worker->BurstSize); + PARAM("Worker BurstInterval", worker->BurstInterval); + PARAM("Worker BurstIdx", worker->BurstIdx); + PARAM("Worker BurstWrittenBytes", worker->BurstWrittenBytes); + PARAM("Worker next Lsn", worker->Lsn); + PARAM("Worker CutLogLsn", worker->CutLogLsn); + PARAM("Worker StartingPoint", worker->StartingPoint); + + } + } + } + } + + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); + } + + STRICT_STFUNC(StateFunc, + CFunc(TEvents::TSystem::PoisonPill, HandlePoisonPill) + HFunc(TEvRegisterPDiskLoadActorResult, Handle) + HFunc(NPDisk::TEvYardInitResult, Handle) + HFunc(NPDisk::TEvReadLogResult, Handle) + HFunc(TEvUpdateMonitoring, Handle) + HFunc(NPDisk::TEvLogResult, Handle) + HFunc(NMon::TEvHttpInfo, Handle) + ) + + STRICT_STFUNC(StateEndOfWork, + HFunc(NPDisk::TEvYardInitResult, HandleEnd) + HFunc(NPDisk::TEvHarakiriResult, HandleEnd) + 
HFunc(NPDisk::TEvReadLogResult, HandleEnd) + HFunc(TEvRegisterPDiskLoadActorResult, HandleEnd) + + HFunc(TEvUpdateMonitoring, Handle) + HFunc(NPDisk::TEvLogResult, Handle) + HFunc(NMon::TEvHttpInfo, Handle) + ) +}; + +IActor *CreatePDiskLogWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLogLoadStart& cmd, const TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) { - return new TPDiskLogWriterTestLoadActor(cmd, parent, counters, index, tag); -} - -} // NKikimr + return new TPDiskLogWriterTestLoadActor(cmd, parent, counters, index, tag); +} + +} // NKikimr diff --git a/ydb/core/blobstorage/testload/test_load_pdisk_read.cpp b/ydb/core/blobstorage/testload/test_load_pdisk_read.cpp index d15ced1b61..3b446a74c2 100644 --- a/ydb/core/blobstorage/testload/test_load_pdisk_read.cpp +++ b/ydb/core/blobstorage/testload/test_load_pdisk_read.cpp @@ -1,4 +1,4 @@ -#include "test_load_actor.h" +#include "test_load_actor.h" #include <ydb/core/base/appdata.h> #include <ydb/core/base/counters.h> #include <ydb/core/control/immediate_control_board_wrapper.h> @@ -6,49 +6,49 @@ #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <library/cpp/monlib/service/pages/templates.h> -#include <util/random/fast.h> -#include <util/generic/queue.h> - -namespace NKikimr { - -class TPDiskReaderTestLoadActor : public TActorBootstrapped<TPDiskReaderTestLoadActor> { - struct TChunkInfo { - TChunkIdx Idx; - ui32 NumSlots; - ui32 SlotSizeBlocks; - ui32 Weight; - ui64 AccumWeight; - - struct TFindByWeight { - bool operator ()(ui64 left, const TChunkInfo& right) const { - return left < right.AccumWeight; - } - }; - }; - - struct TParts : public NPDisk::TEvChunkWrite::IParts { - const void *Buffer; - ui32 Len; - - TParts(const void *buffer, ui32 len) - : Buffer(buffer) - , Len(len) - {} - - TDataRef operator [](ui32 index) const override { - Y_VERIFY(index == 0); - 
return std::make_pair(Buffer, Len); - } - - ui32 Size() const override { - return 1; - } - }; - - struct TRequestInfo { +#include <util/random/fast.h> +#include <util/generic/queue.h> + +namespace NKikimr { + +class TPDiskReaderTestLoadActor : public TActorBootstrapped<TPDiskReaderTestLoadActor> { + struct TChunkInfo { + TChunkIdx Idx; + ui32 NumSlots; + ui32 SlotSizeBlocks; + ui32 Weight; + ui64 AccumWeight; + + struct TFindByWeight { + bool operator ()(ui64 left, const TChunkInfo& right) const { + return left < right.AccumWeight; + } + }; + }; + + struct TParts : public NPDisk::TEvChunkWrite::IParts { + const void *Buffer; + ui32 Len; + + TParts(const void *buffer, ui32 len) + : Buffer(buffer) + , Len(len) + {} + + TDataRef operator [](ui32 index) const override { + Y_VERIFY(index == 0); + return std::make_pair(Buffer, Len); + } + + ui32 Size() const override { + return 1; + } + }; + + struct TRequestInfo { ui32 Size = 0; TChunkIdx ChunkIdx = 0; - TInstant StartTime; + TInstant StartTime; TRequestInfo(ui32 size, TChunkIdx chunkIdx, TInstant startTime) : Size(size) @@ -58,195 +58,195 @@ class TPDiskReaderTestLoadActor : public TActorBootstrapped<TPDiskReaderTestLoad TRequestInfo(const TRequestInfo &) = default; TRequestInfo() = default; - }; - - struct TRequestStat { - ui64 BytesReadTotal; - ui32 Size; - TDuration Latency; - }; - + }; + + struct TRequestStat { + ui64 BytesReadTotal; + ui32 Size; + TDuration Latency; + }; + THashMap<ui64, TRequestInfo> RequestInfo; ui64 NextRequestIdx = 0; const TActorId Parent; - ui64 Tag; - ui32 DurationSeconds; - ui32 IntervalMsMin = 0; - ui32 IntervalMsMax = 0; - TControlWrapper MaxInFlight; - ui32 InFlight = 0; - ui32 LogInFlight = 0; - ui32 SlotIndex = 0; - TInstant LastRequest; - TInstant TestStartTime = TInstant::Max(); - TInstant MeasurementStartTime = TInstant::Max(); - double IntervalMs = 0; - ui32 PDiskId; - TVDiskID VDiskId; + ui64 Tag; + ui32 DurationSeconds; + ui32 IntervalMsMin = 0; + ui32 IntervalMsMax = 0; + 
TControlWrapper MaxInFlight; + ui32 InFlight = 0; + ui32 LogInFlight = 0; + ui32 SlotIndex = 0; + TInstant LastRequest; + TInstant TestStartTime = TInstant::Max(); + TInstant MeasurementStartTime = TInstant::Max(); + double IntervalMs = 0; + ui32 PDiskId; + TVDiskID VDiskId; NPDisk::TOwnerRound OwnerRound; - ui64 PDiskGuid; - TIntrusivePtr<TPDiskParams> PDiskParams; + ui64 PDiskGuid; + TIntrusivePtr<TPDiskParams> PDiskParams; TVector<TChunkInfo> Chunks; - TReallyFastRng32 Rng; + TReallyFastRng32 Rng; TString DataBuffer; - ui64 Lsn = 1; + ui64 Lsn = 1; TMultiMap<TInstant, TRequestStat> TimeSeries; TVector<TChunkIdx> DeleteChunks; - bool Sequential; - bool Harakiri = false; + bool Sequential; + bool Harakiri = false; bool IsWardenlessTest; - - // statistics - ui64 ChunkReserve_RequestsSent = 0; - ui64 ChunkRead_RequestsSent = 0; - ui64 ChunkRead_OK = 0; - ui64 ChunkRead_NonOK = 0; - ui64 ChunkWrite_RequestsSent = 0; - ui64 ChunkWrite_OK = 0; - ui64 DeletedChunksCount = 0; - - // Monitoring - NMonitoring::TDynamicCounters::TCounterPtr BytesRead; - NMonitoring::TPercentileTrackerLg<6, 5, 15> ResponseTimes; - - TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; - TIntrusivePtr<TLoadReport> Report; - TIntrusivePtr<NMonitoring::TCounterForPtr> PDiskBytesRead; - TMap<double, TIntrusivePtr<NMonitoring::TCounterForPtr>> DevicePercentiles; - + + // statistics + ui64 ChunkReserve_RequestsSent = 0; + ui64 ChunkRead_RequestsSent = 0; + ui64 ChunkRead_OK = 0; + ui64 ChunkRead_NonOK = 0; + ui64 ChunkWrite_RequestsSent = 0; + ui64 ChunkWrite_OK = 0; + ui64 DeletedChunksCount = 0; + + // Monitoring + NMonitoring::TDynamicCounters::TCounterPtr BytesRead; + NMonitoring::TPercentileTrackerLg<6, 5, 15> ResponseTimes; + + TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; + TIntrusivePtr<TLoadReport> Report; + TIntrusivePtr<NMonitoring::TCounterForPtr> PDiskBytesRead; + TMap<double, TIntrusivePtr<NMonitoring::TCounterForPtr>> DevicePercentiles; + TString ErrorReason; 
-public: +public: static constexpr auto ActorActivityType() { return NKikimrServices::TActivity::BS_LOAD_PDISK_READ; - } - + } + TPDiskReaderTestLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskReadLoadStart& cmd, const TActorId& parent, - const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) - : Parent(parent) - , Tag(tag) - , MaxInFlight(4, 0, 65536) + const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) + : Parent(parent) + , Tag(tag) + , MaxInFlight(4, 0, 65536) , OwnerRound(1000 + index) - , Rng(Now().GetValue()) - , Report(new TLoadReport()) - { + , Rng(Now().GetValue()) + , Report(new TLoadReport()) + { ErrorReason = "Still waiting for TEvRegisterPDiskLoadActorResult"; - - VERIFY_PARAM(DurationSeconds); - DurationSeconds = cmd.GetDurationSeconds(); - Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); - Report->Duration = TDuration::Seconds(DurationSeconds); - - IntervalMsMin = cmd.GetIntervalMsMin(); - IntervalMsMax = cmd.GetIntervalMsMax(); - - VERIFY_PARAM(InFlightReads); - MaxInFlight = cmd.GetInFlightReads(); - Report->InFlight = MaxInFlight; - - VERIFY_PARAM(PDiskId); - PDiskId = cmd.GetPDiskId(); - - VERIFY_PARAM(PDiskGuid); - PDiskGuid = cmd.GetPDiskGuid(); - - VERIFY_PARAM(VDiskId); - VDiskId = VDiskIDFromVDiskID(cmd.GetVDiskId()); - - Sequential = cmd.GetSequential(); + + VERIFY_PARAM(DurationSeconds); + DurationSeconds = cmd.GetDurationSeconds(); + Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); + Report->Duration = TDuration::Seconds(DurationSeconds); + + IntervalMsMin = cmd.GetIntervalMsMin(); + IntervalMsMax = cmd.GetIntervalMsMax(); + + VERIFY_PARAM(InFlightReads); + MaxInFlight = cmd.GetInFlightReads(); + Report->InFlight = MaxInFlight; + + VERIFY_PARAM(PDiskId); + PDiskId = cmd.GetPDiskId(); + + VERIFY_PARAM(PDiskGuid); + PDiskGuid = cmd.GetPDiskGuid(); + + VERIFY_PARAM(VDiskId); + VDiskId = VDiskIDFromVDiskID(cmd.GetVDiskId()); + + Sequential 
= cmd.GetSequential(); IsWardenlessTest = cmd.GetIsWardenlessTest(); - - for (const auto& chunk : cmd.GetChunks()) { - if (!chunk.HasSlots() || !chunk.HasWeight() || !chunk.GetSlots() || !chunk.GetWeight()) { - ythrow TLoadActorException() << "chunk.Slots/Weight fields are either missing or zero"; - } - Chunks.push_back(TChunkInfo{ - 0, - chunk.GetSlots(), - 0, - chunk.GetWeight(), - 0, - }); - } - - // Monitoring initialization - LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> - GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); - BytesRead = LoadCounters->GetCounter("LoadActorBytesRead", true); - TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; - ResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorReadDuration", "Time in microseconds", percentiles); - - TIntrusivePtr<NMonitoring::TDynamicCounters> pDiskCounters = GetServiceCounters(counters, "pdisks")-> - GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); - PDiskBytesRead = pDiskCounters->GetSubgroup("subsystem", "device")->GetCounter("DeviceBytesRead", true); - TIntrusivePtr<NMonitoring::TDynamicCounters> percentilesGroup; - percentilesGroup = pDiskCounters->GetSubgroup("subsystem", "deviceReadDuration")->GetSubgroup("sensor", "Time in microsec"); - for (double percentile : {0.1, 0.5, 0.9, 0.99, 0.999, 1.0}) { - DevicePercentiles.emplace(percentile, percentilesGroup->GetNamedCounter("percentile", - Sprintf("%.1f", percentile * 100.f))); - } - - if (Chunks.empty()) { - ythrow TLoadActorException() << "Chunks may not be empty"; - } - } - - ~TPDiskReaderTestLoadActor() { - LoadCounters->ResetCounters(); - } - - void Bootstrap(const TActorContext& ctx) { - Become(&TPDiskReaderTestLoadActor::StateFunc); - ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill()); - ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - AppData(ctx)->Icb->RegisterLocalControl(MaxInFlight, 
Sprintf("PDiskReadLoadActor_MaxInFlight_%4" PRIu64, Tag).c_str()); + + for (const auto& chunk : cmd.GetChunks()) { + if (!chunk.HasSlots() || !chunk.HasWeight() || !chunk.GetSlots() || !chunk.GetWeight()) { + ythrow TLoadActorException() << "chunk.Slots/Weight fields are either missing or zero"; + } + Chunks.push_back(TChunkInfo{ + 0, + chunk.GetSlots(), + 0, + chunk.GetWeight(), + 0, + }); + } + + // Monitoring initialization + LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> + GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); + BytesRead = LoadCounters->GetCounter("LoadActorBytesRead", true); + TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; + ResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorReadDuration", "Time in microseconds", percentiles); + + TIntrusivePtr<NMonitoring::TDynamicCounters> pDiskCounters = GetServiceCounters(counters, "pdisks")-> + GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); + PDiskBytesRead = pDiskCounters->GetSubgroup("subsystem", "device")->GetCounter("DeviceBytesRead", true); + TIntrusivePtr<NMonitoring::TDynamicCounters> percentilesGroup; + percentilesGroup = pDiskCounters->GetSubgroup("subsystem", "deviceReadDuration")->GetSubgroup("sensor", "Time in microsec"); + for (double percentile : {0.1, 0.5, 0.9, 0.99, 0.999, 1.0}) { + DevicePercentiles.emplace(percentile, percentilesGroup->GetNamedCounter("percentile", + Sprintf("%.1f", percentile * 100.f))); + } + + if (Chunks.empty()) { + ythrow TLoadActorException() << "Chunks may not be empty"; + } + } + + ~TPDiskReaderTestLoadActor() { + LoadCounters->ResetCounters(); + } + + void Bootstrap(const TActorContext& ctx) { + Become(&TPDiskReaderTestLoadActor::StateFunc); + ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill()); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + AppData(ctx)->Icb->RegisterLocalControl(MaxInFlight, 
Sprintf("PDiskReadLoadActor_MaxInFlight_%4" PRIu64, Tag).c_str()); if (IsWardenlessTest) { ErrorReason = "Still waiting for YardInitResult"; SendRequest(ctx, std::make_unique<NPDisk::TEvYardInit>(OwnerRound, VDiskId, PDiskGuid)); } else { Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new TEvRegisterPDiskLoadActor()); } - } - - void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { - OwnerRound = ev->Get()->OwnerRound; + } + + void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { + OwnerRound = ev->Get()->OwnerRound; ErrorReason = "Still waiting for YardInitResult"; SendRequest(ctx, std::make_unique<NPDisk::TEvYardInit>(OwnerRound, VDiskId, PDiskGuid)); - } - - void Handle(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { + } + + void Handle(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status != NKikimrProto::OK) { TStringStream str; str << "yard init failed, Status# " << NKikimrProto::EReplyStatus_Name(msg->Status); ErrorReason = str.Str(); LOG_INFO(ctx, NKikimrServices::BS_LOAD_TEST, "%s", str.Str().c_str()); SendRequest(ctx, std::make_unique<TEvents::TEvPoisonPill>()); - return; - } + return; + } ErrorReason = "OK"; - PDiskParams = msg->PDiskParams; + PDiskParams = msg->PDiskParams; DataBuffer = TString::Uninitialized(PDiskParams->ChunkSize); char *data = const_cast<char*>(DataBuffer.data()); - for (ui32 i = 0; i < PDiskParams->ChunkSize; ++i) { - data[i] = Rng(); - } - for (TChunkInfo& chunk : Chunks) { - chunk.SlotSizeBlocks = PDiskParams->ChunkSize / PDiskParams->AppendBlockSize / chunk.NumSlots; - } - StartAllReservations(ctx); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Chunk reservation - 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void StartAllReservations(const TActorContext& ctx) { + for (ui32 i = 0; i < PDiskParams->ChunkSize; ++i) { + data[i] = Rng(); + } + for (TChunkInfo& chunk : Chunks) { + chunk.SlotSizeBlocks = PDiskParams->ChunkSize / PDiskParams->AppendBlockSize / chunk.NumSlots; + } + StartAllReservations(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Chunk reservation + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void StartAllReservations(const TActorContext& ctx) { SendRequest(ctx, std::make_unique<NPDisk::TEvChunkReserve>(PDiskParams->Owner, - PDiskParams->OwnerRound, (ui32)Chunks.size())); - ++ChunkReserve_RequestsSent; - } - + PDiskParams->OwnerRound, (ui32)Chunks.size())); + ++ChunkReserve_RequestsSent; + } + ui64 NewTRequestInfo(ui32 size, TChunkIdx chunkIdx, TInstant startTime) { ui64 requestIdx = NextRequestIdx; RequestInfo[requestIdx] = TRequestInfo(size, chunkIdx, startTime); @@ -254,340 +254,340 @@ public: return requestIdx; } - void Handle(NPDisk::TEvChunkReserveResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status == NKikimrProto::OK) { - for (ui32 i =0; i < Chunks.size(); ++i) { - TChunkIdx chunkIdx = msg->ChunkIds[i]; - Chunks[i].Idx = chunkIdx; + void Handle(NPDisk::TEvChunkReserveResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status == NKikimrProto::OK) { + for (ui32 i =0; i < Chunks.size(); ++i) { + TChunkIdx chunkIdx = msg->ChunkIds[i]; + Chunks[i].Idx = chunkIdx; ui64 requestIdx = NewTRequestInfo((ui32)DataBuffer.size(), chunkIdx, TAppData::TimeProvider->Now()); TString tmp = DataBuffer; SendRequest(ctx, std::make_unique<NPDisk::TEvChunkWrite>(PDiskParams->Owner, PDiskParams->OwnerRound, chunkIdx, 0u, new 
NPDisk::TEvChunkWrite::TStrokaBackedUpParts(tmp), reinterpret_cast<void*>(requestIdx), true, NPriWrite::HullHugeAsyncBlob, Sequential)); - ++ChunkWrite_RequestsSent; - } - } else { - Die(ctx); - } - } - - void Handle(NPDisk::TEvChunkWriteResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status == NKikimrProto::OK) { + ++ChunkWrite_RequestsSent; + } + } else { + Die(ctx); + } + } + + void Handle(NPDisk::TEvChunkWriteResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status == NKikimrProto::OK) { ui64 requestIdx = reinterpret_cast<ui64>(msg->Cookie); SendLogRequest(ctx, requestIdx, msg->ChunkIdx); - ++ChunkWrite_OK; - } else { + ++ChunkWrite_OK; + } else { TStringStream str; str << "Chunk writing failed, loader going to die, Status# " << NKikimrProto::EReplyStatus_Name(msg->Status); ErrorReason = str.Str(); LOG_INFO(ctx, NKikimrServices::BS_LOAD_TEST, "%s", str.Str().c_str()); SendRequest(ctx, std::make_unique<TEvents::TEvPoisonPill>()); - } - if (ChunkWrite_OK == Chunks.size()) { - TestStartTime = TAppData::TimeProvider->Now(); - MeasurementStartTime = TestStartTime + DelayBeforeMeasurements; - - SendReadRequests(ctx); - } - } - - - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Rate management - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void HandleWakeup(const TActorContext& ctx) { - SendReadRequests(ctx); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Death management - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void HandlePoisonPill(const TActorContext& ctx) { - Report->LoadType = TLoadReport::LOAD_READ; - MaxInFlight = 0; - CheckDie(ctx); - } - - void CheckDie(const TActorContext& ctx) { - 
if (!MaxInFlight && !InFlight && !LogInFlight && !Harakiri) { - if (PDiskParams) { + } + if (ChunkWrite_OK == Chunks.size()) { + TestStartTime = TAppData::TimeProvider->Now(); + MeasurementStartTime = TestStartTime + DelayBeforeMeasurements; + + SendReadRequests(ctx); + } + } + + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Rate management + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void HandleWakeup(const TActorContext& ctx) { + SendReadRequests(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Death management + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void HandlePoisonPill(const TActorContext& ctx) { + Report->LoadType = TLoadReport::LOAD_READ; + MaxInFlight = 0; + CheckDie(ctx); + } + + void CheckDie(const TActorContext& ctx) { + if (!MaxInFlight && !InFlight && !LogInFlight && !Harakiri) { + if (PDiskParams) { SendRequest(ctx, std::make_unique<NPDisk::TEvHarakiri>(PDiskParams->Owner, PDiskParams->OwnerRound)); - Harakiri = true; - } else { + Harakiri = true; + } else { ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, ErrorReason)); - Die(ctx); - } - } - } - - void Handle(NPDisk::TEvHarakiriResult::TPtr& /*ev*/, const TActorContext& ctx) { + Die(ctx); + } + } + } + + void Handle(NPDisk::TEvHarakiriResult::TPtr& /*ev*/, const TActorContext& ctx) { ctx.Send(Parent, new TEvTestLoadFinished(Tag, Report, ErrorReason)); - Die(ctx); - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Monitoring - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void Handle(TEvUpdateMonitoring::TPtr& ev, const 
TActorContext& ctx) { - Y_UNUSED(ev); - ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - ResponseTimes.Update(); - - const TInstant now = TAppData::TimeProvider->Now(); - if (now > MeasurementStartTime) { - auto begin = TimeSeries.lower_bound(now - TDuration::MilliSeconds(MonitoringUpdateCycleMs)); - if (begin != TimeSeries.end()) { - auto end = std::prev(TimeSeries.lower_bound(now)); - if (end != begin) { - //double seconds = 1; - ui64 speedBps = (end->second.BytesReadTotal - begin->second.BytesReadTotal) / - TDuration::MilliSeconds(MonitoringUpdateCycleMs).SecondsFloat(); - Report->RwSpeedBps.push_back(speedBps); - } else { - Report->RwSpeedBps.push_back(0); - } - } - } - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Chunk reading - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void SendReadRequests(const TActorContext& ctx) { - while (InFlight < MaxInFlight) { - // Randomize interval (if required) - if (!IntervalMs && IntervalMsMax && IntervalMsMin) { - IntervalMs = IntervalMsMin; - if (ui32 delta = (IntervalMsMax > IntervalMsMin? 
IntervalMsMax - IntervalMsMin: 0)) { - IntervalMs += Rng() % delta; - } - } - - if (IntervalMs) { - // Enforce intervals between requests + Die(ctx); + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Monitoring + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void Handle(TEvUpdateMonitoring::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ev); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + ResponseTimes.Update(); + + const TInstant now = TAppData::TimeProvider->Now(); + if (now > MeasurementStartTime) { + auto begin = TimeSeries.lower_bound(now - TDuration::MilliSeconds(MonitoringUpdateCycleMs)); + if (begin != TimeSeries.end()) { + auto end = std::prev(TimeSeries.lower_bound(now)); + if (end != begin) { + //double seconds = 1; + ui64 speedBps = (end->second.BytesReadTotal - begin->second.BytesReadTotal) / + TDuration::MilliSeconds(MonitoringUpdateCycleMs).SecondsFloat(); + Report->RwSpeedBps.push_back(speedBps); + } else { + Report->RwSpeedBps.push_back(0); + } + } + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Chunk reading + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void SendReadRequests(const TActorContext& ctx) { + while (InFlight < MaxInFlight) { + // Randomize interval (if required) + if (!IntervalMs && IntervalMsMax && IntervalMsMin) { + IntervalMs = IntervalMsMin; + if (ui32 delta = (IntervalMsMax > IntervalMsMin? 
IntervalMsMax - IntervalMsMin: 0)) { + IntervalMs += Rng() % delta; + } + } + + if (IntervalMs) { + // Enforce intervals between requests TInstant now = TAppData::TimeProvider->Now(); - TInstant nextRequest = LastRequest + TDuration::MilliSeconds(IntervalMs); - if (now < nextRequest) { - // Suspend sending until interval will elapse - ctx.Schedule(nextRequest - now, new TEvents::TEvWakeup); - break; - } - LastRequest = now; - IntervalMs = 0; // To enforce regeneration of new random interval - } - - // Prepare to send request - ui64 accumWeight = 0; - for (TChunkInfo& chunkInfo : Chunks) { - chunkInfo.AccumWeight = accumWeight; - accumWeight += chunkInfo.Weight; - } - if (!accumWeight) { - break; - } - - ui64 w = (ui64(Rng()) << 32 | Rng()) % accumWeight; - auto it = std::prev(std::upper_bound(Chunks.begin(), Chunks.end(), w, TChunkInfo::TFindByWeight())); - TChunkInfo& chunkInfo = *it; - - TChunkIdx chunkIdx = chunkInfo.Idx; - if (Sequential) { - SlotIndex = (SlotIndex + 1) % chunkInfo.NumSlots; - } else { - SlotIndex = Rng() % chunkInfo.NumSlots; - } - - ui32 size = chunkInfo.SlotSizeBlocks * PDiskParams->AppendBlockSize; - Report->Size = size; - ui32 offset = SlotIndex * size; + TInstant nextRequest = LastRequest + TDuration::MilliSeconds(IntervalMs); + if (now < nextRequest) { + // Suspend sending until interval will elapse + ctx.Schedule(nextRequest - now, new TEvents::TEvWakeup); + break; + } + LastRequest = now; + IntervalMs = 0; // To enforce regeneration of new random interval + } + + // Prepare to send request + ui64 accumWeight = 0; + for (TChunkInfo& chunkInfo : Chunks) { + chunkInfo.AccumWeight = accumWeight; + accumWeight += chunkInfo.Weight; + } + if (!accumWeight) { + break; + } + + ui64 w = (ui64(Rng()) << 32 | Rng()) % accumWeight; + auto it = std::prev(std::upper_bound(Chunks.begin(), Chunks.end(), w, TChunkInfo::TFindByWeight())); + TChunkInfo& chunkInfo = *it; + + TChunkIdx chunkIdx = chunkInfo.Idx; + if (Sequential) { + SlotIndex = (SlotIndex + 
1) % chunkInfo.NumSlots; + } else { + SlotIndex = Rng() % chunkInfo.NumSlots; + } + + ui32 size = chunkInfo.SlotSizeBlocks * PDiskParams->AppendBlockSize; + Report->Size = size; + ui32 offset = SlotIndex * size; ui64 requestIdx = NewTRequestInfo(size, chunkIdx, TAppData::TimeProvider->Now()); SendRequest(ctx, std::make_unique<NPDisk::TEvChunkRead>(PDiskParams->Owner, PDiskParams->OwnerRound, chunkIdx, offset, size, ui8(0), reinterpret_cast<void*>(requestIdx))); - ++ChunkRead_RequestsSent; - - ++InFlight; - } - - CheckDie(ctx); - } - - void Handle(NPDisk::TEvChunkReadResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - if (msg->Status == NKikimrProto::OK) { - ++ChunkRead_OK; - } else { - ++ChunkRead_NonOK; - } + ++ChunkRead_RequestsSent; + + ++InFlight; + } + + CheckDie(ctx); + } + + void Handle(NPDisk::TEvChunkReadResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + if (msg->Status == NKikimrProto::OK) { + ++ChunkRead_OK; + } else { + ++ChunkRead_NonOK; + } ui64 requestIdx = reinterpret_cast<ui64>(msg->Cookie); *BytesRead += RequestInfo[requestIdx].Size; - + FinishRequest(ctx, requestIdx); - - CheckDie(ctx); - } - + + CheckDie(ctx); + } + void SendLogRequest(const TActorContext& ctx, ui64 requestIdx, TChunkIdx chunkIdx) { - TString logRecord = "Hello, my dear log! I've just written a chunk!"; - NPDisk::TCommitRecord record; - record.CommitChunks.push_back(chunkIdx); - TLsnSeg seg(Lsn, Lsn); - ++Lsn; + TString logRecord = "Hello, my dear log! 
I've just written a chunk!"; + NPDisk::TCommitRecord record; + record.CommitChunks.push_back(chunkIdx); + TLsnSeg seg(Lsn, Lsn); + ++Lsn; SendRequest(ctx, std::make_unique<NPDisk::TEvLog>(PDiskParams->Owner, PDiskParams->OwnerRound, - TLogSignature::SignatureHugeLogoBlob, record, logRecord, seg, + TLogSignature::SignatureHugeLogoBlob, record, logRecord, seg, reinterpret_cast<void*>(requestIdx))); - ++LogInFlight; - } - - void Handle(NPDisk::TEvLogResult::TPtr& ev, const TActorContext& ctx) { - auto msg = ev->Get(); - for (const auto& res : msg->Results) { + ++LogInFlight; + } + + void Handle(NPDisk::TEvLogResult::TPtr& ev, const TActorContext& ctx) { + auto msg = ev->Get(); + for (const auto& res : msg->Results) { ui64 requestIdx = reinterpret_cast<ui64>(res.Cookie); RequestInfo.erase(requestIdx); - --LogInFlight; - } - - CheckDie(ctx); - } - + --LogInFlight; + } + + CheckDie(ctx); + } + void FinishRequest(const TActorContext& ctx, ui64 requestIdx) { - TInstant now = TAppData::TimeProvider->Now(); - + TInstant now = TAppData::TimeProvider->Now(); + TRequestInfo *request = &RequestInfo[requestIdx]; - if (now > MeasurementStartTime) { + if (now > MeasurementStartTime) { Report->LatencyUs.Increment((now - request->StartTime).MicroSeconds()); - for(const auto& perc : DevicePercentiles) { - Report->DeviceLatency[perc.first] = Max(Report->DeviceLatency[perc.first], (ui64)*perc.second); - } - } - - TimeSeries.emplace(now, TRequestStat{ - static_cast<ui64>(*BytesRead), // current state of bytes read counter - request->Size, - now - request->StartTime - }); - ResponseTimes.Increment((now - request->StartTime).MicroSeconds()); - + for(const auto& perc : DevicePercentiles) { + Report->DeviceLatency[perc.first] = Max(Report->DeviceLatency[perc.first], (ui64)*perc.second); + } + } + + TimeSeries.emplace(now, TRequestStat{ + static_cast<ui64>(*BytesRead), // current state of bytes read counter + request->Size, + now - request->StartTime + }); + ResponseTimes.Increment((now - 
request->StartTime).MicroSeconds()); + RequestInfo.erase(requestIdx); - // cut time series to 60 seconds - auto pos = TimeSeries.upper_bound(now - TDuration::Seconds(60)); - TimeSeries.erase(TimeSeries.begin(), pos); - --InFlight; - SendReadRequests(ctx); - } - - template<typename TRequest> + // cut time series to 60 seconds + auto pos = TimeSeries.upper_bound(now - TDuration::Seconds(60)); + TimeSeries.erase(TimeSeries.begin(), pos); + --InFlight; + SendReadRequests(ctx); + } + + template<typename TRequest> void SendRequest(const TActorContext& ctx, std::unique_ptr<TRequest>&& request) { ctx.Send(MakeBlobStoragePDiskID(ctx.ExecutorThread.ActorSystem->NodeId, PDiskId), request.release()); - } - - void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { - TStringStream str; -#define PARAM(NAME, VALUE) \ - TABLER() { \ - TABLED() { str << NAME; } \ - TABLED() { str << VALUE; } \ - } + } + + void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + TStringStream str; +#define PARAM(NAME, VALUE) \ + TABLER() { \ + TABLED() { str << NAME; } \ + TABLED() { str << VALUE; } \ + } TMap<ui32, TVector<TDuration>> latmap; - for (const auto& pair : TimeSeries) { - const TRequestStat& stat = pair.second; - latmap[stat.Size].push_back(stat.Latency); - } - HTML(str) { - TABLE() { - TABLEHEAD() { - TABLER() { - TABLEH() { str << "Parameter"; } - TABLEH() { str << "Value"; } - } - } - TABLEBODY() { - PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " - << DurationSeconds << "s"); - PARAM("Current InFlight", InFlight); - PARAM("TEvChunkRead msgs sent", ChunkRead_RequestsSent); - PARAM("TEvChunkReadResult msgs received, OK", ChunkRead_OK); - PARAM("TEvChunkReadResult msgs received, not OK", ChunkRead_NonOK); - PARAM("Bytes Read", (i64)*BytesRead); - PARAM("Number of deleted chunks", DeletedChunksCount); - if (PDiskParams) { - PARAM("Owner", PDiskParams->Owner); - PARAM("Chunk size", PDiskParams->ChunkSize); - 
PARAM("Append block size", PDiskParams->AppendBlockSize); - } - - for (ui32 dt : {5, 10, 15, 20, 60}) { - TInstant now = TAppData::TimeProvider->Now(); - auto it = TimeSeries.upper_bound(now - TDuration::Seconds(dt)); - if (it != TimeSeries.begin()) { - --it; - } - if (it != TimeSeries.end()) { - auto end = std::prev(TimeSeries.end()); - if (end != it) { - double seconds = (end->first - it->first).GetValue() * 1e-6; - double speed = (end->second.BytesReadTotal - it->second.BytesReadTotal) / seconds; - speed /= 1e6; - PARAM("Average read speed at last " << dt << " seconds, MB/s", Sprintf("%.3f", speed)); - } - } - } - - for (auto& pair : latmap) { - str << "<br/>"; + for (const auto& pair : TimeSeries) { + const TRequestStat& stat = pair.second; + latmap[stat.Size].push_back(stat.Latency); + } + HTML(str) { + TABLE() { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Parameter"; } + TABLEH() { str << "Value"; } + } + } + TABLEBODY() { + PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " + << DurationSeconds << "s"); + PARAM("Current InFlight", InFlight); + PARAM("TEvChunkRead msgs sent", ChunkRead_RequestsSent); + PARAM("TEvChunkReadResult msgs received, OK", ChunkRead_OK); + PARAM("TEvChunkReadResult msgs received, not OK", ChunkRead_NonOK); + PARAM("Bytes Read", (i64)*BytesRead); + PARAM("Number of deleted chunks", DeletedChunksCount); + if (PDiskParams) { + PARAM("Owner", PDiskParams->Owner); + PARAM("Chunk size", PDiskParams->ChunkSize); + PARAM("Append block size", PDiskParams->AppendBlockSize); + } + + for (ui32 dt : {5, 10, 15, 20, 60}) { + TInstant now = TAppData::TimeProvider->Now(); + auto it = TimeSeries.upper_bound(now - TDuration::Seconds(dt)); + if (it != TimeSeries.begin()) { + --it; + } + if (it != TimeSeries.end()) { + auto end = std::prev(TimeSeries.end()); + if (end != it) { + double seconds = (end->first - it->first).GetValue() * 1e-6; + double speed = (end->second.BytesReadTotal - 
it->second.BytesReadTotal) / seconds; + speed /= 1e6; + PARAM("Average read speed at last " << dt << " seconds, MB/s", Sprintf("%.3f", speed)); + } + } + } + + for (auto& pair : latmap) { + str << "<br/>"; TVector<TDuration>& latencies = pair.second; - std::sort(latencies.begin(), latencies.end()); - for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { - TDuration value = latencies[size_t(percentile * (latencies.size() - 1))]; - PARAM(Sprintf("Size# %" PRIu32 " Percentile# %.3f", pair.first, percentile), value); - } - } - PARAM("Average speed since start, MB/s", Report->GetAverageSpeed() / 1e6); - PARAM("Speed standard deviation since start, MB/s", Report->GetSpeedDeviation() / 1e6); - for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { + std::sort(latencies.begin(), latencies.end()); + for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { + TDuration value = latencies[size_t(percentile * (latencies.size() - 1))]; + PARAM(Sprintf("Size# %" PRIu32 " Percentile# %.3f", pair.first, percentile), value); + } + } + PARAM("Average speed since start, MB/s", Report->GetAverageSpeed() / 1e6); + PARAM("Speed standard deviation since start, MB/s", Report->GetSpeedDeviation() / 1e6); + for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { size_t value = Report->LatencyUs.GetPercentile(percentile); - PARAM(Sprintf("percentile# %.3f since start, ms", percentile), value / 1000.0); - } - for(const auto& perc : DevicePercentiles) { - PARAM(Sprintf("Device percentile# %.3f for last 15 seconds, ms", perc.first), (TAtomicBase)*perc.second); - } - - PARAM("PDiskBytesRead (Solomon counter), MB", (ui64)*PDiskBytesRead / 1e6); - } - } - } - - ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); - } - - STRICT_STFUNC(StateFunc, - CFunc(TEvents::TSystem::Wakeup, HandleWakeup) - CFunc(TEvents::TSystem::PoisonPill, HandlePoisonPill) - HFunc(TEvRegisterPDiskLoadActorResult, Handle) - HFunc(NPDisk::TEvYardInitResult, Handle) - 
HFunc(NPDisk::TEvHarakiriResult, Handle) - HFunc(TEvUpdateMonitoring, Handle) - HFunc(NPDisk::TEvChunkReserveResult, Handle) - HFunc(NPDisk::TEvChunkReadResult, Handle) - HFunc(NPDisk::TEvChunkWriteResult, Handle) - HFunc(NPDisk::TEvLogResult, Handle) - HFunc(NMon::TEvHttpInfo, Handle) - ) -}; - -IActor *CreatePDiskReaderTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskReadLoadStart& cmd, + PARAM(Sprintf("percentile# %.3f since start, ms", percentile), value / 1000.0); + } + for(const auto& perc : DevicePercentiles) { + PARAM(Sprintf("Device percentile# %.3f for last 15 seconds, ms", perc.first), (TAtomicBase)*perc.second); + } + + PARAM("PDiskBytesRead (Solomon counter), MB", (ui64)*PDiskBytesRead / 1e6); + } + } + } + + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); + } + + STRICT_STFUNC(StateFunc, + CFunc(TEvents::TSystem::Wakeup, HandleWakeup) + CFunc(TEvents::TSystem::PoisonPill, HandlePoisonPill) + HFunc(TEvRegisterPDiskLoadActorResult, Handle) + HFunc(NPDisk::TEvYardInitResult, Handle) + HFunc(NPDisk::TEvHarakiriResult, Handle) + HFunc(TEvUpdateMonitoring, Handle) + HFunc(NPDisk::TEvChunkReserveResult, Handle) + HFunc(NPDisk::TEvChunkReadResult, Handle) + HFunc(NPDisk::TEvChunkWriteResult, Handle) + HFunc(NPDisk::TEvLogResult, Handle) + HFunc(NMon::TEvHttpInfo, Handle) + ) +}; + +IActor *CreatePDiskReaderTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskReadLoadStart& cmd, const TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - ui64 index, ui64 tag) { - return new TPDiskReaderTestLoadActor(cmd, parent, counters, index, tag); -} - -} // NKikimr + ui64 index, ui64 tag) { + return new TPDiskReaderTestLoadActor(cmd, parent, counters, index, tag); +} + +} // NKikimr diff --git a/ydb/core/blobstorage/testload/test_load_pdisk_write.cpp b/ydb/core/blobstorage/testload/test_load_pdisk_write.cpp index cd69d9c44c..8ea16e014c 100644 --- 
a/ydb/core/blobstorage/testload/test_load_pdisk_write.cpp +++ b/ydb/core/blobstorage/testload/test_load_pdisk_write.cpp @@ -47,7 +47,7 @@ class TPDiskWriterTestLoadActor : public TActorBootstrapped<TPDiskWriterTestLoad ui32 Size; TChunkIdx ChunkIdx; TInstant StartTime; - TInstant LogStartTime; + TInstant LogStartTime; bool DataWritten; bool LogWritten; @@ -80,9 +80,9 @@ class TPDiskWriterTestLoadActor : public TActorBootstrapped<TPDiskWriterTestLoad ui32 DurationSeconds; ui32 IntervalMsMin = 0; ui32 IntervalMsMax = 0; - TControlWrapper MaxInFlight; + TControlWrapper MaxInFlight; ui32 InFlight = 0; - ui32 LogInFlight = 0; + ui32 LogInFlight = 0; TInstant LastRequest; ui32 IntervalMs = 0; ui32 PDiskId; @@ -105,9 +105,9 @@ class TPDiskWriterTestLoadActor : public TActorBootstrapped<TPDiskWriterTestLoad bool IsWardenlessTest; bool Harakiri = false; - TInstant TestStartTime; - TInstant MeasurementStartTime; - + TInstant TestStartTime; + TInstant MeasurementStartTime; + // statistics ui64 ChunkWrite_RequestsSent = 0; ui64 ChunkWrite_OK = 0; @@ -115,43 +115,43 @@ class TPDiskWriterTestLoadActor : public TActorBootstrapped<TPDiskWriterTestLoad ui64 ChunkReserve_RequestsSent = 0; ui64 DeletedChunksCount = 0; - // Monitoring - TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; - NMonitoring::TDynamicCounters::TCounterPtr BytesWritten; - NMonitoring::TDynamicCounters::TCounterPtr LogEntriesWritten; - NMonitoring::TPercentileTrackerLg<6, 5, 15> ResponseTimes; - NMonitoring::TPercentileTrackerLg<6, 5, 15> LogResponseTimes; - - TIntrusivePtr<TLoadReport> Report; - TIntrusivePtr<NMonitoring::TCounterForPtr> PDiskBytesWritten; - TMap<double, TIntrusivePtr<NMonitoring::TCounterForPtr>> DevicePercentiles; - + // Monitoring + TIntrusivePtr<NMonitoring::TDynamicCounters> LoadCounters; + NMonitoring::TDynamicCounters::TCounterPtr BytesWritten; + NMonitoring::TDynamicCounters::TCounterPtr LogEntriesWritten; + NMonitoring::TPercentileTrackerLg<6, 5, 15> ResponseTimes; + 
NMonitoring::TPercentileTrackerLg<6, 5, 15> LogResponseTimes; + + TIntrusivePtr<TLoadReport> Report; + TIntrusivePtr<NMonitoring::TCounterForPtr> PDiskBytesWritten; + TMap<double, TIntrusivePtr<NMonitoring::TCounterForPtr>> DevicePercentiles; + public: static constexpr auto ActorActivityType() { return NKikimrServices::TActivity::BS_LOAD_PDISK_WRITE; } TPDiskWriterTestLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLoadStart& cmd, const TActorId& parent, - const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) + const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 index, ui64 tag) : Parent(parent) - , Tag(tag) - , MaxInFlight(4, 0, 65536) + , Tag(tag) + , MaxInFlight(4, 0, 65536) , OwnerRound(1000 + index) , Rng(Now().GetValue()) - , Report(new TLoadReport()) + , Report(new TLoadReport()) { VERIFY_PARAM(DurationSeconds); DurationSeconds = cmd.GetDurationSeconds(); - Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); - Report->Duration = TDuration::Seconds(DurationSeconds); + Y_ASSERT(DurationSeconds > DelayBeforeMeasurements.Seconds()); + Report->Duration = TDuration::Seconds(DurationSeconds); IntervalMsMin = cmd.GetIntervalMsMin(); IntervalMsMax = cmd.GetIntervalMsMax(); VERIFY_PARAM(InFlightWrites); MaxInFlight = cmd.GetInFlightWrites(); - Report->InFlight = MaxInFlight; + Report->InFlight = MaxInFlight; VERIFY_PARAM(PDiskId); PDiskId = cmd.GetPDiskId(); @@ -182,62 +182,62 @@ public: }); } - // Monitoring initialization - LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> - GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); - BytesWritten = LoadCounters->GetCounter("LoadActorBytesWritten", true); - LogEntriesWritten = LoadCounters->GetCounter("LoadActorLogEntriesWritten", true); - TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; - ResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorWriteDuration", "Time in microseconds", percentiles); - 
LogResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorLogWriteDuration", "Time in microseconds", percentiles); - - TIntrusivePtr<NMonitoring::TDynamicCounters> pDiskCounters = GetServiceCounters(counters, "pdisks")-> - GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); - PDiskBytesWritten = pDiskCounters->GetSubgroup("subsystem", "device")->GetCounter("DeviceBytesWritten", true); - TIntrusivePtr<NMonitoring::TDynamicCounters> percentilesGroup; - percentilesGroup = pDiskCounters-> GetSubgroup("subsystem", "deviceWriteDuration")->GetSubgroup("sensor", "Time in microsec"); - for (double percentile : {0.1, 0.5, 0.9, 0.99, 0.999, 1.0}) { - DevicePercentiles.emplace(percentile, percentilesGroup->GetNamedCounter("percentile", - Sprintf("%.1f", percentile * 100.f))); - } - - + // Monitoring initialization + LoadCounters = counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))-> + GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); + BytesWritten = LoadCounters->GetCounter("LoadActorBytesWritten", true); + LogEntriesWritten = LoadCounters->GetCounter("LoadActorLogEntriesWritten", true); + TVector<float> percentiles {0.1f, 0.5f, 0.9f, 0.99f, 0.999f, 1.0f}; + ResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorWriteDuration", "Time in microseconds", percentiles); + LogResponseTimes.Initialize(LoadCounters, "subsystem", "LoadActorLogWriteDuration", "Time in microseconds", percentiles); + + TIntrusivePtr<NMonitoring::TDynamicCounters> pDiskCounters = GetServiceCounters(counters, "pdisks")-> + GetSubgroup("pdisk", Sprintf("%09" PRIu32, PDiskId)); + PDiskBytesWritten = pDiskCounters->GetSubgroup("subsystem", "device")->GetCounter("DeviceBytesWritten", true); + TIntrusivePtr<NMonitoring::TDynamicCounters> percentilesGroup; + percentilesGroup = pDiskCounters-> GetSubgroup("subsystem", "deviceWriteDuration")->GetSubgroup("sensor", "Time in microsec"); + for (double percentile : {0.1, 0.5, 0.9, 0.99, 0.999, 1.0}) { + DevicePercentiles.emplace(percentile, 
percentilesGroup->GetNamedCounter("percentile", + Sprintf("%.1f", percentile * 100.f))); + } + + if (Chunks.empty()) { ythrow TLoadActorException() << "Chunks may not be empty"; } } - ~TPDiskWriterTestLoadActor() { - LoadCounters->ResetCounters(); - } - + ~TPDiskWriterTestLoadActor() { + LoadCounters->ResetCounters(); + } + void Bootstrap(const TActorContext& ctx) { Become(&TPDiskWriterTestLoadActor::StateFunc); ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill); - ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - AppData(ctx)->Icb->RegisterLocalControl(MaxInFlight, Sprintf("PDiskWriteLoadActor_MaxInFlight_%4" PRIu64, Tag).c_str()); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + AppData(ctx)->Icb->RegisterLocalControl(MaxInFlight, Sprintf("PDiskWriteLoadActor_MaxInFlight_%4" PRIu64, Tag).c_str()); if (IsWardenlessTest) { SendRequest(ctx, std::make_unique<NPDisk::TEvYardInit>(OwnerRound, VDiskId, PDiskGuid)); } else { Send(MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()), new TEvRegisterPDiskLoadActor()); } - } - - void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { - OwnerRound = ev->Get()->OwnerRound; + } + + void Handle(TEvRegisterPDiskLoadActorResult::TPtr& ev, const TActorContext& ctx) { + OwnerRound = ev->Get()->OwnerRound; SendRequest(ctx, std::make_unique<NPDisk::TEvYardInit>(OwnerRound, VDiskId, PDiskGuid)); } void Handle(NPDisk::TEvYardInitResult::TPtr& ev, const TActorContext& ctx) { auto msg = ev->Get(); - if (msg->Status != NKikimrProto::OK) { + if (msg->Status != NKikimrProto::OK) { TStringStream str; str << "yard init failed, Status# " << NKikimrProto::EReplyStatus_Name(msg->Status); LOG_INFO(ctx, NKikimrServices::BS_LOAD_TEST, "%s", str.Str().c_str()); ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, str.Str())); - Die(ctx); - return; - } + Die(ctx); + return; + } PDiskParams = msg->PDiskParams; 
DataBuffer.Resize(PDiskParams->ChunkSize); char *data = DataBuffer.data(); @@ -247,8 +247,8 @@ public: for (TChunkInfo& chunk : Chunks) { chunk.SlotSizeBlocks = PDiskParams->ChunkSize / PDiskParams->AppendBlockSize / chunk.NumSlots; } - TestStartTime = TAppData::TimeProvider->Now(); - MeasurementStartTime = TestStartTime + DelayBeforeMeasurements; + TestStartTime = TAppData::TimeProvider->Now(); + MeasurementStartTime = TestStartTime + DelayBeforeMeasurements; CheckForReserve(ctx); } @@ -323,20 +323,20 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void HandlePoisonPill(const TActorContext& ctx) { - Report->LoadType = TLoadReport::LOAD_WRITE; + Report->LoadType = TLoadReport::LOAD_WRITE; MaxInFlight = 0; CheckDie(ctx); } void CheckDie(const TActorContext& ctx) { - if (!MaxInFlight && !InFlight && !LogInFlight && !Harakiri) { - if (PDiskParams) { + if (!MaxInFlight && !InFlight && !LogInFlight && !Harakiri) { + if (PDiskParams) { SendRequest(ctx, std::make_unique<NPDisk::TEvHarakiri>(PDiskParams->Owner, PDiskParams->OwnerRound)); - Harakiri = true; - } else { - ctx.Send(Parent, new TEvTestLoadFinished(Tag, Report, "OK, but can't send TEvHarakiri to PDisk")); - Die(ctx); - } + Harakiri = true; + } else { + ctx.Send(Parent, new TEvTestLoadFinished(Tag, Report, "OK, but can't send TEvHarakiri to PDisk")); + Die(ctx); + } } } @@ -346,32 +346,32 @@ public: } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Monitoring - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - void Handle(TEvUpdateMonitoring::TPtr& ev, const TActorContext& ctx) { - Y_UNUSED(ev); - ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); - ResponseTimes.Update(); - LogResponseTimes.Update(); - - const TInstant now = 
TAppData::TimeProvider->Now(); - if (now > MeasurementStartTime) { - auto begin = TimeSeries.lower_bound(now - TDuration::MilliSeconds(MonitoringUpdateCycleMs)); - if (begin != TimeSeries.end()) { - auto end = std::prev(TimeSeries.lower_bound(now)); - if (end != begin) { - ui64 speedBps = (end->second.BytesWrittenTotal - begin->second.BytesWrittenTotal) / - TDuration::MilliSeconds(MonitoringUpdateCycleMs).SecondsFloat(); - Report->RwSpeedBps.push_back(speedBps); - } else { - Report->RwSpeedBps.push_back(0); - } - } - } - } - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Monitoring + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + void Handle(TEvUpdateMonitoring::TPtr& ev, const TActorContext& ctx) { + Y_UNUSED(ev); + ctx.Schedule(TDuration::MilliSeconds(MonitoringUpdateCycleMs), new TEvUpdateMonitoring); + ResponseTimes.Update(); + LogResponseTimes.Update(); + + const TInstant now = TAppData::TimeProvider->Now(); + if (now > MeasurementStartTime) { + auto begin = TimeSeries.lower_bound(now - TDuration::MilliSeconds(MonitoringUpdateCycleMs)); + if (begin != TimeSeries.end()) { + auto end = std::prev(TimeSeries.lower_bound(now)); + if (end != begin) { + ui64 speedBps = (end->second.BytesWrittenTotal - begin->second.BytesWrittenTotal) / + TDuration::MilliSeconds(MonitoringUpdateCycleMs).SecondsFloat(); + Report->RwSpeedBps.push_back(speedBps); + } else { + Report->RwSpeedBps.push_back(0); + } + } + } + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Chunk writing //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -422,9 +422,9 @@ public: chunkInfo.WriteQueue.pop_front(); ui32 size = chunkInfo.SlotSizeBlocks * PDiskParams->AppendBlockSize; - Report->Size = size; + Report->Size 
= size; ui32 offset = slotIndex * size; - const TInstant now = TAppData::TimeProvider->Now(); + const TInstant now = TAppData::TimeProvider->Now(); // like the parallel mode, but log is treated already written bool isLogWritten = (LogMode == NKikimrBlobStorage::TEvTestLoadRequest::LOG_NONE); ui64 requestIdx = NewTRequestInfo(size, chunkIdx, now, now, false, isLogWritten); @@ -455,7 +455,7 @@ public: ui64 requestIdx = reinterpret_cast<ui64>(msg->Cookie); TRequestInfo *info = &RequestInfo[requestIdx]; info->DataWritten = true; - *BytesWritten += info->Size; + *BytesWritten += info->Size; if (info->LogWritten) { // both data and log are written, this could happen only in LOG_PARALLEL mode; this request is done @@ -481,20 +481,20 @@ public: TLsnSeg seg(Lsn, Lsn); ++Lsn; SendRequest(ctx, std::make_unique<NPDisk::TEvLog>(PDiskParams->Owner, PDiskParams->OwnerRound, - TLogSignature::SignatureHugeLogoBlob, record, logRecord, seg, + TLogSignature::SignatureHugeLogoBlob, record, logRecord, seg, reinterpret_cast<void*>(requestIdx))); - ++LogInFlight; + ++LogInFlight; } void Handle(NPDisk::TEvLogResult::TPtr& ev, const TActorContext& ctx) { auto msg = ev->Get(); - TInstant now = TAppData::TimeProvider->Now(); + TInstant now = TAppData::TimeProvider->Now(); for (const auto& res : msg->Results) { ui64 requestIdx = reinterpret_cast<ui64>(res.Cookie); TRequestInfo *info = &RequestInfo[requestIdx]; info->LogWritten = true; - *LogEntriesWritten += 1; - LogResponseTimes.Increment((now - info->LogStartTime).MicroSeconds()); + *LogEntriesWritten += 1; + LogResponseTimes.Increment((now - info->LogStartTime).MicroSeconds()); if (info->DataWritten) { // both data and log are written, complete request and send another one if possible FinishRequest(ctx, requestIdx); @@ -502,7 +502,7 @@ public: // log is written, but data is not; this is parallel mode and this request will be deleted in chunk write // completion handler } - --LogInFlight; + --LogInFlight; } CheckDie(ctx); @@ -511,20 +511,20 
@@ public: void FinishRequest(const TActorContext& ctx, ui64 requestIdx) { TInstant now = TAppData::TimeProvider->Now(); TRequestInfo *request = &RequestInfo[requestIdx]; - - if (now > MeasurementStartTime) { + + if (now > MeasurementStartTime) { Report->LatencyUs.Increment((now - request->StartTime).MicroSeconds()); - for(const auto& perc : DevicePercentiles) { - Report->DeviceLatency[perc.first] = Max(Report->DeviceLatency[perc.first], (ui64)*perc.second); - } - } - + for(const auto& perc : DevicePercentiles) { + Report->DeviceLatency[perc.first] = Max(Report->DeviceLatency[perc.first], (ui64)*perc.second); + } + } + TimeSeries.emplace(now, TRequestStat{ - static_cast<ui64>(*BytesWritten), // current state of bytes written counter + static_cast<ui64>(*BytesWritten), // current state of bytes written counter request->Size, now - request->StartTime }); - ResponseTimes.Increment((now - request->StartTime).MicroSeconds()); + ResponseTimes.Increment((now - request->StartTime).MicroSeconds()); // cut time series to 60 seconds auto pos = TimeSeries.upper_bound(now - TDuration::Seconds(60)); TimeSeries.erase(TimeSeries.begin(), pos); @@ -570,17 +570,17 @@ public: } } TABLEBODY() { - - PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " - << DurationSeconds << "s"); + + PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - TestStartTime).Seconds() << "s / " + << DurationSeconds << "s"); PARAM("TEvChunkWrite msgs sent", ChunkWrite_RequestsSent); PARAM("TEvChunkWriteResult msgs received, OK", ChunkWrite_OK); PARAM("TEvChunkWriteResult msgs received, not OK", ChunkWrite_NonOK); PARAM("TEvChunkReserve msgs sent", ChunkReserve_RequestsSent); - PARAM("Bytes written", static_cast<ui64>(*BytesWritten)); + PARAM("Bytes written", static_cast<ui64>(*BytesWritten)); PARAM("Number of deleted chunks", DeletedChunksCount); if (PDiskParams) { - PARAM("Owner", PDiskParams->Owner); + PARAM("Owner", PDiskParams->Owner); 
PARAM("Chunk size", PDiskParams->ChunkSize); PARAM("Append block size", PDiskParams->AppendBlockSize); } @@ -596,7 +596,7 @@ public: if (end != it) { double seconds = (end->first - it->first).GetValue() * 1e-6; double speed = (end->second.BytesWrittenTotal - it->second.BytesWrittenTotal) / seconds; - speed /= 1e6; + speed /= 1e6; PARAM("Average write speed at last " << dt << " seconds, MB/s", Sprintf("%.3f", speed)); } } @@ -611,12 +611,12 @@ public: PARAM(Sprintf("Size# %" PRIu32 " Percentile# %.3f", pair.first, percentile), value); } } - PARAM("Average speed since start, MB/s", Report->GetAverageSpeed() / 1e6); - PARAM("Speed standard deviation since start, MB/s", Report->GetSpeedDeviation() / 1e6); - for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { + PARAM("Average speed since start, MB/s", Report->GetAverageSpeed() / 1e6); + PARAM("Speed standard deviation since start, MB/s", Report->GetSpeedDeviation() / 1e6); + for (double percentile : {0.5, 0.9, 0.95, 0.99, 0.999, 1.0}) { size_t value = Report->LatencyUs.GetPercentile(percentile); - PARAM(Sprintf("percentile# %.3f since start, ms", percentile), value / 1000.0); - } + PARAM(Sprintf("percentile# %.3f since start, ms", percentile), value / 1000.0); + } } } } @@ -627,10 +627,10 @@ public: STRICT_STFUNC(StateFunc, CFunc(TEvents::TSystem::Wakeup, HandleWakeup) CFunc(TEvents::TSystem::PoisonPill, HandlePoisonPill) - HFunc(TEvRegisterPDiskLoadActorResult, Handle) + HFunc(TEvRegisterPDiskLoadActorResult, Handle) HFunc(NPDisk::TEvYardInitResult, Handle) HFunc(NPDisk::TEvHarakiriResult, Handle) - HFunc(TEvUpdateMonitoring, Handle) + HFunc(TEvUpdateMonitoring, Handle) HFunc(NPDisk::TEvChunkWriteResult, Handle) HFunc(NPDisk::TEvChunkReserveResult, Handle) HFunc(NPDisk::TEvLogResult, Handle) @@ -640,7 +640,7 @@ public: IActor *CreatePDiskWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TPDiskLoadStart& cmd, const TActorId& parent, const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, ui64 
index, ui64 tag) { - return new TPDiskWriterTestLoadActor(cmd, parent, counters, index, tag); + return new TPDiskWriterTestLoadActor(cmd, parent, counters, index, tag); } } // NKikimr diff --git a/ydb/core/blobstorage/testload/test_load_quantile.h b/ydb/core/blobstorage/testload/test_load_quantile.h index 98ccb4ff25..0d0f0207c6 100644 --- a/ydb/core/blobstorage/testload/test_load_quantile.h +++ b/ydb/core/blobstorage/testload/test_load_quantile.h @@ -4,7 +4,7 @@ #include "test_load_time_series.h" #include <library/cpp/monlib/dynamic_counters/counters.h> - + namespace NKikimr { template<typename T> @@ -13,13 +13,13 @@ namespace NKikimr { using TItem = typename TTimeSeries<T>::TItem; using TTimeSeries<T>::Items; - using TPercentile = std::pair<float, NMonitoring::TDynamicCounters::TCounterPtr>; - using TPercentiles = TVector<TPercentile>; - - TPercentiles Percentiles; - TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; - NMonitoring::TDynamicCounters::TCounterPtr Samples; - + using TPercentile = std::pair<float, NMonitoring::TDynamicCounters::TCounterPtr>; + using TPercentiles = TVector<TPercentile>; + + TPercentiles Percentiles; + TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; + NMonitoring::TDynamicCounters::TCounterPtr Samples; + struct TCompareTimestamp { bool operator ()(const TItem& x, TInstant y) { return x.Timestamp < y; @@ -29,47 +29,47 @@ namespace NKikimr { public: using TTimeSeries<T>::TTimeSeries; - TQuantileTracker(TDuration lifetime, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, - const TString& metric, const TVector<float>& percentiles) - : TTimeSeries<T>(lifetime) - , Counters(counters) - { - Y_VERIFY(Counters); - Samples = Counters->GetCounter("samples", false); - for (auto perc : percentiles) { - auto subgroup = Counters->GetSubgroup("percentile", Sprintf("%.1f", perc * 100.f)); - Percentiles.emplace_back(perc, subgroup->GetCounter(metric, false)); - } - } - - void CalculateQuantiles() const { - Y_VERIFY(Counters); - - 
*Samples = Items.size(); - - if (Items.empty()) { - for (auto& perc : Percentiles) { - *perc.second = T(); - } - return; - } - - // create a vector of values matching time criterion - TVector<T> values; - values.reserve(Items.size()); - for (const TItem &item : Items) { - values.push_back(item.Value); - } - - // sort and calculate quantiles - std::sort(values.begin(), values.end()); - const size_t maxIndex = values.size() - 1; - for (auto& perc : Percentiles) { - const size_t index = Min<size_t>(maxIndex, maxIndex * perc.first); - *perc.second = values[index]; - } - } - + TQuantileTracker(TDuration lifetime, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, + const TString& metric, const TVector<float>& percentiles) + : TTimeSeries<T>(lifetime) + , Counters(counters) + { + Y_VERIFY(Counters); + Samples = Counters->GetCounter("samples", false); + for (auto perc : percentiles) { + auto subgroup = Counters->GetSubgroup("percentile", Sprintf("%.1f", perc * 100.f)); + Percentiles.emplace_back(perc, subgroup->GetCounter(metric, false)); + } + } + + void CalculateQuantiles() const { + Y_VERIFY(Counters); + + *Samples = Items.size(); + + if (Items.empty()) { + for (auto& perc : Percentiles) { + *perc.second = T(); + } + return; + } + + // create a vector of values matching time criterion + TVector<T> values; + values.reserve(Items.size()); + for (const TItem &item : Items) { + values.push_back(item.Value); + } + + // sort and calculate quantiles + std::sort(values.begin(), values.end()); + const size_t maxIndex = values.size() - 1; + for (auto& perc : Percentiles) { + const size_t index = Min<size_t>(maxIndex, maxIndex * perc.first); + *perc.second = values[index]; + } + } + bool CalculateQuantiles(size_t count, const size_t *numerators, size_t denominator, T *res, size_t *numSamples = nullptr, TDuration *interval = nullptr) const { if (numSamples) { diff --git a/ydb/core/blobstorage/testload/test_load_vdisk_write.cpp 
b/ydb/core/blobstorage/testload/test_load_vdisk_write.cpp index ae9966b404..39ecca9dbd 100644 --- a/ydb/core/blobstorage/testload/test_load_vdisk_write.cpp +++ b/ydb/core/blobstorage/testload/test_load_vdisk_write.cpp @@ -25,7 +25,7 @@ namespace NKikimr { {}; const TActorId ParentActorId; - const ui64 Tag; + const ui64 Tag; const TIntrusivePtr<TBlobStorageGroupInfo> Info; const TVDiskID VDiskId; @@ -37,7 +37,7 @@ namespace NKikimr { const ui32 Channel; const ui32 Generation; - const ui64 DurationSeconds; + const ui64 DurationSeconds; const ui32 InFlightPutsMax; const ui64 InFlightPutBytesMax; @@ -48,8 +48,8 @@ namespace NKikimr { TIntervalGenerator PutIntervalGenerator; TIntervalGenerator CollectIntervalGenerator; - TInstant StartTime; - + TInstant StartTime; + bool IsConnected = false; ui32 CollectStep = 0; @@ -61,9 +61,9 @@ namespace NKikimr { ui32 BlobCookie = 0; TInstant NextWriteRequestTimestamp; ui32 InFlightPuts = 0; - ui32 TEvVPutsSent = 0; + ui32 TEvVPutsSent = 0; ui64 InFlightPutBytes = 0; - ui64 BytesWritten = 0; + ui64 BytesWritten = 0; TMap<ui64, ui32> InFlightRequests; ui64 PutCookie = 1; bool EvTryToIssuePutsScheduled = false; @@ -78,7 +78,7 @@ namespace NKikimr { TVDiskLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TVDiskLoadStart& cmd, const NActors::TActorId& parent, ui64 tag) : ParentActorId(parent) - , Tag(tag) + , Tag(tag) , Info(TBlobStorageGroupInfo::Parse(cmd.GetGroupInfo(), nullptr, nullptr)) , VDiskId(VDiskIDFromVDiskID(cmd.GetVDiskId())) , VDiskActorId(Info->GetActorId(VDiskId)) @@ -86,7 +86,7 @@ namespace NKikimr { , TabletId(cmd.GetTabletId()) , Channel(cmd.GetChannel()) , Generation(cmd.GetGeneration()) - , DurationSeconds(cmd.GetDurationSeconds()) + , DurationSeconds(cmd.GetDurationSeconds()) , InFlightPutsMax(cmd.GetInFlightPutsMax()) , InFlightPutBytesMax(cmd.GetInFlightPutBytesMax()) , PutHandleClass(cmd.GetPutHandleClass()) @@ -98,11 +98,11 @@ namespace NKikimr { } void Bootstrap(const TActorContext& ctx) { - 
LOG_INFO(ctx, NKikimrServices::BS_LOAD_TEST, "Load actor starter, erasure# %s", + LOG_INFO(ctx, NKikimrServices::BS_LOAD_TEST, "Load actor starter, erasure# %s", GType.ToString().data()); Become(&TVDiskLoadActor::StateFunc); - StartTime = TAppData::TimeProvider->Now(); - ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill); + StartTime = TAppData::TimeProvider->Now(); + ctx.Schedule(TDuration::Seconds(DurationSeconds), new TEvents::TEvPoisonPill); CreateQueueBackpressure(ctx); } @@ -190,7 +190,7 @@ namespace NKikimr { auto ev = std::make_unique<TEvBlobStorage::TEvVPut>(logoBlobId, parts.Parts[logoBlobId.PartId() - 1].OwnedString, VDiskId, true, &cookie, TInstant::Max(), PutHandleClass); ctx.Send(QueueActorId, ev.release()); - ++TEvVPutsSent; + ++TEvVPutsSent; } void HandleTryToIssuePuts(const TActorContext& ctx) { @@ -210,9 +210,9 @@ namespace NKikimr { --InFlightPuts; InFlightPutBytes -= size; - if (record.GetStatus() == NKikimrProto::OK) { - BytesWritten += size; - } + if (record.GetStatus() == NKikimrProto::OK) { + BytesWritten += size; + } TryToIssuePuts(ctx); } @@ -266,16 +266,16 @@ namespace NKikimr { void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { TStringStream str; -#define NAMED_PARAM(NAME, PARAM) \ - TABLER() { \ - TABLED() { \ - str << NAME; \ - } \ - TABLED() { \ - str << PARAM; \ - } \ - } - +#define NAMED_PARAM(NAME, PARAM) \ + TABLER() { \ + TABLED() { \ + str << NAME; \ + } \ + TABLED() { \ + str << PARAM; \ + } \ + } + #define PARAM(NAME) \ TABLER() { \ TABLED() { \ @@ -299,8 +299,8 @@ namespace NKikimr { } } TABLEBODY() { - NAMED_PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - StartTime).Seconds() - << "s / " << DurationSeconds << "s"); + NAMED_PARAM("Elapsed time / Duration", (TAppData::TimeProvider->Now() - StartTime).Seconds() + << "s / " << DurationSeconds << "s"); PARAM(TabletId) PARAM(Channel) PARAM(Generation) @@ -315,11 +315,11 @@ namespace NKikimr { PARAM(IsConnected) 
PARAM(VDiskId) PARAM(VDiskActorId) - PARAM(BytesWritten) - PARAM(TEvVPutsSent) - TString avgSpeed = Sprintf("%.3lf %s", (double) BytesWritten / (1 << 20) / - (TAppData::TimeProvider->Now() - StartTime).Seconds(), "MB/s"); - NAMED_PARAM("Average speed", avgSpeed); + PARAM(BytesWritten) + PARAM(TEvVPutsSent) + TString avgSpeed = Sprintf("%.3lf %s", (double) BytesWritten / (1 << 20) / + (TAppData::TimeProvider->Now() - StartTime).Seconds(), "MB/s"); + NAMED_PARAM("Average speed", avgSpeed); } } } @@ -341,7 +341,7 @@ namespace NKikimr { IActor *CreateVDiskWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TVDiskLoadStart& cmd, const NActors::TActorId& parent, ui64 tag) { - return new TVDiskLoadActor(cmd, parent, tag); + return new TVDiskLoadActor(cmd, parent, tag); } } // NKikimr diff --git a/ydb/core/blobstorage/testload/test_load_write.cpp b/ydb/core/blobstorage/testload/test_load_write.cpp index 6a8f751743..3169c1609e 100644 --- a/ydb/core/blobstorage/testload/test_load_write.cpp +++ b/ydb/core/blobstorage/testload/test_load_write.cpp @@ -3,14 +3,14 @@ #include "test_load_interval_gen.h" #include "test_load_quantile.h" #include "test_load_speed.h" - + #include <ydb/core/util/yverify_stream.h> #include <ydb/core/util/lz4_data_generator.h> - + #include <google/protobuf/text_format.h> - + #include <library/cpp/monlib/service/pages/templates.h> - + #include <util/datetime/cputimer.h> #include <util/generic/queue.h> #include <util/generic/set.h> @@ -79,53 +79,53 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo } }; - struct TReqInfo { - TDuration SendTime; - TEvBlobStorage::EEv EvType; - ui64 Size; - NKikimrBlobStorage::EPutHandleClass PutHandleClass; - }; - + struct TReqInfo { + TDuration SendTime; + TEvBlobStorage::EEv EvType; + ui64 Size; + NKikimrBlobStorage::EPutHandleClass PutHandleClass; + }; + class TTabletWriter { - using TLatencyTrackerUs = NMonitoring::TPercentileTrackerLg<5, 5, 10>; - 
static_assert(TLatencyTrackerUs::TRACKER_LIMIT >= 100e6, - "TLatencyTrackerUs must have limit grater than 100 second"); - - const TVector<float> Percentiles{0.1, 0.15, 0.5, 0.9, 0.99, 0.999, 1.0}; - - const TDuration ExposePeriod = TDuration::Seconds(10); - - TIntrusivePtr<NMonitoring::TDynamicCounters> TagCounters; + using TLatencyTrackerUs = NMonitoring::TPercentileTrackerLg<5, 5, 10>; + static_assert(TLatencyTrackerUs::TRACKER_LIMIT >= 100e6, + "TLatencyTrackerUs must have limit grater than 100 second"); + + const TVector<float> Percentiles{0.1, 0.15, 0.5, 0.9, 0.99, 0.999, 1.0}; + + const TDuration ExposePeriod = TDuration::Seconds(10); + + TIntrusivePtr<NMonitoring::TDynamicCounters> TagCounters; TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; TWakeupQueue& WakeupQueue; TQueryDispatcher& QueryDispatcher; const ui64 TabletId; const ui32 Channel; ui32 Generation; - ui32 GarbageCollectStep; + ui32 GarbageCollectStep; ui32 WriteStep; ui32 Cookie; - ui32 GroupBlockRetries; + ui32 GroupBlockRetries; const ui32 GroupId; const NKikimrBlobStorage::EPutHandleClass PutHandleClass; TSizeGenerator WriteSizeGen; TIntervalGenerator WriteIntervalGen; - TIntervalGenerator GarbageCollectIntervalGen; + TIntervalGenerator GarbageCollectIntervalGen; TInstant NextWriteTimestamp; ui32 WritesInFlight = 0; ui64 WriteBytesInFlight = 0; const ui32 MaxWritesInFlight; const ui64 MaxWriteBytesInFlight; - const ui64 MaxTotalBytesWritten; + const ui64 MaxTotalBytesWritten; const bool Soft; ui64 TotalBytesWritten = 0; ui64 TotalBytesRead = 0; TSpeedTracker<ui64> MegabytesPerSecondST; TQuantileTracker<ui64> MegabytesPerSecondQT; - TLatencyTrackerUs ResponseQT; + TLatencyTrackerUs ResponseQT; THashMap<ui64, ui64> SentTimestamp; - TDeque<std::pair<ui64, ui64>> WritesInFlightTimestamps; - TIntrusivePtr<NMonitoring::TCounterForPtr> MaxInFlightLatency; + TDeque<std::pair<ui64, ui64>> WritesInFlightTimestamps; + TIntrusivePtr<NMonitoring::TCounterForPtr> MaxInFlightLatency; ui64 WriteQueryId 
= 0; const NKikimrBlobStorage::EGetHandleClass GetHandleClass; TSizeGenerator ReadSizeGen; @@ -140,10 +140,10 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo THashMap<ui64, ui64> ReadSentTimestamp; TSpeedTracker<ui64> ReadMegabytesPerSecondST; TQuantileTracker<ui64> ReadMegabytesPerSecondQT; - TLatencyTrackerUs ReadResponseQT; + TLatencyTrackerUs ReadResponseQT; bool NextWriteInQueue = false; bool NextReadInQueue = false; - bool IsWorkingNow = true; + bool IsWorkingNow = true; TQuantileTracker<ui32> WritesInFlightQT; TQuantileTracker<ui64> WriteBytesInFlightQT; @@ -151,230 +151,230 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo TQuantileTracker<ui64> ReadBytesInFlightQT; TDeque<TInstant> IssuedWriteTimestamp; - TInstant LastLatencyTrackerUpdate; - - TInstant StartTimestamp; - TDuration ScriptedRoundDuration; - // Incremented in every write request - ui64 ScriptedCounter; - // Incremented on cycle; - ui64 ScriptedRound; - TVector<TReqInfo> ScriptedRequests; - + TInstant LastLatencyTrackerUpdate; + + TInstant StartTimestamp; + TDuration ScriptedRoundDuration; + // Incremented in every write request + ui64 ScriptedCounter; + // Incremented on cycle; + ui64 ScriptedRound; + TVector<TReqInfo> ScriptedRequests; + public: TTabletWriter(ui64 tag, TIntrusivePtr<NMonitoring::TDynamicCounters> counters, TWakeupQueue& wakeupQueue, TQueryDispatcher& queryDispatcher, ui64 tabletId, ui32 channel, TMaybe<ui32> generation, ui32 groupId, NKikimrBlobStorage::EPutHandleClass putHandleClass, const TSizeGenerator& writeSizeGen, const TIntervalGenerator& writeIntervalGen, - const TIntervalGenerator& garbageCollectIntervalGen, ui32 maxWritesInFlight, ui64 maxWriteBytesInFlight, + const TIntervalGenerator& garbageCollectIntervalGen, ui32 maxWritesInFlight, ui64 maxWriteBytesInFlight, ui64 maxTotalBytesWritten, bool soft, NKikimrBlobStorage::EGetHandleClass getHandleClass, const TSizeGenerator& readSizeGen, - const 
TIntervalGenerator& readIntervalGen, ui32 maxReadsInFlight, ui64 maxReadBytesInFlight, - TDuration scriptedRoundDuration, TVector<TReqInfo>&& scriptedRequests) - : TagCounters(counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))) - , Counters(TagCounters->GetSubgroup("channel", Sprintf("%" PRIu32, channel))) + const TIntervalGenerator& readIntervalGen, ui32 maxReadsInFlight, ui64 maxReadBytesInFlight, + TDuration scriptedRoundDuration, TVector<TReqInfo>&& scriptedRequests) + : TagCounters(counters->GetSubgroup("tag", Sprintf("%" PRIu64, tag))) + , Counters(TagCounters->GetSubgroup("channel", Sprintf("%" PRIu32, channel))) , WakeupQueue(wakeupQueue) , QueryDispatcher(queryDispatcher) , TabletId(tabletId) , Channel(channel) , Generation(generation ? *generation : 0) - , GarbageCollectStep(1) + , GarbageCollectStep(1) , WriteStep(3) , Cookie(1) - , GroupBlockRetries(3) + , GroupBlockRetries(3) , GroupId(groupId) , PutHandleClass(putHandleClass) , WriteSizeGen(writeSizeGen) , WriteIntervalGen(writeIntervalGen) - , GarbageCollectIntervalGen(garbageCollectIntervalGen) + , GarbageCollectIntervalGen(garbageCollectIntervalGen) , MaxWritesInFlight(maxWritesInFlight) , MaxWriteBytesInFlight(maxWriteBytesInFlight) - , MaxTotalBytesWritten(maxTotalBytesWritten) + , MaxTotalBytesWritten(maxTotalBytesWritten) , Soft(soft) , MegabytesPerSecondST(TDuration::Seconds(3)) // average speed at last 3 seconds - , MegabytesPerSecondQT(ExposePeriod, Counters->GetSubgroup("metric", "writeSpeed"), - "bytesPerSecond", Percentiles) - , ResponseQT() + , MegabytesPerSecondQT(ExposePeriod, Counters->GetSubgroup("metric", "writeSpeed"), + "bytesPerSecond", Percentiles) + , ResponseQT() , GetHandleClass(getHandleClass) , ReadSizeGen(readSizeGen) , ReadIntervalGen(readIntervalGen) , MaxReadsInFlight(maxReadsInFlight) , MaxReadBytesInFlight(maxReadBytesInFlight) , ReadMegabytesPerSecondST(TDuration::Seconds(3)) - , ReadMegabytesPerSecondQT(ExposePeriod, Counters->GetSubgroup("metric", "readSpeed"), 
- "bytesPerSecond", Percentiles) - , ReadResponseQT() - , WritesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "writesInFlight"), - "items", Percentiles) - , WriteBytesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "writeBytesInFlight"), - "bytes", Percentiles) - , ReadsInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "readsInFlight"), - "items", Percentiles) - , ReadBytesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "readBytesInFlight"), - "bytes", Percentiles) - , ScriptedRoundDuration(scriptedRoundDuration) - , ScriptedCounter(0) - , ScriptedRound(0) - , ScriptedRequests(std::move(scriptedRequests)) - { - *Counters->GetCounter("tabletId") = tabletId; - const auto& percCounters = Counters->GetSubgroup("sensor", "microseconds"); - MaxInFlightLatency = percCounters->GetCounter("MaxInFlightLatency"); - ResponseQT.Initialize(percCounters->GetSubgroup("metric", "writeResponse"), Percentiles); - ReadResponseQT.Initialize(percCounters->GetSubgroup("metric", "readResponse"), Percentiles); - } - - TString PrintMe() { - return TStringBuilder() << "TabletId# " << TabletId << " Generation# " << Generation; - } - - ~TTabletWriter() { - TagCounters->ResetCounters(); - } - - template<typename T> - bool CheckStatus(const TActorContext& ctx, T *ev, const TVector<NKikimrProto::EReplyStatus>& goodStatuses) { + , ReadMegabytesPerSecondQT(ExposePeriod, Counters->GetSubgroup("metric", "readSpeed"), + "bytesPerSecond", Percentiles) + , ReadResponseQT() + , WritesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "writesInFlight"), + "items", Percentiles) + , WriteBytesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "writeBytesInFlight"), + "bytes", Percentiles) + , ReadsInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "readsInFlight"), + "items", Percentiles) + , ReadBytesInFlightQT(ExposePeriod, Counters->GetSubgroup("metric", "readBytesInFlight"), + "bytes", Percentiles) + , ScriptedRoundDuration(scriptedRoundDuration) + , 
ScriptedCounter(0) + , ScriptedRound(0) + , ScriptedRequests(std::move(scriptedRequests)) + { + *Counters->GetCounter("tabletId") = tabletId; + const auto& percCounters = Counters->GetSubgroup("sensor", "microseconds"); + MaxInFlightLatency = percCounters->GetCounter("MaxInFlightLatency"); + ResponseQT.Initialize(percCounters->GetSubgroup("metric", "writeResponse"), Percentiles); + ReadResponseQT.Initialize(percCounters->GetSubgroup("metric", "readResponse"), Percentiles); + } + + TString PrintMe() { + return TStringBuilder() << "TabletId# " << TabletId << " Generation# " << Generation; + } + + ~TTabletWriter() { + TagCounters->ResetCounters(); + } + + template<typename T> + bool CheckStatus(const TActorContext& ctx, T *ev, const TVector<NKikimrProto::EReplyStatus>& goodStatuses) { if (goodStatuses.empty() || Count(goodStatuses, ev->Status)) { - return true; - } else { - LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved not OK, msg# " - << ev->ToString()); - IsWorkingNow = false; - ctx.Send(ctx.SelfID, new TEvStopTest()); - return false; - } - } - - // Issue TEvDiscover + return true; + } else { + LOG_ERROR_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved not OK, msg# " + << ev->ToString()); + IsWorkingNow = false; + ctx.Send(ctx.SelfID, new TEvStopTest()); + return false; + } + } + + // Issue TEvDiscover void Bootstrap(const TActorContext& ctx) { NextWriteTimestamp = TAppData::TimeProvider->Now(); auto ev = std::make_unique<TEvBlobStorage::TEvDiscover>(TabletId, Generation, false, true, TInstant::Max(), 0); - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " is bootstrapped, going to send " - << ev->ToString()); - auto callback = [this] (IEventBase *event, const TActorContext& ctx) { - auto *res = dynamic_cast<TEvBlobStorage::TEvDiscoverResult *>(event); - Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::NODATA})) { - return; - } - LOG_INFO_S(ctx, 
NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); - Generation = res->BlockedGeneration + 1; - IssueTEvBlock(ctx); - }; + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " is bootstrapped, going to send " + << ev->ToString()); + auto callback = [this] (IEventBase *event, const TActorContext& ctx) { + auto *res = dynamic_cast<TEvBlobStorage::TEvDiscoverResult *>(event); + Y_VERIFY(res); + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::NODATA})) { + return; + } + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); + Generation = res->BlockedGeneration + 1; + IssueTEvBlock(ctx); + }; SendToBSProxy(ctx, GroupId, ev.release(), QueryDispatcher.ObtainCookie(std::move(callback))); - } - - void IssueTEvBlock(const TActorContext& ctx) { + } + + void IssueTEvBlock(const TActorContext& ctx) { auto ev = std::make_unique<TEvBlobStorage::TEvBlock>(TabletId, Generation, TInstant::Max()); - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " going to send " << ev->ToString()); - auto callback = [this] (IEventBase *event, const TActorContext& ctx) { - auto *res = dynamic_cast<TEvBlobStorage::TEvBlockResult *>(event); - Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::RACE})) { - return; - } else if (res->Status == NKikimrProto::EReplyStatus::RACE && GroupBlockRetries-- > 0) { - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); - IssueTEvBlock(ctx); - return; - } - - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); - // For work use next generation after blocked - ++Generation; - IssueLastBlob(ctx); - }; + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " going to send " << ev->ToString()); + auto callback = [this] (IEventBase *event, const TActorContext& ctx) { + auto *res = 
dynamic_cast<TEvBlobStorage::TEvBlockResult *>(event); + Y_VERIFY(res); + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::RACE})) { + return; + } else if (res->Status == NKikimrProto::EReplyStatus::RACE && GroupBlockRetries-- > 0) { + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); + IssueTEvBlock(ctx); + return; + } + + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); + // For work use next generation after blocked + ++Generation; + IssueLastBlob(ctx); + }; SendToBSProxy(ctx, GroupId, ev.release(), QueryDispatcher.ObtainCookie(std::move(callback))); - } - - void IssueLastBlob(const TActorContext& ctx) { - const ui32 size = 1; - const ui32 lastStep = Max<ui32>(); - const TLogoBlobID id(TabletId, Generation, lastStep, Channel, size, 0); - const TString buffer = GenerateBuffer(id); + } + + void IssueLastBlob(const TActorContext& ctx) { + const ui32 size = 1; + const ui32 lastStep = Max<ui32>(); + const TLogoBlobID id(TabletId, Generation, lastStep, Channel, size, 0); + const TString buffer = GenerateBuffer(id); auto ev = std::make_unique<TEvBlobStorage::TEvPut>(id, buffer, TInstant::Max(), PutHandleClass); - - auto callback = [this] (IEventBase *event, const TActorContext& ctx) { - auto *res = dynamic_cast<TEvBlobStorage::TEvPutResult *>(event); - Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { - return; - } - - IssueTEvCollectGarbage(ctx); - }; - + + auto callback = [this] (IEventBase *event, const TActorContext& ctx) { + auto *res = dynamic_cast<TEvBlobStorage::TEvPutResult *>(event); + Y_VERIFY(res); + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { + return; + } + + IssueTEvCollectGarbage(ctx); + }; + SendToBSProxy(ctx, GroupId, ev.release(), QueryDispatcher.ObtainCookie(std::move(callback))); - } - - void IssueTEvCollectGarbage(const TActorContext& ctx) { - auto ev = 
TEvBlobStorage::TEvCollectGarbage::CreateHardBarrier(TabletId, Generation, GarbageCollectStep, - Channel, Generation, 0, TInstant::Max()); - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " going to send " << ev->ToString()); - ++GarbageCollectStep; - auto callback = [this] (IEventBase *event, const TActorContext& ctx) { - auto *res = dynamic_cast<TEvBlobStorage::TEvCollectGarbageResult *>(event); - Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { - return; - } - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); - StartWorking(ctx); - }; - - SendToBSProxy(ctx, GroupId, ev.Release(), QueryDispatcher.ObtainCookie(std::move(callback))); - } - - void StartWorking(const TActorContext& ctx) { + } + + void IssueTEvCollectGarbage(const TActorContext& ctx) { + auto ev = TEvBlobStorage::TEvCollectGarbage::CreateHardBarrier(TabletId, Generation, GarbageCollectStep, + Channel, Generation, 0, TInstant::Max()); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " going to send " << ev->ToString()); + ++GarbageCollectStep; + auto callback = [this] (IEventBase *event, const TActorContext& ctx) { + auto *res = dynamic_cast<TEvBlobStorage::TEvCollectGarbageResult *>(event); + Y_VERIFY(res); + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { + return; + } + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); + StartWorking(ctx); + }; + + SendToBSProxy(ctx, GroupId, ev.Release(), QueryDispatcher.ObtainCookie(std::move(callback))); + } + + void StartWorking(const TActorContext& ctx) { StartTimestamp = TAppData::TimeProvider->Now(); InitializeTrackers(StartTimestamp); IssueWriteIfPossible(ctx); - ScheduleGarbageCollect(ctx); + ScheduleGarbageCollect(ctx); ExposeCounters(ctx); } - void StopWorking(const TActorContext& ctx) { - auto ev = TEvBlobStorage::TEvCollectGarbage::CreateHardBarrier(TabletId, Generation, GarbageCollectStep, - 
Channel, Generation, Max<ui32>(), TInstant::Max()); - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " end working, going to send " << ev->ToString()); - ++GarbageCollectStep; - auto callback = [this](IEventBase *event, const TActorContext& ctx) { - auto *res = dynamic_cast<TEvBlobStorage::TEvCollectGarbageResult *>(event); - Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { - return; - } - LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); - - if (IsWorkingNow) { - ctx.Send(ctx.SelfID, new TEvStopTest()); - } - }; - SendToBSProxy(ctx, GroupId, ev.Release(), QueryDispatcher.ObtainCookie(std::move(callback))); - } - + void StopWorking(const TActorContext& ctx) { + auto ev = TEvBlobStorage::TEvCollectGarbage::CreateHardBarrier(TabletId, Generation, GarbageCollectStep, + Channel, Generation, Max<ui32>(), TInstant::Max()); + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " end working, going to send " << ev->ToString()); + ++GarbageCollectStep; + auto callback = [this](IEventBase *event, const TActorContext& ctx) { + auto *res = dynamic_cast<TEvBlobStorage::TEvCollectGarbageResult *>(event); + Y_VERIFY(res); + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { + return; + } + LOG_INFO_S(ctx, NKikimrServices::BS_LOAD_TEST, PrintMe() << " recieved " << res->ToString()); + + if (IsWorkingNow) { + ctx.Send(ctx.SelfID, new TEvStopTest()); + } + }; + SendToBSProxy(ctx, GroupId, ev.Release(), QueryDispatcher.ObtainCookie(std::move(callback))); + } + void InitializeTrackers(TInstant now) { - LastLatencyTrackerUpdate = now; - - MegabytesPerSecondST.Add(now, 0); - MegabytesPerSecondQT.Add(now, 0); - - ReadMegabytesPerSecondST.Add(now, 0); - ReadMegabytesPerSecondQT.Add(now, 0); - - WritesInFlightQT.Add(now, 0); - WriteBytesInFlightQT.Add(now, 0); - ReadsInFlightQT.Add(now, 0); - ReadBytesInFlightQT.Add(now, 0); - } - + LastLatencyTrackerUpdate = now; + + 
MegabytesPerSecondST.Add(now, 0); + MegabytesPerSecondQT.Add(now, 0); + + ReadMegabytesPerSecondST.Add(now, 0); + ReadMegabytesPerSecondQT.Add(now, 0); + + WritesInFlightQT.Add(now, 0); + WriteBytesInFlightQT.Add(now, 0); + ReadsInFlightQT.Add(now, 0); + ReadBytesInFlightQT.Add(now, 0); + } + void UpdateQuantile(TInstant now) { ui64 speed; - MegabytesPerSecondST.Add(now, TotalBytesWritten); + MegabytesPerSecondST.Add(now, TotalBytesWritten); if (MegabytesPerSecondST.CalculateSpeed(&speed)) { MegabytesPerSecondQT.Add(now, speed); } - ReadMegabytesPerSecondST.Add(now, TotalBytesRead); + ReadMegabytesPerSecondST.Add(now, TotalBytesRead); if (ReadMegabytesPerSecondST.CalculateSpeed(&speed)) { ReadMegabytesPerSecondQT.Add(now, speed); } @@ -382,15 +382,15 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo WriteBytesInFlightQT.Add(now, WriteBytesInFlight); ReadsInFlightQT.Add(now, ReadsInFlight); ReadBytesInFlightQT.Add(now, ReadBytesInFlight); - if (now > LastLatencyTrackerUpdate + TDuration::Seconds(1)) { - LastLatencyTrackerUpdate = now; - ResponseQT.Update(); - ReadResponseQT.Update(); - if (WritesInFlightTimestamps) { + if (now > LastLatencyTrackerUpdate + TDuration::Seconds(1)) { + LastLatencyTrackerUpdate = now; + ResponseQT.Update(); + ReadResponseQT.Update(); + if (WritesInFlightTimestamps) { const auto& maxLatency = CyclesToDuration(GetCycleCountFast() - WritesInFlightTimestamps.front().second); - *MaxInFlightLatency = maxLatency.MicroSeconds(); - } - } + *MaxInFlightLatency = maxLatency.MicroSeconds(); + } + } } static TString PercentileName(int value) { @@ -398,13 +398,13 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo } void ExposeCounters(const TActorContext &ctx) { - MegabytesPerSecondQT.CalculateQuantiles(); - ReadMegabytesPerSecondQT.CalculateQuantiles(); - - WritesInFlightQT.CalculateQuantiles(); - WriteBytesInFlightQT.CalculateQuantiles(); - ReadsInFlightQT.CalculateQuantiles(); - 
ReadBytesInFlightQT.CalculateQuantiles(); + MegabytesPerSecondQT.CalculateQuantiles(); + ReadMegabytesPerSecondQT.CalculateQuantiles(); + + WritesInFlightQT.CalculateQuantiles(); + WriteBytesInFlightQT.CalculateQuantiles(); + ReadsInFlightQT.CalculateQuantiles(); + ReadBytesInFlightQT.CalculateQuantiles(); using namespace std::placeholders; WakeupQueue.Put(TAppData::TimeProvider->Now() + ExposePeriod, @@ -427,7 +427,7 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo DUMP_PARAM(TabletId) DUMP_PARAM(Channel) DUMP_PARAM(Generation) - DUMP_PARAM(GarbageCollectStep) + DUMP_PARAM(GarbageCollectStep) DUMP_PARAM(WriteStep) DUMP_PARAM(Cookie) DUMP_PARAM(GroupId) @@ -442,7 +442,7 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo DUMP_PARAM(MaxWritesInFlight) DUMP_PARAM(MaxWriteBytesInFlight) DUMP_PARAM(TotalBytesWritten) - DUMP_PARAM(MaxTotalBytesWritten) + DUMP_PARAM(MaxTotalBytesWritten) DUMP_PARAM(TotalBytesRead) DUMP_PARAM(NextReadTimestamp) DUMP_PARAM(ReadsInFlight) @@ -459,8 +459,8 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo TABLER() { TABLED() { str << "Writes per second"; } if (IssuedWriteTimestamp.size() > 1) { - const double rps = IssuedWriteTimestamp.size() / - (IssuedWriteTimestamp.back() - IssuedWriteTimestamp.front()).SecondsFloat(); + const double rps = IssuedWriteTimestamp.size() / + (IssuedWriteTimestamp.back() - IssuedWriteTimestamp.front()).SecondsFloat(); TABLED() { str << Sprintf("%.2lf", rps); } } else { TABLED() { str << "no writes"; } @@ -488,48 +488,48 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo } private: - void UpdateNextWakeups(const TActorContext& ctx, const TInstant& now) { - if (now < NextWriteTimestamp && !NextWriteInQueue) { - using namespace std::placeholders; - WakeupQueue.Put(NextWriteTimestamp, std::bind(&TTabletWriter::IssueWriteIfPossible, this, _1), ctx); - NextWriteInQueue = true; - } - - 
if (now < NextReadTimestamp && !NextReadInQueue) { - using namespace std::placeholders; - WakeupQueue.Put(NextReadTimestamp, std::bind(&TTabletWriter::IssueReadIfPossible, this, _1), ctx); - NextReadInQueue = true; - } - } - + void UpdateNextWakeups(const TActorContext& ctx, const TInstant& now) { + if (now < NextWriteTimestamp && !NextWriteInQueue) { + using namespace std::placeholders; + WakeupQueue.Put(NextWriteTimestamp, std::bind(&TTabletWriter::IssueWriteIfPossible, this, _1), ctx); + NextWriteInQueue = true; + } + + if (now < NextReadTimestamp && !NextReadInQueue) { + using namespace std::placeholders; + WakeupQueue.Put(NextReadTimestamp, std::bind(&TTabletWriter::IssueReadIfPossible, this, _1), ctx); + NextReadInQueue = true; + } + } + void IssueWriteIfPossible(const TActorContext& ctx) { const TInstant now = TAppData::TimeProvider->Now(); while ((WritesInFlight < MaxWritesInFlight || !MaxWritesInFlight) && (WriteBytesInFlight < MaxWriteBytesInFlight || !MaxWriteBytesInFlight) && - (TotalBytesWritten + WriteBytesInFlight < MaxTotalBytesWritten || !MaxTotalBytesWritten) && - now >= NextWriteTimestamp && - (!ScriptedRequests || ScriptedRequests[ScriptedCounter].EvType == TEvBlobStorage::EvPut)) { + (TotalBytesWritten + WriteBytesInFlight < MaxTotalBytesWritten || !MaxTotalBytesWritten) && + now >= NextWriteTimestamp && + (!ScriptedRequests || ScriptedRequests[ScriptedCounter].EvType == TEvBlobStorage::EvPut)) { IssueWriteRequest(ctx, now); } - if (ScriptedRequests) { - UpdateNextTimestemps(false); - } - UpdateNextWakeups(ctx, now); + if (ScriptedRequests) { + UpdateNextTimestemps(false); + } + UpdateNextWakeups(ctx, now); } void IssueWriteRequest(const TActorContext& ctx, TInstant now) { - ui32 size; - NKikimrBlobStorage::EPutHandleClass putHandleClass; - if (ScriptedRequests) { - const auto& req = ScriptedRequests[ScriptedCounter]; - size = req.Size; - putHandleClass = req.PutHandleClass; - } else { - size = WriteSizeGen.Generate(); - putHandleClass = 
PutHandleClass; - } + ui32 size; + NKikimrBlobStorage::EPutHandleClass putHandleClass; + if (ScriptedRequests) { + const auto& req = ScriptedRequests[ScriptedCounter]; + size = req.Size; + putHandleClass = req.PutHandleClass; + } else { + size = WriteSizeGen.Generate(); + putHandleClass = PutHandleClass; + } const TLogoBlobID id(TabletId, Generation, WriteStep, Channel, size, Cookie); const TString buffer = GenerateBuffer(id); auto ev = std::make_unique<TEvBlobStorage::TEvPut>(id, buffer, TInstant::Max(), putHandleClass); @@ -539,8 +539,8 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo auto *res = dynamic_cast<TEvBlobStorage::TEvPutResult *>(event); Y_VERIFY(res); if (!CheckStatus(ctx, res, {})) { - return; - } + return; + } const TLogoBlobID& id = res->Id; const ui32 size = id.BlobSize(); @@ -560,17 +560,17 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo TotalBytesWritten += size; auto it = SentTimestamp.find(writeQueryId); - const auto sendCycles = it->second; + const auto sendCycles = it->second; Y_VERIFY(it != SentTimestamp.end()); const TDuration response = CyclesToDuration(GetCycleCountFast() - sendCycles); SentTimestamp.erase(it); - // It's very likely that "writeQueryId" will be found at the start - auto itInFlight = Find(WritesInFlightTimestamps, std::make_pair(writeQueryId, sendCycles)); - Y_VERIFY(itInFlight != WritesInFlightTimestamps.end()); - WritesInFlightTimestamps.erase(itInFlight); - - ResponseQT.Increment(response.MicroSeconds()); + // It's very likely that "writeQueryId" will be found at the start + auto itInFlight = Find(WritesInFlightTimestamps, std::make_pair(writeQueryId, sendCycles)); + Y_VERIFY(itInFlight != WritesInFlightTimestamps.end()); + WritesInFlightTimestamps.erase(itInFlight); + + ResponseQT.Increment(response.MicroSeconds()); IssueWriteIfPossible(ctx); if (ConfirmedBlobIds.size() == 1) { @@ -582,8 +582,8 @@ class TLogWriterTestLoadActor : public 
TActorBootstrapped<TLogWriterTestLoadActo }; SendToBSProxy(ctx, GroupId, ev.release(), QueryDispatcher.ObtainCookie(std::move(writeCallback))); const auto nowCycles = GetCycleCountFast(); - WritesInFlightTimestamps.emplace_back(writeQueryId, nowCycles); - SentTimestamp.emplace(writeQueryId, nowCycles); + WritesInFlightTimestamps.emplace_back(writeQueryId, nowCycles); + SentTimestamp.emplace(writeQueryId, nowCycles); IssuedWriteTimestamp.push_back(TAppData::TimeProvider->Now()); while (IssuedWriteTimestamp.size() > 10000 || IssuedWriteTimestamp.back() - IssuedWriteTimestamp.front() >= TDuration::Seconds(5)) { IssuedWriteTimestamp.pop_front(); @@ -594,48 +594,48 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo ++WritesInFlight; WriteBytesInFlight += size; - if (ScriptedRequests) { - UpdateNextTimestemps(true); + if (ScriptedRequests) { + UpdateNextTimestemps(true); } else { - // calculate time of next write request - TDuration duration = WriteIntervalGen.Generate(); - if (Soft) { - NextWriteTimestamp += duration; - } else { - NextWriteTimestamp = now + duration; - } - } - + // calculate time of next write request + TDuration duration = WriteIntervalGen.Generate(); + if (Soft) { + NextWriteTimestamp += duration; + } else { + NextWriteTimestamp = now + duration; + } + } + NextWriteInQueue = false; } - void UpdateNextTimestemps(bool incrementCounter) { - Y_VERIFY(ScriptedRequests); - - if (incrementCounter) { - if (++ScriptedCounter == ScriptedRequests.size()) { - ScriptedCounter = 0; - ++ScriptedRound; - } - } - TDuration duration = ScriptedRequests[ScriptedCounter].SendTime; - duration += ScriptedRound * ScriptedRoundDuration; - - switch (ScriptedRequests[ScriptedCounter].EvType) { - case TEvBlobStorage::EvGet: - NextReadTimestamp = StartTimestamp + duration; - break; - case TEvBlobStorage::EvPut: - NextWriteTimestamp = StartTimestamp + duration; - break; - default: - Y_FAIL_S("Unsupported request type# " << 
(ui64)ScriptedRequests[ScriptedCounter].EvType); - break; - } - } - - void ScheduleGarbageCollect(const TActorContext& ctx) { - TDuration duration = GarbageCollectIntervalGen.Generate(); + void UpdateNextTimestemps(bool incrementCounter) { + Y_VERIFY(ScriptedRequests); + + if (incrementCounter) { + if (++ScriptedCounter == ScriptedRequests.size()) { + ScriptedCounter = 0; + ++ScriptedRound; + } + } + TDuration duration = ScriptedRequests[ScriptedCounter].SendTime; + duration += ScriptedRound * ScriptedRoundDuration; + + switch (ScriptedRequests[ScriptedCounter].EvType) { + case TEvBlobStorage::EvGet: + NextReadTimestamp = StartTimestamp + duration; + break; + case TEvBlobStorage::EvPut: + NextWriteTimestamp = StartTimestamp + duration; + break; + default: + Y_FAIL_S("Unsupported request type# " << (ui64)ScriptedRequests[ScriptedCounter].EvType); + break; + } + } + + void ScheduleGarbageCollect(const TActorContext& ctx) { + TDuration duration = GarbageCollectIntervalGen.Generate(); if (duration != TDuration()) { using namespace std::placeholders; WakeupQueue.Put(TAppData::TimeProvider->Now() + duration, @@ -643,9 +643,9 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo } } - void IssueGarbageCollectRequest(const TActorContext& ctx) { + void IssueGarbageCollectRequest(const TActorContext& ctx) { auto ev = std::make_unique<TEvBlobStorage::TEvCollectGarbage>(TabletId, Generation, GarbageCollectStep, Channel, - true, Generation, GarbageCollectStep, nullptr, nullptr, TInstant::Max(), false); + true, Generation, GarbageCollectStep, nullptr, nullptr, TInstant::Max(), false); auto callback = [](IEventBase *event, const TActorContext& /*ctx*/) { auto *res = dynamic_cast<TEvBlobStorage::TEvCollectGarbageResult *>(event); Y_VERIFY(res); @@ -654,47 +654,47 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo // just as we have sent this request, we have to trim all confirmed blobs which are going to be deleted 
const auto it = std::lower_bound(ConfirmedBlobIds.begin(), ConfirmedBlobIds.end(), - TLogoBlobID(TabletId, Generation, GarbageCollectStep, Channel, TLogoBlobID::MaxBlobSize, + TLogoBlobID(TabletId, Generation, GarbageCollectStep, Channel, TLogoBlobID::MaxBlobSize, TLogoBlobID::MaxCookie, TLogoBlobID::MaxPartId)); ConfirmedBlobIds.erase(ConfirmedBlobIds.begin(), it); - ++GarbageCollectStep; + ++GarbageCollectStep; ++WriteStep; Cookie = 1; - ScheduleGarbageCollect(ctx); + ScheduleGarbageCollect(ctx); } void IssueReadIfPossible(const TActorContext& ctx) { const TInstant now = TAppData::TimeProvider->Now(); - while (ReadsInFlight < MaxReadsInFlight && - (ReadBytesInFlight < MaxReadBytesInFlight || !MaxReadBytesInFlight) && - now >= NextReadTimestamp && - ConfirmedBlobIds && - (!ScriptedRequests || ScriptedRequests[ScriptedCounter].EvType == TEvBlobStorage::EvGet)) { + while (ReadsInFlight < MaxReadsInFlight && + (ReadBytesInFlight < MaxReadBytesInFlight || !MaxReadBytesInFlight) && + now >= NextReadTimestamp && + ConfirmedBlobIds && + (!ScriptedRequests || ScriptedRequests[ScriptedCounter].EvType == TEvBlobStorage::EvGet)) { IssueReadRequest(ctx, now); } - if (ScriptedRequests) { - UpdateNextTimestemps(false); - } - UpdateNextWakeups(ctx, now); + if (ScriptedRequests) { + UpdateNextTimestemps(false); + } + UpdateNextWakeups(ctx, now); } void IssueReadRequest(const TActorContext& ctx, TInstant now) { auto iter = ConfirmedBlobIds.begin(); std::advance(iter, RandomNumber(ConfirmedBlobIds.size())); const TLogoBlobID &id = *iter; - - ui32 size; - if (ScriptedRequests) { - const auto& req = ScriptedRequests[ScriptedCounter]; - size = req.Size ? req.Size : id.BlobSize(); - } else { - size = ReadSizeGen.Generate(); - } - size = Min(size, id.BlobSize()); - + + ui32 size; + if (ScriptedRequests) { + const auto& req = ScriptedRequests[ScriptedCounter]; + size = req.Size ? 
req.Size : id.BlobSize(); + } else { + size = ReadSizeGen.Generate(); + } + size = Min(size, id.BlobSize()); + const ui32 offset = RandomNumber<ui32>(id.BlobSize() - size + 1); auto ev = std::make_unique<TEvBlobStorage::TEvGet>(id, offset, size, TInstant::Max(), GetHandleClass); @@ -703,21 +703,21 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo auto readCallback = [this, size, readQueryId](IEventBase *event, const TActorContext& ctx) { auto *res = dynamic_cast<TEvBlobStorage::TEvGetResult*>(event); Y_VERIFY(res); - if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { - return; - } + if (!CheckStatus(ctx, res, {NKikimrProto::EReplyStatus::OK})) { + return; + } Y_VERIFY(ReadsInFlight >= 1 && ReadBytesInFlight >= size); --ReadsInFlight; ReadBytesInFlight -= size; - TotalBytesRead += size; + TotalBytesRead += size; auto it = ReadSentTimestamp.find(readQueryId); Y_VERIFY(it != ReadSentTimestamp.end()); const TDuration response = CyclesToDuration(GetCycleCountFast() - it->second); ReadSentTimestamp.erase(it); - ReadResponseQT.Increment(response.MicroSeconds()); + ReadResponseQT.Increment(response.MicroSeconds()); IssueReadIfPossible(ctx); }; @@ -728,25 +728,25 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo ReadBytesInFlight += size; // calculate time of next write request - if (ScriptedRequests) { - UpdateNextTimestemps(true); + if (ScriptedRequests) { + UpdateNextTimestemps(true); } else { - TDuration duration = ReadIntervalGen.Generate(); - if (Soft) { - NextReadTimestamp += duration; - } else { - NextReadTimestamp = now + duration; - } + TDuration duration = ReadIntervalGen.Generate(); + if (Soft) { + NextReadTimestamp += duration; + } else { + NextReadTimestamp = now + duration; + } } NextReadInQueue = false; } static TString GenerateBuffer(const TLogoBlobID& id) { - return GenDataForLZ4(id.BlobSize()); + return GenDataForLZ4(id.BlobSize()); } }; - TString ConfingString; + TString 
ConfingString; const ui64 Tag; const TActorId Parent; @@ -763,20 +763,20 @@ class TLogWriterTestLoadActor : public TActorBootstrapped<TLogWriterTestLoadActo NMonitoring::TDynamicCounters::TCounterPtr ScheduleCounter; - ui32 TestStoppedRecieved = 0; - + ui32 TestStoppedRecieved = 0; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::BS_LOAD_ACTOR; } TLogWriterTestLoadActor(const NKikimrBlobStorage::TEvTestLoadRequest::TLoadStart& cmd, const TActorId& parent, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, ui64 tag) - : Tag(tag) + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, ui64 tag) + : Tag(tag) , Parent(parent) , ScheduleCounter(counters->GetSubgroup("subsystem", "scheduler")->GetCounter("ScheduleCounter", true)) { - google::protobuf::TextFormat::PrintToString(cmd, &ConfingString); + google::protobuf::TextFormat::PrintToString(cmd, &ConfingString); if (cmd.HasDurationSeconds()) { TestDuration = TDuration::Seconds(cmd.GetDurationSeconds()); } @@ -799,14 +799,14 @@ public: TSizeGenerator writeSizeGen(profile.GetSizes()); TIntervalGenerator writeIntervalGen(profile.GetWriteIntervals()); - TIntervalGenerator garbageCollectIntervalGen(profile.GetFlushIntervals()); + TIntervalGenerator garbageCollectIntervalGen(profile.GetFlushIntervals()); const ui32 maxWritesInFlight = profile.GetMaxInFlightRequests(); const ui64 maxWriteBytesInFlight = profile.GetMaxInFlightBytes(); - ui64 maxTotalBytesWritten = 0; - if (profile.HasMaxTotalBytesWritten()) { - maxTotalBytesWritten = profile.GetMaxTotalBytesWritten(); - } + ui64 maxTotalBytesWritten = 0; + if (profile.HasMaxTotalBytesWritten()) { + maxTotalBytesWritten = profile.GetMaxTotalBytesWritten(); + } const bool soft = profile.GetSoft(); NKikimrBlobStorage::EGetHandleClass getHandleClass = NKikimrBlobStorage::EGetHandleClass::FastRead; @@ -819,26 +819,26 @@ public: const ui64 maxReadBytesInFlight = profile.GetMaxInFlightReadBytes(); for (const 
auto& tablet : profile.GetTablets()) { - auto scriptedRoundDuration = TDuration::MicroSeconds(tablet.GetScriptedCycleDurationSec() * 1e6); - TVector<TReqInfo> scriptedRequests; - for (const auto& req : tablet.GetRequests()) { - scriptedRequests.push_back(TReqInfo{ - TDuration::Seconds(req.GetSendTime()), - static_cast<TEvBlobStorage::EEv>(req.GetType()), - req.GetSize(), - req.HasPutHandleClass() ? req.GetPutHandleClass() : NKikimrBlobStorage::EPutHandleClass::UserData - }); - } - + auto scriptedRoundDuration = TDuration::MicroSeconds(tablet.GetScriptedCycleDurationSec() * 1e6); + TVector<TReqInfo> scriptedRequests; + for (const auto& req : tablet.GetRequests()) { + scriptedRequests.push_back(TReqInfo{ + TDuration::Seconds(req.GetSendTime()), + static_cast<TEvBlobStorage::EEv>(req.GetType()), + req.GetSize(), + req.HasPutHandleClass() ? req.GetPutHandleClass() : NKikimrBlobStorage::EPutHandleClass::UserData + }); + } + if (!tablet.HasTabletId() || !tablet.HasChannel() || !tablet.HasGroupId()) { ythrow TLoadActorException() << "TTabletInfo.{TabletId,Channel,GroupId} fields are mandatory"; } TabletWriters.emplace_back(Tag, counters, WakeupQueue, QueryDispatcher, tablet.GetTabletId(), tablet.GetChannel(), tablet.HasGeneration() ? 
TMaybe<ui32>(tablet.GetGeneration()) : TMaybe<ui32>(), - tablet.GetGroupId(), putHandleClass, writeSizeGen, writeIntervalGen, garbageCollectIntervalGen, + tablet.GetGroupId(), putHandleClass, writeSizeGen, writeIntervalGen, garbageCollectIntervalGen, maxWritesInFlight, maxWriteBytesInFlight, maxTotalBytesWritten, soft, getHandleClass, readSizeGen, readIntervalGen, - maxReadsInFlight, maxReadBytesInFlight, scriptedRoundDuration, std::move(scriptedRequests)); + maxReadsInFlight, maxReadBytesInFlight, scriptedRoundDuration, std::move(scriptedRequests)); } } if (cmd.HasScheduleThresholdUs()) { @@ -852,33 +852,33 @@ public: void Bootstrap(const TActorContext& ctx) { Become(&TLogWriterTestLoadActor::StateFunc); if (TestDuration) { - ctx.Schedule(*TestDuration, new TEvents::TEvPoisonPill()); + ctx.Schedule(*TestDuration, new TEvents::TEvPoisonPill()); } - for (auto& writer : TabletWriters) { + for (auto& writer : TabletWriters) { writer.Bootstrap(ctx); } HandleWakeup(ctx); HandleUpdateQuantile(ctx); } - void HandlePoison(const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Load tablet recieved PoisonPill, going to die"); - for (auto& writer : TabletWriters) { - writer.StopWorking(ctx); // Sends TEvStopTest then all garbage is collected - } - } - + void HandlePoison(const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::BS_LOAD_TEST, "Load tablet recieved PoisonPill, going to die"); + for (auto& writer : TabletWriters) { + writer.StopWorking(ctx); // Sends TEvStopTest then all garbage is collected + } + } + void HandleStopTest(const TActorContext& ctx) { - ++TestStoppedRecieved; - if (TestStoppedRecieved == TabletWriters.size()) { - ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, "HandleStopTest")); - Die(ctx); - } + ++TestStoppedRecieved; + if (TestStoppedRecieved == TabletWriters.size()) { + ctx.Send(Parent, new TEvTestLoadFinished(Tag, nullptr, "HandleStopTest")); + Die(ctx); + } } void HandleUpdateQuantile(const TActorContext& 
ctx) { TInstant now = TAppData::TimeProvider->Now(); - for (auto& writer : TabletWriters) { + for (auto& writer : TabletWriters) { writer.UpdateQuantile(now); } ctx.Schedule(TDuration::MilliSeconds(5), new TEvUpdateQuantile); @@ -938,15 +938,15 @@ public: } } TABLEBODY() { - for (auto& writer : TabletWriters) { + for (auto& writer : TabletWriters) { str << "<tr><td colspan=\"2\">" << "<b>Tablet</b>" << "</td></tr>"; writer.DumpState(str); } } } - COLLAPSED_BUTTON_CONTENT(Sprintf("configProtobuf%" PRIu64, Tag), "Config") { - str << "<pre>" << ConfingString << "</pre>"; - } + COLLAPSED_BUTTON_CONTENT(Sprintf("configProtobuf%" PRIu64, Tag), "Config") { + str << "<pre>" << ConfingString << "</pre>"; + } } ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); } @@ -959,9 +959,9 @@ public: CFunc(EvStopTest, HandleStopTest); CFunc(EvUpdateQuantile, HandleUpdateQuantile); CFunc(TEvents::TSystem::Wakeup, HandleWakeup); - CFunc(TEvents::TSystem::PoisonPill, HandlePoison); - HFunc(TEvBlobStorage::TEvDiscoverResult, HandleDispatcher); - HFunc(TEvBlobStorage::TEvBlockResult, HandleDispatcher); + CFunc(TEvents::TSystem::PoisonPill, HandlePoison); + HFunc(TEvBlobStorage::TEvDiscoverResult, HandleDispatcher); + HFunc(TEvBlobStorage::TEvBlockResult, HandleDispatcher); HFunc(TEvBlobStorage::TEvPutResult, HandleDispatcher); HFunc(TEvBlobStorage::TEvGetResult, HandleDispatcher); HFunc(TEvBlobStorage::TEvCollectGarbageResult, HandleDispatcher); @@ -971,8 +971,8 @@ public: }; IActor *CreateWriterTestLoad(const NKikimrBlobStorage::TEvTestLoadRequest::TLoadStart& cmd, const TActorId& parent, - TIntrusivePtr<NMonitoring::TDynamicCounters> counters, ui64 tag) { - return new TLogWriterTestLoadActor(cmd, parent, std::move(counters), tag); + TIntrusivePtr<NMonitoring::TDynamicCounters> counters, ui64 tag) { + return new TLogWriterTestLoadActor(cmd, parent, std::move(counters), tag); } } // NKikimr diff --git a/ydb/core/blobstorage/testload/ya.make 
b/ydb/core/blobstorage/testload/ya.make index df18b9e950..f1448fe240 100644 --- a/ydb/core/blobstorage/testload/ya.make +++ b/ydb/core/blobstorage/testload/ya.make @@ -28,7 +28,7 @@ SRCS( test_load_memory.cpp test_load_pdisk_read.cpp test_load_pdisk_write.cpp - test_load_pdisk_log.cpp + test_load_pdisk_log.cpp test_load_quantile.h test_load_size_gen.h test_load_speed.h diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/node_warden_mock_bsc.cpp b/ydb/core/blobstorage/ut_blobstorage/lib/node_warden_mock_bsc.cpp index 6c44b80050..16292d98e9 100644 --- a/ydb/core/blobstorage/ut_blobstorage/lib/node_warden_mock_bsc.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/lib/node_warden_mock_bsc.cpp @@ -119,8 +119,8 @@ void TNodeWardenMockActor::Handle(TEvBlobStorage::TEvControllerNodeServiceSetUpd case NKikimrBlobStorage::EEntityStatus::DESTROY: pdiskIdsToRemove.insert(pdiskId); break; - case NKikimrBlobStorage::EEntityStatus::RESTART: - break; + case NKikimrBlobStorage::EEntityStatus::RESTART: + break; } } diff --git a/ydb/core/blobstorage/ut_group/main.cpp b/ydb/core/blobstorage/ut_group/main.cpp index 55cf3522e8..538f46ae03 100644 --- a/ydb/core/blobstorage/ut_group/main.cpp +++ b/ydb/core/blobstorage/ut_group/main.cpp @@ -412,7 +412,7 @@ private: } void StartVDisk(TTestActorSystem& runtime, TDiskRecord& disk) { - TVDiskConfig::TBaseInfo baseInfo(disk.VDiskId, disk.PDiskActorId, disk.PDiskGuid, disk.PDiskId, + TVDiskConfig::TBaseInfo baseInfo(disk.VDiskId, disk.PDiskActorId, disk.PDiskGuid, disk.PDiskId, TPDiskCategory::DEVICE_TYPE_SSD, disk.VDiskSlotId, NKikimrBlobStorage::TVDiskKind::Default, ++Round, TString()); auto vdiskConfig = AllVDiskKinds->MakeVDiskConfig(baseInfo); diff --git a/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.cpp b/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.cpp index 19949dff53..94eae0767a 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.cpp +++ b/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.cpp @@ -15,15 +15,15 @@ class 
TFakeVDisk const ui64 PDiskGuid; TStateManager *StateManager; TIntrusivePtr<TPDiskParams> PDiskParams; - TFakeVDiskParams Params; + TFakeVDiskParams Params; - NKikimr::NPDisk::TLogPosition ReadLogPosition{0, 0}; + NKikimr::NPDisk::TLogPosition ReadLogPosition{0, 0}; ui64 Lsn = 0; struct TLogRecord { ui64 Lsn; - TLogSignature Signature; + TLogSignature Signature; ui32 DataLen; ui32 Checksum; TVector<TChunkIdx> CommitChunks; @@ -142,7 +142,7 @@ class TFakeVDisk } else { str << " "; } - str << x.Lsn << ":" << x.Signature.ToString() << ":" << x.DataLen; + str << x.Lsn << ":" << x.Signature.ToString() << ":" << x.DataLen; } str << "] InFlight# ["; first = true; @@ -152,7 +152,7 @@ class TFakeVDisk } else { str << " "; } - str << x.Lsn << ":" << x.Signature.ToString() << ":" << x.DataLen; + str << x.Lsn << ":" << x.Signature.ToString() << ":" << x.DataLen; } str << "] FirstLsnToKeep# " << FirstLsnToKeep << "}"; return str.Str(); @@ -265,8 +265,8 @@ class TFakeVDisk ui32 InFlightLog = 0; - ui32 LogsSent = 0; - + ui32 LogsSent = 0; + ui64 NextWriteCookie = 1; ui32 ReadMsgPending = 0; @@ -274,15 +274,15 @@ class TFakeVDisk bool StateVerified = false; public: - TFakeVDisk(const TVDiskID& vdiskId, const TActorId& pdiskServiceId, ui64 pdiskGuid, TStateManager *stateManager, - TFakeVDiskParams params) + TFakeVDisk(const TVDiskID& vdiskId, const TActorId& pdiskServiceId, ui64 pdiskGuid, TStateManager *stateManager, + TFakeVDiskParams params) : TActor<TFakeVDisk>(&TFakeVDisk::StateFunc) , TObjectWithState(Sprintf("vdisk[%s]", vdiskId.ToString().data())) , VDiskId(vdiskId) , PDiskServiceId(pdiskServiceId) , PDiskGuid(pdiskGuid) , StateManager(stateManager) - , Params(params) + , Params(params) , Recovered(DeserializeRecoveredState()) { // TStringStream str; @@ -320,10 +320,10 @@ public: }); } - TString SelfInfo() const { - return TStringBuilder() << " VDiskId# " << VDiskId.ToStringWOGeneration() << " Owner# " << PDiskParams->Owner; - } - + TString SelfInfo() const { + return 
TStringBuilder() << " VDiskId# " << VDiskId.ToStringWOGeneration() << " Owner# " << PDiskParams->Owner; + } + void Bootstrap(const TActorContext& ctx) { Become(&TFakeVDisk::StateFunc); SendPDiskRequest(ctx, new NPDisk::TEvYardInit(2, VDiskId, PDiskGuid), [] {}); @@ -335,26 +335,26 @@ public: PDiskParams = msg->PDiskParams; State.BlocksInChunk = PDiskParams->ChunkSize / PDiskParams->AppendBlockSize; - THashSet<TChunkIdx> owned(msg->OwnedChunks.begin(), msg->OwnedChunks.end()); - for (const auto& [idx, info] : Recovered.Chunks) { - if (info.GetCommitState() == ECommitState::COMMITTED) { - Y_VERIFY_S(owned.count(idx), SelfInfo() << " has commited chunk# " << idx << " from Recovered.Chunks," - << " but can't find it in OwnedChunks list from PDisk"); - owned.erase(idx); - } - } - for (auto idx : owned) { - auto it = Recovered.Chunks.find(idx); - Y_VERIFY_S(it != Recovered.Chunks.end(), SelfInfo() << " has owned chunk# " << idx - << " from PDisks's OwnedChunks, but can't find it in Recovered list"); - - auto& info = it->second; - Y_VERIFY(info.GetCommitState() == ECommitState::COMMIT_IN_PROGRESS || - info.GetCommitState() == ECommitState::DELETE_IN_PROGRESS); - } - + THashSet<TChunkIdx> owned(msg->OwnedChunks.begin(), msg->OwnedChunks.end()); + for (const auto& [idx, info] : Recovered.Chunks) { + if (info.GetCommitState() == ECommitState::COMMITTED) { + Y_VERIFY_S(owned.count(idx), SelfInfo() << " has commited chunk# " << idx << " from Recovered.Chunks," + << " but can't find it in OwnedChunks list from PDisk"); + owned.erase(idx); + } + } + for (auto idx : owned) { + auto it = Recovered.Chunks.find(idx); + Y_VERIFY_S(it != Recovered.Chunks.end(), SelfInfo() << " has owned chunk# " << idx + << " from PDisks's OwnedChunks, but can't find it in Recovered list"); + + auto& info = it->second; + Y_VERIFY(info.GetCommitState() == ECommitState::COMMIT_IN_PROGRESS || + info.GetCommitState() == ECommitState::DELETE_IN_PROGRESS); + } + TStringStream str; - str << SelfInfo() << " 
starting, owned chunks# " << FormatList(msg->OwnedChunks) << Endl; + str << SelfInfo() << " starting, owned chunks# " << FormatList(msg->OwnedChunks) << Endl; Cerr << str.Str(); IssueReadLogRequest(ctx); @@ -374,9 +374,9 @@ public: Lsn = item.Lsn; TStringStream str; - str << "TEvReadLogResult " << SelfInfo() << " Lsn# " << item.Lsn << " Len# " << item.Data.size() - << " ChunkCommitSignature# " << (item.Signature.HasCommitRecord() ? "true" : "false") - << Endl; + str << "TEvReadLogResult " << SelfInfo() << " Lsn# " << item.Lsn << " Len# " << item.Data.size() + << " ChunkCommitSignature# " << (item.Signature.HasCommitRecord() ? "true" : "false") + << Endl; Cerr << str.Str(); State.Confirmed.insert(TLogRecord{item.Lsn, item.Signature, (ui32)item.Data.size(), @@ -401,15 +401,15 @@ public: // there is a record in current set (recovered from PDisk), but not in previous set (stored state); there // may be an item in flight? auto it = Recovered.InFlight.find(*current); - Y_VERIFY_S(it != Recovered.InFlight.end(), "unexpected log record " << SelfInfo() << " Lsn# " << current->Lsn - << " Signature# " << current->Signature.ToString()); - Y_VERIFY_S(it->DataLen == current->DataLen && it->Checksum == current->Checksum && - it->Signature == current->Signature, SelfInfo() << "Lsn# " << it->Lsn - << " InFlightData# " << it->DataLen << " StoredData# " << current->DataLen); + Y_VERIFY_S(it != Recovered.InFlight.end(), "unexpected log record " << SelfInfo() << " Lsn# " << current->Lsn + << " Signature# " << current->Signature.ToString()); + Y_VERIFY_S(it->DataLen == current->DataLen && it->Checksum == current->Checksum && + it->Signature == current->Signature, SelfInfo() << "Lsn# " << it->Lsn + << " InFlightData# " << it->DataLen << " StoredData# " << current->DataLen); } else if (prev) { // lost item if (prev->Lsn >= Recovered.FirstLsnToKeep) { - Y_FAIL_S("lost item Owner# " << PDiskParams->Owner << " Lsn# " << prev->Lsn); + Y_FAIL_S("lost item Owner# " << PDiskParams->Owner << " 
Lsn# " << prev->Lsn); } } } @@ -473,7 +473,7 @@ public: void Activity(const TActorContext& ctx) { while (InFlightLog < 10) { ui64 writeLogScore = 100; - ui64 allocateScore = State.Chunks.size() < 20 ? 10 : 0; + ui64 allocateScore = State.Chunks.size() < 20 ? 10 : 0; ui64 writeScore = State.Chunks.empty() ? 0 : 5; ui64 totalScore = writeLogScore + allocateScore + writeScore; @@ -483,10 +483,10 @@ public: } ui64 option = RandomNumber<ui64>(totalScore); - if (Params.LogsToBeSent && LogsSent >= Params.LogsToBeSent) { - break; - } else if (option < writeLogScore) { - IssueLogMessage(1, ctx); + if (Params.LogsToBeSent && LogsSent >= Params.LogsToBeSent) { + break; + } else if (option < writeLogScore) { + IssueLogMessage(1, ctx); } else if ((option -= writeLogScore) < allocateScore) { IssueAllocateRequest(ctx); } else if ((option -= allocateScore) < writeScore) { @@ -497,8 +497,8 @@ public: } } - void IssueLogMessage(TLogSignature signature, const TActorContext& ctx) { - ui32 size = Params.SizeMin + RandomNumber<ui32>(Params.SizeMax - Params.SizeMin + 1); + void IssueLogMessage(TLogSignature signature, const TActorContext& ctx) { + ui32 size = Params.SizeMin + RandomNumber<ui32>(Params.SizeMax - Params.SizeMin + 1); TString data = GenerateRandomDataBuffer(size); auto *info = new TLogRecord; @@ -513,16 +513,16 @@ public: TState::TChunkInfo& chunk = pair.second; switch (chunk.GetCommitState()) { case ECommitState::RESERVED: - // reserved chunk is a subject for commit; 1% chance to commit chunk - if (RandomNumber<double>() < 0.01) { + // reserved chunk is a subject for commit; 1% chance to commit chunk + if (RandomNumber<double>() < 0.01) { info->CommitChunks.push_back(chunkIdx); chunk.SetCommitState(ECommitState::COMMIT_IN_PROGRESS); } break; case ECommitState::COMMITTED: - // committed chunk is a subject for deletion; 0.5% change to delete chunk - if (RandomNumber<double>() < 0.005) { + // committed chunk is a subject for deletion; 0.5% change to delete chunk + if 
(RandomNumber<double>() < 0.005) { bool hasWrites = false; for (const TWriteRecord& w : State.WritesInFlight) { if (w.ChunkIdx == chunkIdx) { @@ -546,8 +546,8 @@ public: NPDisk::TCommitRecord cr; // advance LSN every 30000 items avg - if (Lsn > Params.LsnToKeepCount && RandomNumber<double>() < Params.LogCutProbability) { - cr.FirstLsnToKeep = Lsn - Params.LsnToKeepCount; + if (Lsn > Params.LsnToKeepCount && RandomNumber<double>() < Params.LogCutProbability) { + cr.FirstLsnToKeep = Lsn - Params.LsnToKeepCount; cr.IsStartingPoint = true; // make starting point if we cut log } @@ -558,24 +558,24 @@ public: if (cr.FirstLsnToKeep) { TStringStream str; - str << SelfInfo() << " FirstLsnToKeep# " << cr.FirstLsnToKeep << Endl; + str << SelfInfo() << " FirstLsnToKeep# " << cr.FirstLsnToKeep << Endl; Cerr << str.Str(); } TStringStream msg; - msg << "TEvLog " << SelfInfo() << " Lsn# " << Lsn << " Size# " << info->DataLen; - auto printChunks = [] (TVector<TChunkIdx> chunks) { - bool first = true; - TStringStream str; - for (TChunkIdx chunk : chunks) { - str << (first ? first = false, "" : " ") << chunk; - } - return str.Str(); - }; - msg << " Commit# " << printChunks(cr.CommitChunks) << " Delete# " << printChunks(cr.DeleteChunks) - << " IsStartingPoint# " << (cr.IsStartingPoint ? "true" : "false") << Endl; - if (cr.IsStartingPoint || cr.CommitChunks || cr.DeleteChunks) { - Cerr << msg.Str(); + msg << "TEvLog " << SelfInfo() << " Lsn# " << Lsn << " Size# " << info->DataLen; + auto printChunks = [] (TVector<TChunkIdx> chunks) { + bool first = true; + TStringStream str; + for (TChunkIdx chunk : chunks) { + str << (first ? first = false, "" : " ") << chunk; + } + return str.Str(); + }; + msg << " Commit# " << printChunks(cr.CommitChunks) << " Delete# " << printChunks(cr.DeleteChunks) + << " IsStartingPoint# " << (cr.IsStartingPoint ? 
"true" : "false") << Endl; + if (cr.IsStartingPoint || cr.CommitChunks || cr.DeleteChunks) { + Cerr << msg.Str(); } auto lsn = Lsn++; @@ -588,7 +588,7 @@ public: } }); - ++LogsSent; + ++LogsSent; ++InFlightLog; } @@ -631,7 +631,7 @@ public: }); TStringStream str; - str << "TEvLogResult " << SelfInfo() << " Lsn# " << result.Lsn << + str << "TEvLogResult " << SelfInfo() << " Lsn# " << result.Lsn << " Status# " << NKikimrProto::EReplyStatus_Name(msg->Status) << Endl; Cerr << str.Str(); } @@ -903,7 +903,7 @@ public: } }; -IActor *CreateFakeVDisk(const TVDiskID& vdiskId, const TActorId& pdiskServiceId, ui64 pdiskGuid, - TStateManager *stateManager, TFakeVDiskParams params) { - return new TFakeVDisk(vdiskId, pdiskServiceId, pdiskGuid, stateManager, params); +IActor *CreateFakeVDisk(const TVDiskID& vdiskId, const TActorId& pdiskServiceId, ui64 pdiskGuid, + TStateManager *stateManager, TFakeVDiskParams params) { + return new TFakeVDisk(vdiskId, pdiskServiceId, pdiskGuid, stateManager, params); } diff --git a/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.h b/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.h index 7031b1cadd..17a25ef9fb 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.h +++ b/ydb/core/blobstorage/ut_pdiskfit/lib/basic_test.h @@ -13,21 +13,21 @@ using namespace NKikimr; class TStateManager; -struct TFakeVDiskParams { - // 0 means no limit - ui32 LogsToBeSent = 0; - - // LogRecord size distribution - ui32 SizeMin = 1000; - ui32 SizeMax = 2000; - - ui32 LsnToKeepCount = 1000; - double LogCutProbability = 1.0 / 30000; -}; - - +struct TFakeVDiskParams { + // 0 means no limit + ui32 LogsToBeSent = 0; + + // LogRecord size distribution + ui32 SizeMin = 1000; + ui32 SizeMax = 2000; + + ui32 LsnToKeepCount = 1000; + double LogCutProbability = 1.0 / 30000; +}; + + IActor *CreateFakeVDisk(const TVDiskID& vdiskId, const TActorId& pdiskServiceId, ui64 pdiskGuid, - TStateManager *stateManager, TFakeVDiskParams params); + TStateManager *stateManager, 
TFakeVDiskParams params); class TBasicTest : public TActorBootstrapped<TBasicTest> { TAutoEvent *StopEvent = nullptr; @@ -36,12 +36,12 @@ class TBasicTest : public TActorBootstrapped<TBasicTest> { TIntrusivePtr<TPDiskConfig> PDiskConfig; TActorId PDiskServiceId; const ui32 NumVDisks; - bool InduceLogSplicing; + bool InduceLogSplicing; public: - TBasicTest(ui32 numVDisks, bool induceLogSplicing) + TBasicTest(ui32 numVDisks, bool induceLogSplicing) : NumVDisks(numVDisks) - , InduceLogSplicing(induceLogSplicing) + , InduceLogSplicing(induceLogSplicing) {} template<typename TEnv> @@ -49,8 +49,8 @@ public: StopEvent = stopEvent; StateManager = stateManager; Counters = env->Counters; - PDiskConfig = new TPDiskConfig(env->PDiskFilePath, env->PDiskGuid, 1, - TPDiskCategory(TPDiskCategory::DEVICE_TYPE_ROT, 0).GetRaw()); + PDiskConfig = new TPDiskConfig(env->PDiskFilePath, env->PDiskGuid, 1, + TPDiskCategory(TPDiskCategory::DEVICE_TYPE_ROT, 0).GetRaw()); PDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; PDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; env->ActorSystem->Register(this); @@ -61,17 +61,17 @@ public: TVector<TActorId> actors; for (ui32 i = 0; i < NumVDisks; ++i) { TVDiskID vdiskId(i, 0, 0, 0, 0); - TFakeVDiskParams params; - if (InduceLogSplicing) { - params.LogCutProbability = 1e-3; - params.SizeMin = 4000; - params.SizeMax = 4000; - if (i == 0) { - params.LogsToBeSent = 100; - } - } + TFakeVDiskParams params; + if (InduceLogSplicing) { + params.LogCutProbability = 1e-3; + params.SizeMin = 4000; + params.SizeMax = 4000; + if (i == 0) { + params.LogsToBeSent = 100; + } + } TActorId actorId = ctx.ExecutorThread.ActorSystem->Register(CreateFakeVDisk(vdiskId, PDiskServiceId, - PDiskConfig->PDiskGuid, StateManager, params)); + PDiskConfig->PDiskGuid, StateManager, params)); actors.push_back(actorId); } for (const TActorId& actor : actors) { diff --git 
a/ydb/core/blobstorage/ut_pdiskfit/lib/fail_injection_test.h b/ydb/core/blobstorage/ut_pdiskfit/lib/fail_injection_test.h index 898e28d8f6..7e0bfa05e0 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/lib/fail_injection_test.h +++ b/ydb/core/blobstorage/ut_pdiskfit/lib/fail_injection_test.h @@ -77,30 +77,30 @@ private: } }; -class TFailCounterGenerator { - -}; - -ui32 GenerateFailCounter(bool frequentFails) { - TReallyFastRng32 rng(Now().GetValue()); - - double p = (rng() % (1000 * 1000 * 1000)) / 1e9; - - if (frequentFails) { - return p < 0.05 ? rng() % 10 + 1 - : p < 0.10 ? rng() % 1000 + 1000 : - rng() % 5000 + 5000; - } else { - return p < 0.9 ? rng() % 10 + 1 - : p < 0.99 ? rng() % 100 + 100 : - rng() % 1000 + 1000; - } -} - +class TFailCounterGenerator { + +}; + +ui32 GenerateFailCounter(bool frequentFails) { + TReallyFastRng32 rng(Now().GetValue()); + + double p = (rng() % (1000 * 1000 * 1000)) / 1e9; + + if (frequentFails) { + return p < 0.05 ? rng() % 10 + 1 + : p < 0.10 ? rng() % 1000 + 1000 : + rng() % 5000 + 5000; + } else { + return p < 0.9 ? rng() % 10 + 1 + : p < 0.99 ? 
rng() % 100 + 100 : + rng() % 1000 + 1000; + } +} + struct TPDiskFailureInjectionTest { // default values for unit test ui32 NumIterations = 10; // 0 = unlimited - ui32 NumFailsInIteration = 1000; // 0 = unlimited + ui32 NumFailsInIteration = 1000; // 0 = unlimited TTempDir TempDir; TString PDiskFilePath; ui64 DiskSize = 16ULL << 30; // 10 GB @@ -113,7 +113,7 @@ struct TPDiskFailureInjectionTest { TProgramShouldContinue KikimrShouldContinue; std::unique_ptr<NKikimr::TAppData> AppData; - std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; + std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; std::unique_ptr<NActors::TActorSystem> ActorSystem; TAutoEvent StopEvent; @@ -170,8 +170,8 @@ struct TPDiskFailureInjectionTest { // initialize app data with pool ids and registries AppData.reset(new NKikimr::TAppData(0u, 1u, 2u, 3u, {}, nullptr, nullptr, nullptr, &KikimrShouldContinue)); - IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); - AppData->IoContextFactory = IoContext.get(); + IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); + AppData->IoContextFactory = IoContext.get(); // create actor system setup environment auto setup = MakeHolder<TActorSystemSetup>(); @@ -202,10 +202,10 @@ struct TPDiskFailureInjectionTest { NKikimrServices::EServiceKikimr_Name ); - TString explanation; - loggerSettings->SetLevel(NActors::NLog::PRI_INFO, NKikimrServices::BS_PDISK, explanation); - loggerSettings->SetLevel(NActors::NLog::PRI_DEBUG, NKikimrServices::BS_PDISK_TEST, explanation); - + TString explanation; + loggerSettings->SetLevel(NActors::NLog::PRI_INFO, NKikimrServices::BS_PDISK, explanation); + loggerSettings->SetLevel(NActors::NLog::PRI_DEBUG, NKikimrServices::BS_PDISK_TEST, explanation); + // create/register logger actor auto logger = std::make_unique<TLoggerActor>(loggerSettings, CreateStderrBackend(), Counters->GetSubgroup("logger", "counters")); @@ -224,9 +224,9 @@ struct TPDiskFailureInjectionTest { 
test->Run(this, &StopEvent, stateManager); } - //template<bool FREQUENT_FAILS, typename TTest, typename... TArgs> + //template<bool FREQUENT_FAILS, typename TTest, typename... TArgs> template<typename TTest, typename... TArgs> - void RunCycle(bool frequentFails, TArgs&&... args) { + void RunCycle(bool frequentFails, TArgs&&... args) { TInstant startTime = TInstant::Now(); for (ui32 iteration = 0; NumIterations == 0 || iteration < NumIterations; ++iteration) { @@ -259,7 +259,7 @@ struct TPDiskFailureInjectionTest { TReallyFastRng32 rng(Now().GetValue()); - ui32 failCounter = GenerateFailCounter(frequentFails); + ui32 failCounter = GenerateFailCounter(frequentFails); injector.SetFailCounter(failCounter); Cerr << "failCounter# " << failCounter << Endl; diff --git a/ydb/core/blobstorage/ut_pdiskfit/pdiskfit/pdiskfit.cpp b/ydb/core/blobstorage/ut_pdiskfit/pdiskfit/pdiskfit.cpp index 95b86ec67e..cb4ec086f9 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/pdiskfit/pdiskfit.cpp +++ b/ydb/core/blobstorage/ut_pdiskfit/pdiskfit/pdiskfit.cpp @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { fit.ErasureEncode = erasureEncode; if (test == "basic") { - fit.RunCycle<TBasicTest>(false, numVDisks, false); + fit.RunCycle<TBasicTest>(false, numVDisks, false); } else { Cerr << "unknown test type " << test << Endl; return 1; diff --git a/ydb/core/blobstorage/ut_pdiskfit/ut/main.cpp b/ydb/core/blobstorage/ut_pdiskfit/ut/main.cpp index 04ee46176f..011d7cf839 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/ut/main.cpp +++ b/ydb/core/blobstorage/ut_pdiskfit/ut/main.cpp @@ -7,11 +7,11 @@ class TWatchdogThread : public ISimpleThread { TMutex Mutex; TCondVar Stop; - TAtomic QuitFlag = 0; + TAtomic QuitFlag = 0; public: ~TWatchdogThread() { - AtomicSet(QuitFlag, 1); + AtomicSet(QuitFlag, 1); with_lock (Mutex) { Stop.Signal(); } @@ -23,7 +23,7 @@ public: with_lock (Mutex) { do { Cerr << Sprintf("Watchdog# %s\n", TInstant::Now().ToString().data()); - } while (!AtomicGet(QuitFlag) && 
!Stop.WaitT(Mutex, TDuration::Seconds(5))); + } while (!AtomicGet(QuitFlag) && !Stop.WaitT(Mutex, TDuration::Seconds(5))); } return nullptr; @@ -35,23 +35,23 @@ Y_UNIT_TEST_SUITE(TPDiskFIT) { TWatchdogThread watchdog; watchdog.Start(); TPDiskFailureInjectionTest test; - test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); - test.RunCycle<TBasicTest>(false, 8, false); + test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); + test.RunCycle<TBasicTest>(false, 8, false); + } + + Y_UNIT_TEST(FailTest) { + TWatchdogThread watchdog; + watchdog.Start(); + TPDiskFailureInjectionTest test; + test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); + test.RunCycle<TBasicTest>(true, 8, false); + } + + Y_UNIT_TEST(LogSpliceError) { + TWatchdogThread watchdog; + watchdog.Start(); + TPDiskFailureInjectionTest test; + test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); + test.RunCycle<TBasicTest>(true, 8, true); } - - Y_UNIT_TEST(FailTest) { - TWatchdogThread watchdog; - watchdog.Start(); - TPDiskFailureInjectionTest test; - test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); - test.RunCycle<TBasicTest>(true, 8, false); - } - - Y_UNIT_TEST(LogSpliceError) { - TWatchdogThread watchdog; - watchdog.Start(); - TPDiskFailureInjectionTest test; - test.TestDuration = NSan::PlainOrUnderSanitizer(TDuration::Minutes(4), TDuration::Minutes(3)); - test.RunCycle<TBasicTest>(true, 8, true); - } } diff --git a/ydb/core/blobstorage/ut_pdiskfit/ut/ya.make b/ydb/core/blobstorage/ut_pdiskfit/ut/ya.make index 420a833964..03e653bb4b 100644 --- a/ydb/core/blobstorage/ut_pdiskfit/ut/ya.make +++ b/ydb/core/blobstorage/ut_pdiskfit/ut/ya.make @@ -6,11 +6,11 @@ OWNER( IF (OS_LINUX AND NOT WITH_VALGRIND) UNITTEST() - TIMEOUT(1200) + TIMEOUT(1200) - SIZE(LARGE) + SIZE(LARGE) - TAG(ya:fat) + TAG(ya:fat) IF 
(BUILD_TYPE == "RELEASE") SRCS( diff --git a/ydb/core/blobstorage/ut_vdisk/lib/astest.h b/ydb/core/blobstorage/ut_vdisk/lib/astest.h index 919271c2ce..2cf254eff2 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/astest.h +++ b/ydb/core/blobstorage/ut_vdisk/lib/astest.h @@ -35,7 +35,7 @@ private: TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; std::unique_ptr<NActors::TMon> Monitoring; std::unique_ptr<NKikimr::TAppData> AppData; - std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; + std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; std::unique_ptr<NActors::TActorSystem> ActorSystem1; TSystemEvent DoneEvent { TSystemEvent::rAuto }; public: @@ -121,8 +121,8 @@ inline void TTestWithActorSystem::Run(NActors::IActor *testActor) { nullptr, nullptr, &KikimrShouldContinue)); AppData->Counters = Counters; AppData->Mon = Monitoring.get(); - IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); - AppData->IoContextFactory = IoContext.get(); + IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); + AppData->IoContextFactory = IoContext.get(); ActorSystem1.reset(new TActorSystem(setup1, AppData.get(), logSettings)); loggerActor->Log(Now(), NKikimr::NLog::PRI_NOTICE, NActorsServices::TEST, "Actor system created"); diff --git a/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp b/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp index c52917a279..b2c020c3d2 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/prepare.cpp @@ -24,33 +24,33 @@ using namespace NKikimr; ////////////////////////////////////////////////////////////////////////////////////// TAllPDisksConfiguration TAllPDisksConfiguration::MkDevice(const TString &devicePath, ui32 chunkSize, - TString deviceType) { - return TAllPDisksConfiguration(1, chunkSize, 0, TString(), devicePath, deviceType); + TString deviceType) { + return TAllPDisksConfiguration(1, chunkSize, 0, TString(), devicePath, deviceType); } 
TAllPDisksConfiguration TAllPDisksConfiguration::MkOneTmp(ui32 chunkSize, ui64 diskSize, - TString deviceType) { - return TAllPDisksConfiguration(1, chunkSize, diskSize, TString(), TString(), deviceType); + TString deviceType) { + return TAllPDisksConfiguration(1, chunkSize, diskSize, TString(), TString(), deviceType); } TAllPDisksConfiguration TAllPDisksConfiguration::MkManyTmp(ui32 pDisksNum, ui32 chunkSize, ui64 diskSize, - TString deviceType) { + TString deviceType) { Y_ASSERT(pDisksNum > 0); - return TAllPDisksConfiguration(pDisksNum, chunkSize, diskSize, TString(), TString(), deviceType); + return TAllPDisksConfiguration(pDisksNum, chunkSize, diskSize, TString(), TString(), deviceType); } TAllPDisksConfiguration::TAllPDisksConfiguration(ui32 num, ui32 chunkSize, ui64 diskSize, const TString &dir, const TString &device, - TString deviceType) + TString deviceType) : PDisksNum(num) , ChunkSize(chunkSize) , DiskSize(diskSize) , Dir(dir) , Device(device) - , DeviceType(deviceType) + , DeviceType(deviceType) {} @@ -82,7 +82,7 @@ void TOnePDisk::FormatDisk(bool force) { sysLogKey, // sysLogKey NPDisk::YdbDefaultPDiskSequence, // mainKey "", // textMessage - false // isErasureEncode + false // isErasureEncode ); } } @@ -151,9 +151,9 @@ void TAllPDisks::ActorSetupCmd(NActors::TActorSystemSetup *setup, ui32 node, TOnePDisk &inst = PDisks[i]; inst.PDiskActorID = MakeBlobStoragePDiskID(node, i); TIntrusivePtr<TPDiskConfig> pDiskConfig; - TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DeviceTypeFromStr(Cfg.DeviceType); + TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DeviceTypeFromStr(Cfg.DeviceType); pDiskConfig.Reset(new TPDiskConfig(inst.Filename, inst.PDiskGuid, inst.PDiskID, - TPDiskCategory(deviceType, 0).GetRaw())); + TPDiskCategory(deviceType, 0).GetRaw())); pDiskConfig->GetDriveDataSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; pDiskConfig->WriteCacheSwitch = NKikimrBlobStorage::TPDiskConfig::DoNotTouch; TActorSetupCmd 
pDiskSetup(CreatePDisk(pDiskConfig.Get(), @@ -241,7 +241,7 @@ bool TDefaultVDiskSetup::SetUp(TAllVDisks::TVDiskInstance &vdisk, TAllPDisks *pd vdisk.ActorID = MakeBlobStorageVDiskID(1, id + 1, 0); vdisk.VDiskID = TVDiskID(0, 1, 0, d, j); - NKikimr::TVDiskConfig::TBaseInfo baseInfo(vdisk.VDiskID, pdisk.PDiskActorID, pdisk.PDiskGuid, + NKikimr::TVDiskConfig::TBaseInfo baseInfo(vdisk.VDiskID, pdisk.PDiskActorID, pdisk.PDiskGuid, pdisk.PDiskID, NKikimr::TPDiskCategory::DEVICE_TYPE_ROT, slotId, NKikimrBlobStorage::TVDiskKind::Default, initOwnerRound, {}); vdisk.Cfg = MakeIntrusive<NKikimr::TVDiskConfig>(baseInfo); @@ -374,9 +374,9 @@ void TConfiguration::Prepare(IVDiskSetup *vdiskSetup, bool newPDisks, bool runRe nullptr, nullptr, &KikimrShouldContinue)); AppData->Counters = Counters; AppData->Mon = Monitoring.get(); - IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); - AppData->IoContextFactory = IoContext.get(); - + IoContext = std::make_shared<NKikimr::NPDisk::TIoContextFactoryOSS>(); + AppData->IoContextFactory = IoContext.get(); + ActorSystem1.reset(new TActorSystem(setup1, AppData.get(), logSettings)); Monitoring->RegisterActorPage(actorsMonPage, "logger", "Logger", false, ActorSystem1.get(), loggerActorId); loggerActor->Log(Now(), NKikimr::NLog::PRI_NOTICE, NActorsServices::TEST, "Actor system created"); diff --git a/ydb/core/blobstorage/ut_vdisk/lib/prepare.h b/ydb/core/blobstorage/ut_vdisk/lib/prepare.h index 700c9d444b..fc5da8d342 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/prepare.h +++ b/ydb/core/blobstorage/ut_vdisk/lib/prepare.h @@ -24,21 +24,21 @@ struct TAllPDisksConfiguration { const ui64 DiskSize; const TString Dir; const TString Device; - const TString DeviceType; + const TString DeviceType; static TAllPDisksConfiguration MkDevice(const TString &devicePath, ui32 chunkSize, - TString deviceType); - static TAllPDisksConfiguration MkOneTmp(ui32 chunkSize, ui64 diskSize, - TString deviceType); + TString deviceType); + static 
TAllPDisksConfiguration MkOneTmp(ui32 chunkSize, ui64 diskSize, + TString deviceType); static TAllPDisksConfiguration MkManyTmp(ui32 pDisksNum, ui32 chunkSize, ui64 diskSize, - TString deviceType); + TString deviceType); TAllPDisksConfiguration(const TAllPDisksConfiguration &) = default; private: TAllPDisksConfiguration(ui32 num, ui32 chunkSize, ui64 diskSize, - const TString &dir, const TString &device, - TString deviceType); + const TString &dir, const TString &device, + TString deviceType); }; ////////////////////////////////////////////////////////////////////////////////////// @@ -156,7 +156,7 @@ struct TConfiguration { TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; std::unique_ptr<NActors::TMon> Monitoring; std::unique_ptr<NKikimr::TAppData> AppData; - std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; + std::shared_ptr<NKikimr::NPDisk::IIoContextFactory> IoContext; std::unique_ptr<NActors::TActorSystem> ActorSystem1; std::unique_ptr<TAllPDisks> PDisks; std::unique_ptr<TAllVDisks> VDisks; @@ -173,7 +173,7 @@ struct TConfiguration { TCondVar TimeoutCallbacksCV; TConfiguration(const TAllPDisksConfiguration &pcfg = - TAllPDisksConfiguration::MkOneTmp(512u << 10u, 16ull << 30ull, "ROT"), + TAllPDisksConfiguration::MkOneTmp(512u << 10u, 16ull << 30ull, "ROT"), ui32 domainsNum = 4u, ui32 disksInDomain = 2u, NKikimr::TErasureType::EErasureSpecies erasure = diff --git a/ydb/core/blobstorage/ut_vdisk/lib/test_brokendevice.cpp b/ydb/core/blobstorage/ut_vdisk/lib/test_brokendevice.cpp index f40b5e24a5..74bfddbcab 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/test_brokendevice.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/test_brokendevice.cpp @@ -107,7 +107,7 @@ private: HFunc(TEvBlobStorage::TEvVPutResult, BrokenState_Handle); CFunc(TEvents::TSystem::Wakeup, BrokenState_Timeout); IgnoreFunc(TEvBlobStorage::TEvVWindowChange); - IgnoreFunc(NPDisk::TEvYardControlResult); + IgnoreFunc(NPDisk::TEvYardControlResult); IgnoreFunc(TEvents::TEvUndelivered); ) 
diff --git a/ydb/core/blobstorage/ut_vdisk/lib/test_huge.cpp b/ydb/core/blobstorage/ut_vdisk/lib/test_huge.cpp index 3564a94fe0..7364483b0c 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/test_huge.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/test_huge.cpp @@ -113,7 +113,7 @@ struct THugeModuleContext { // THugeModuleRecoveryActor ///////////////////////////////////////////////////////////////////////////////////////////////////////// class THugeModuleRecoveryActor : public TActorBootstrapped<THugeModuleRecoveryActor> { - using TStartingPoints = TMap<TLogSignature, NPDisk::TLogRecord>; + using TStartingPoints = TMap<TLogSignature, NPDisk::TLogRecord>; std::shared_ptr<THugeModuleContext> HmCtx; ui64 Lsn = 0; @@ -125,7 +125,7 @@ class THugeModuleRecoveryActor : public TActorBootstrapped<THugeModuleRecoveryAc auto &vDiskInstance = HmCtx->Conf->VDisks->Get(0); HmCtx->Config = vDiskInstance.Cfg; HmCtx->VCtx.Reset(new TVDiskContext(ctx.SelfID, HmCtx->Conf->GroupInfo->PickTopology(), HmCtx->Counters, - vDiskInstance.VDiskID, ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); + vDiskInstance.VDiskID, ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); TVDiskID selfVDiskID = HmCtx->Conf->GroupInfo->GetVDiskId(HmCtx->VCtx->ShortSelfVDisk); ctx.Send(HmCtx->Config->BaseInfo.PDiskActorID, @@ -142,7 +142,7 @@ class THugeModuleRecoveryActor : public TActorBootstrapped<THugeModuleRecoveryAc auto logFunc = [] (const TString) { /* empty */ }; TStartingPoints::const_iterator it; - it = startingPoints.find(TLogSignature::SignatureHugeBlobEntryPoint); + it = startingPoints.find(TLogSignature::SignatureHugeBlobEntryPoint); if (it == startingPoints.end()) { RepairedHuge = std::make_shared<THullHugeKeeperPersState>( HmCtx->VCtx, diff --git a/ydb/core/blobstorage/ut_vdisk/lib/test_repl.cpp b/ydb/core/blobstorage/ut_vdisk/lib/test_repl.cpp index c3e4d33f5c..ae4288ab25 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/test_repl.cpp +++ 
b/ydb/core/blobstorage/ut_vdisk/lib/test_repl.cpp @@ -199,7 +199,7 @@ private: TIntrusivePtr<NMonitoring::TDynamicCounters> counters = new NMonitoring::TDynamicCounters; auto groupInfo = TBlobStorageGroupInfo(TBlobStorageGroupType::ErasureMirror3, 2, 4); VCtx.Reset(new TVDiskContext(ctx.SelfID, groupInfo.PickTopology(), counters, VDiskInfo.VDiskID, - ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); + ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); ReplCtx = std::make_shared<TReplCtx>( VCtx, diff --git a/ydb/core/blobstorage/ut_vdisk/lib/test_synclog.cpp b/ydb/core/blobstorage/ut_vdisk/lib/test_synclog.cpp index abff3e0ae0..b5608c382d 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/test_synclog.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/test_synclog.cpp @@ -68,7 +68,7 @@ class TDataWriterActor : public TActorBootstrapped<TDataWriterActor> { TIntrusivePtr<TEventSerializedData> buffers = serializer.Release(logCmd.IsExtendedFormat()); ctx.Send(TestCtx->LoggerId, new NPDisk::TEvLog(TestCtx->PDiskCtx->Dsk->Owner, TestCtx->PDiskCtx->Dsk->OwnerRound, - TLogSignature::SignatureBlock, buffers->GetString(), seg, nullptr)); + TLogSignature::SignatureBlock, buffers->GetString(), seg, nullptr)); // FIXME: problems on reboot ctx.Send(TestCtx->SyncLogId, new NSyncLog::TEvSyncLogPut(seg.Point(), tabletId, Generation, 0)); } @@ -166,7 +166,7 @@ class TSyncLogTestWriteActor : public TActorBootstrapped<TSyncLogTestWriteActor> auto &vDiskInstance = Conf->VDisks->Get(0); auto &groupInfo = Conf->GroupInfo; VCtx = MakeIntrusive<TVDiskContext>(ctx.SelfID, groupInfo->PickTopology(), counters, vDiskInstance.VDiskID, - ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN); + ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN); VDiskConfig = vDiskInstance.Cfg; TestCtx->SelfVDiskId = groupInfo->GetVDiskId(VCtx->ShortSelfVDisk); diff --git a/ydb/core/blobstorage/ut_vdisk/lib/vdisk_mock.cpp 
b/ydb/core/blobstorage/ut_vdisk/lib/vdisk_mock.cpp index 9158506702..0d68629c64 100644 --- a/ydb/core/blobstorage/ut_vdisk/lib/vdisk_mock.cpp +++ b/ydb/core/blobstorage/ut_vdisk/lib/vdisk_mock.cpp @@ -37,7 +37,7 @@ public: void Bootstrap(const TActorContext& ctx) { VCtx.Reset(new TVDiskContext(ctx.SelfID, Top, new NMonitoring::TDynamicCounters, VDiskId, - ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); + ctx.ExecutorThread.ActorSystem, TPDiskCategory::DEVICE_TYPE_UNKNOWN)); Become(&TVDiskMockActor::StateFunc); } diff --git a/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp b/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp index 7bee869de9..694828c9f8 100644 --- a/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp +++ b/ydb/core/blobstorage/ut_vdisk/vdisk_test.cpp @@ -32,7 +32,7 @@ void TestRun(TTest *test, ui32 disksInDomain = DefDisksInDomain, NKikimr::TErasureType::EErasureSpecies erasure = DefErasure) { - TConfiguration Conf(TAllPDisksConfiguration::MkOneTmp(chunkSize, diskSize, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkOneTmp(chunkSize, diskSize, "ROT"), domainsNum, disksInDomain, erasure); @@ -668,7 +668,7 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl1) { ui32 domainsNum = 4u; ui32 disksInDomain = 2u; ui32 pDisksNum = domainsNum * disksInDomain; - TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), domainsNum, disksInDomain); TFastVDiskSetup vdiskSetup; Conf.Prepare(&vdiskSetup); @@ -691,7 +691,7 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl2) { ui32 domainsNum = 4u; ui32 disksInDomain = 2u; ui32 pDisksNum = domainsNum * disksInDomain; - TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), domainsNum, disksInDomain); TFastVDiskSetup vdiskSetup; 
Conf.Prepare(&vdiskSetup); @@ -715,7 +715,7 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl3) { ui32 domainsNum = 4u; ui32 disksInDomain = 1u; ui32 pDisksNum = domainsNum * disksInDomain; - TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 16ull << 30ull, "ROT"), domainsNum, disksInDomain); TFastVDiskSetup vdiskSetup; Conf.Prepare(&vdiskSetup); @@ -739,7 +739,7 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl3) { ui32 domainsNum = 4u; ui32 disksInDomain = 1u; ui32 pDisksNum = domainsNum * disksInDomain; - TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 1ull << 30ull, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 512u << 10u, 1ull << 30ull, "ROT"), domainsNum, disksInDomain); // Write some data to each disk { @@ -787,7 +787,7 @@ Y_UNIT_TEST_SUITE(TBsVDiskRepl3) { ui32 domainsNum = 4u; ui32 disksInDomain = 2u; ui32 pDisksNum = domainsNum * disksInDomain; - TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 16u << 20u, 16ull << 30ull, "ROT"), + TConfiguration Conf(TAllPDisksConfiguration::MkManyTmp(pDisksNum, 16u << 20u, 16ull << 30ull, "ROT"), domainsNum, disksInDomain); TFastVDiskSetupRepl vdiskSetup; Conf.Prepare(&vdiskSetup); diff --git a/ydb/core/blobstorage/ut_vdisk2/env.h b/ydb/core/blobstorage/ut_vdisk2/env.h index 0ab38d3991..4d53effb99 100644 --- a/ydb/core/blobstorage/ut_vdisk2/env.h +++ b/ydb/core/blobstorage/ut_vdisk2/env.h @@ -98,7 +98,7 @@ namespace NKikimr { Info.Reset(new TBlobStorageGroupInfo(TBlobStorageGroupType::ErasureNone, 1, 1, 1, &vdiskIds)); // create vdisk config - TVDiskConfig::TBaseInfo baseInfo(VDiskId, PDiskServiceId, PDiskGuid, PDiskId, + TVDiskConfig::TBaseInfo baseInfo(VDiskId, PDiskServiceId, PDiskGuid, PDiskId, TPDiskCategory::DEVICE_TYPE_SSD, VSlotId, NKikimrBlobStorage::TVDiskKind::Default, 1, "static"); VDiskConfig = 
AllVDiskKinds->MakeVDiskConfig(baseInfo); diff --git a/ydb/core/blobstorage/vdisk/common/blobstorage_dblogcutter.cpp b/ydb/core/blobstorage/vdisk/common/blobstorage_dblogcutter.cpp index a0fdac4135..5f176b61b7 100644 --- a/ydb/core/blobstorage/vdisk/common/blobstorage_dblogcutter.cpp +++ b/ydb/core/blobstorage/vdisk/common/blobstorage_dblogcutter.cpp @@ -130,7 +130,7 @@ namespace NKikimr { commitRec.FirstLsnToKeep = *freeUpToLsn; commitRec.IsStartingPoint = false; TLsnSeg seg = LogCutterCtx.LsnMngr->AllocLsnForLocalUse(); - ui8 signature = TLogSignature::SignatureHullCutLog; + ui8 signature = TLogSignature::SignatureHullCutLog; ctx.Send(LogCutterCtx.LoggerId, new NPDisk::TEvLog(LogCutterCtx.PDiskCtx->Dsk->Owner, LogCutterCtx.PDiskCtx->Dsk->OwnerRound, signature, commitRec, TString(), seg, nullptr)); diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h index 18a693fd18..ff4bb4f0d6 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h @@ -26,7 +26,7 @@ namespace NKikimr { ui32 VDiskSlotId = 0; EKind Kind = NKikimrBlobStorage::TVDiskKind::Default; // name of the storage pool this VDisk belongs to - TString StoragePoolName; + TString StoragePoolName; // is the donor mode enabled for this disk? 
(no communication with group, actually, no group -- only reads) const bool DonorMode = false; // a set of donor disks for this one @@ -49,7 +49,7 @@ namespace NKikimr { ui32 vdiskSlotId, EKind kind, ui64 initOwnerRound, - TString storagePoolName, + TString storagePoolName, const bool donorMode = false, std::vector<std::pair<TVDiskID, TActorId>> donorDiskIds = {}, ui64 scrubCookie = 0, @@ -62,7 +62,7 @@ namespace NKikimr { , DeviceType(deviceType) , VDiskSlotId(vdiskSlotId) , Kind(kind) - , StoragePoolName(storagePoolName) + , StoragePoolName(storagePoolName) , DonorMode(donorMode) , DonorDiskIds(std::move(donorDiskIds)) , ScrubCookie(scrubCookie) diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config_ut.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_config_ut.cpp index faf9d98b38..941df64a10 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config_ut.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config_ut.cpp @@ -14,7 +14,7 @@ namespace NKikimr { } TVDiskConfig::TBaseInfo GetDefaultBaseInfo(NKikimrBlobStorage::TVDiskKind::EVDiskKind kind) { - return TVDiskConfig::TBaseInfo(TVDiskID(), TActorId(), 0x1234, 0x5678, + return TVDiskConfig::TBaseInfo(TVDiskID(), TActorId(), 0x1234, 0x5678, TPDiskCategory::DEVICE_TYPE_ROT, 0x01, kind, 1, {}); } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp index e84d161de2..0635f0ec9e 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.cpp @@ -24,7 +24,7 @@ namespace NKikimr { std::shared_ptr<TBlobStorageGroupInfo::TTopology> top, const TIntrusivePtr<NMonitoring::TDynamicCounters>& vdiskCounters, const TVDiskID &selfVDisk, - TActorSystem *as, // as can be nullptr for tests + TActorSystem *as, // as can be nullptr for tests TPDiskCategory::EDeviceType type, bool donorMode, TReplQuoter::TPtr replPDiskReadQuoter, @@ -36,7 +36,7 @@ namespace NKikimr { , Top(std::move(top)) , 
VDiskCounters(vdiskCounters) , VDiskMemCounters(vdiskCounters->GetSubgroup("subsystem", "memhull")) - , Histograms(VDiskCounters, type) + , Histograms(VDiskCounters, type) , IFaceMonGroup(std::make_shared<NMonGroup::TVDiskIFaceGroup>(VDiskCounters, "subsystem", "interface")) , GroupId(selfVDisk.GroupID) , ShortSelfVDisk(selfVDisk) diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_context.h b/ydb/core/blobstorage/vdisk/common/vdisk_context.h index 5c43723ba0..7bb7dfd670 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_context.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_context.h @@ -35,7 +35,7 @@ namespace NKikimr { const std::shared_ptr<TBlobStorageGroupInfo::TTopology> Top; const TIntrusivePtr<NMonitoring::TDynamicCounters> VDiskCounters; const TIntrusivePtr<NMonitoring::TDynamicCounters> VDiskMemCounters; - // latency histograms + // latency histograms NVDiskMon::THistograms Histograms; std::shared_ptr<NMonGroup::TVDiskIFaceGroup> IFaceMonGroup; // Self VDisk related info diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_events.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_events.cpp index 4a04213632..25a0e24320 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_events.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_events.cpp @@ -14,7 +14,7 @@ namespace NKikimr { : TEvVResultBaseWithQoSPB(now, counterPtr, histoPtr, std::move(traceId), TInterconnectChannels::IC_BLOBSTORAGE_SMALL_MSG, recByteSize, record, skeletonFrontIDPtr) { - IncrementSize(bufferSizeBytes); + IncrementSize(bufferSizeBytes); Record.SetStatus(status); LogoBlobIDFromLogoBlobID(logoBlobId, Record.MutableBlobID()); VDiskIDFromVDiskID(vdisk, Record.MutableVDiskID()); @@ -23,9 +23,9 @@ namespace NKikimr { } Record.SetStatusFlags(oosStatus.Flags); Record.SetApproximateFreeSpaceShare(oosStatus.ApproximateFreeSpaceShare); - if (record && record->HasTimestamps()) { - Record.MutableTimestamps()->CopyFrom(record->GetTimestamps()); - } + if (record && record->HasTimestamps()) { + 
Record.MutableTimestamps()->CopyFrom(record->GetTimestamps()); + } if (status == NKikimrProto::OK) { Record.SetIncarnationGuid(incarnationGuid); } @@ -50,7 +50,7 @@ namespace NKikimr { void TEvBlobStorage::TEvVMultiPut::StorePayload(NKikimrBlobStorage::TVMultiPutItem &item, const TString& buffer) { if (KIKIMR_USE_PROTOBUF_WITH_PAYLOAD) { AddPayload(TRope(buffer)); - Y_VERIFY_DEBUG(Record.ItemsSize() == GetPayloadCount()); + Y_VERIFY_DEBUG(Record.ItemsSize() == GetPayloadCount()); } else { item.SetBuffer(buffer); } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_events.h b/ydb/core/blobstorage/vdisk/common/vdisk_events.h index b36a68d877..47fbd959c9 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_events.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_events.h @@ -357,7 +357,7 @@ namespace NKikimr { const NVDiskMon::TLtcHistoPtr &histoPtr, NWilson::TTraceId traceId) : TVDiskNonlocalResultBase(channel) , Start(now) - , Size(0) + , Size(0) , CounterPtr(counterPtr) , HistoPtr(histoPtr) , TraceId(std::move(traceId)) @@ -376,20 +376,20 @@ namespace NKikimr { } if (HistoPtr) { - HistoPtr->Collect(TAppData::TimeProvider->Now() - Start, Size); + HistoPtr->Collect(TAppData::TimeProvider->Now() - Start, Size); } TActivationContext::Send(ev.release()); } - protected: - void IncrementSize(ui64 size) { - Size += size; - } - + protected: + void IncrementSize(ui64 size) { + Size += size; + } + private: const TInstant Start; - ui64 Size; + ui64 Size; NMonitoring::TDynamicCounters::TCounterPtr CounterPtr; NVDiskMon::TLtcHistoPtr HistoPtr; bool Finalized = false; @@ -473,7 +473,7 @@ namespace NKikimr { } size_t byteSize = TBase::Record.ByteSize(); - Y_VERIFY(byteSize <= NActors::EventMaxByteSize, + Y_VERIFY(byteSize <= NActors::EventMaxByteSize, "event suspiciously large: %zu\n%s", byteSize, this->ToString().data()); @@ -497,10 +497,10 @@ namespace NKikimr { struct TEvBlobStorage::TEvVPut : public TEventPB<TEvBlobStorage::TEvVPut, NKikimrBlobStorage::TEvVPut, 
TEvBlobStorage::EvVPut> { - // In current realization it is intentionaly lost on event serialization since - // LWTrace doesn't support distributed shuttels yet - mutable NLWTrace::TOrbit Orbit; - + // In current realization it is intentionaly lost on event serialization since + // LWTrace doesn't support distributed shuttels yet + mutable NLWTrace::TOrbit Orbit; + TEvVPut() {} @@ -559,19 +559,19 @@ namespace NKikimr { void StorePayload(TRope&& buffer); - ui64 GetBufferBytes() const { - if (KIKIMR_USE_PROTOBUF_WITH_PAYLOAD) { - ui64 sizeBytes = 0; - const ui32 size = GetPayloadCount(); - for (ui32 i = 0; i < size; ++i) { - sizeBytes += GetPayload(i).GetSize(); - } - return sizeBytes; - } else { - return Record.GetBuffer().size(); - } - } - + ui64 GetBufferBytes() const { + if (KIKIMR_USE_PROTOBUF_WITH_PAYLOAD) { + ui64 sizeBytes = 0; + const ui32 size = GetPayloadCount(); + for (ui32 i = 0; i < size; ++i) { + sizeBytes += GetPayload(i).GetSize(); + } + return sizeBytes; + } else { + return Record.GetBuffer().size(); + } + } + bool Validate(TString& errorReason) { if (!Record.HasBlobID()) { errorReason = "TEvVPut rejected by VDisk. 
It has no query"; @@ -591,7 +591,7 @@ namespace NKikimr { return false; } - + TString ToString() const override { return ToString(Record); } @@ -694,11 +694,11 @@ namespace NKikimr { struct TEvBlobStorage::TEvVPutResult : public TEvVResultBaseWithQoSPB<TEvBlobStorage::TEvVPutResult, NKikimrBlobStorage::TEvVPutResult, TEvBlobStorage::EvVPutResult> { - - // In current realization it is intentionaly lost on event serialization since - // LWTrace doesn't support distributed shuttels yet - mutable NLWTrace::TOrbit Orbit; - + + // In current realization it is intentionaly lost on event serialization since + // LWTrace doesn't support distributed shuttels yet + mutable NLWTrace::TOrbit Orbit; + TEvVPutResult(); TEvVPutResult(const NKikimrProto::EReplyStatus status, const TLogoBlobID &logoBlobId, const TVDiskID &vdisk, @@ -752,9 +752,9 @@ namespace NKikimr { if (request.HasCookie()) { Record.SetCookie(request.GetCookie()); } - if (request.HasTimestamps()) { - Record.MutableTimestamps()->CopyFrom(request.GetTimestamps()); - } + if (request.HasTimestamps()) { + Record.MutableTimestamps()->CopyFrom(request.GetTimestamps()); + } } }; @@ -798,7 +798,7 @@ namespace NKikimr { ui64 GetBufferBytes() const { ui64 bytes = 0; if (KIKIMR_USE_PROTOBUF_WITH_PAYLOAD) { - ui32 size = GetPayloadCount(); + ui32 size = GetPayloadCount(); for (ui32 i = 0; i < size; ++i) { bytes += GetPayload(i).GetSize(); } @@ -913,7 +913,7 @@ namespace NKikimr { : TEvVResultBaseWithQoSPB(now, counterPtr, histoPtr, std::move(traceId), TInterconnectChannels::IC_BLOBSTORAGE_SMALL_MSG, recByteSize, record, skeletonFrontIDPtr) { - IncrementSize(bufferSizeBytes); + IncrementSize(bufferSizeBytes); Record.SetStatus(status); VDiskIDFromVDiskID(vdisk, Record.MutableVDiskID()); if (cookie) { @@ -1249,9 +1249,9 @@ namespace NKikimr { { Record.SetStatus(status); VDiskIDFromVDiskID(vdisk, Record.MutableVDiskID()); - if (queryRecord && queryRecord->HasTimestamps()) { - 
Record.MutableTimestamps()->CopyFrom(queryRecord->GetTimestamps()); - } + if (queryRecord && queryRecord->HasTimestamps()) { + Record.MutableTimestamps()->CopyFrom(queryRecord->GetTimestamps()); + } // copy cookie if it was set in initial query if (cookie) @@ -1268,7 +1268,7 @@ namespace NKikimr { void AddResult(NKikimrProto::EReplyStatus status, const TLogoBlobID &logoBlobId, ui64 sh, const char *data, size_t size, const ui64 *cookie = nullptr, const ui64 *ingress = nullptr) { - IncrementSize(size); + IncrementSize(size); NKikimrBlobStorage::TQueryResult *r = Record.AddResult(); r->SetStatus(status); LogoBlobIDFromLogoBlobID(logoBlobId, r->MutableBlobID()); @@ -1377,9 +1377,9 @@ namespace NKikimr { if (request.HasCookie()) { Record.SetCookie(request.GetCookie()); } - if (request.HasTimestamps()) { - Record.MutableTimestamps()->CopyFrom(request.GetTimestamps()); - } + if (request.HasTimestamps()) { + Record.MutableTimestamps()->CopyFrom(request.GetTimestamps()); + } } }; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.cpp index 9f7d0ddc01..427a620367 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.cpp @@ -1,34 +1,34 @@ #include "vdisk_histogram_latency.h" #include <ydb/core/blobstorage/base/common_latency_hist_bounds.h> - + namespace NKikimr { namespace NVDiskMon { TLtcHisto::TLtcHisto( const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, const TString &name, - const TString &value, - TPDiskCategory::EDeviceType type) + const TString &value, + TPDiskCategory::EDeviceType type) { - auto throughput = counters->GetSubgroup(name, value); - ThroughputBytes = throughput->GetCounter("requestBytes", true); + auto throughput = counters->GetSubgroup(name, value); + ThroughputBytes = throughput->GetCounter("requestBytes", true); // Set up Histo TIntrusivePtr<NMonitoring::TDynamicCounters> 
histoGroup; histoGroup = counters->GetSubgroup("subsystem", "latency_histo"); - auto h = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(type)); + auto h = NMonitoring::ExplicitHistogram(GetCommonLatencyHistBounds(type)); Histo = histoGroup->GetNamedHistogram(name, value, std::move(h)); } - void TLtcHisto::Collect(TDuration d, ui64 size) { + void TLtcHisto::Collect(TDuration d, ui64 size) { if (Histo) { - Histo->Collect(d.MilliSeconds()); + Histo->Collect(d.MilliSeconds()); + } + if (size) { + ThroughputBytes->Add(size); } - if (size) { - ThroughputBytes->Add(size); - } } } // NKikimr diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.h b/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.h index 3714845aa8..d0388fac73 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_histogram_latency.h @@ -3,7 +3,7 @@ #include "defs.h" #include <ydb/core/base/blobstorage.h> - + #include <library/cpp/monlib/dynamic_counters/percentile/percentile.h> #include <library/cpp/monlib/metrics/histogram_collector.h> @@ -13,7 +13,7 @@ namespace NKikimr { // TLtcHisto - Latency Histogram // This class abstracts interface for Latency Histograms from its implementation. // NOTE: - // 1. REMOVED. Historically we have TPercentileHistogram for calculation percentiles locally, + // 1. REMOVED. Historically we have TPercentileHistogram for calculation percentiles locally, // unfortunately percentiles are not additive and we can't build aggregates // for the whole cluster using Solomon. // 2. So IHistogramCollectorPtr is added to have additive histograms (i.e. 
buckets based) @@ -22,15 +22,15 @@ namespace NKikimr { public: TLtcHisto(const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, const TString &name, - const TString &value, - TPDiskCategory::EDeviceType type); + const TString &value, + TPDiskCategory::EDeviceType type); // update histogram with with an operation with duration 'd' - void Collect(TDuration d, ui64 size = 0); + void Collect(TDuration d, ui64 size = 0); private: NMonitoring::THistogramPtr Histo; - NMonitoring::TDynamicCounters::TCounterPtr ThroughputBytes; + NMonitoring::TDynamicCounters::TCounterPtr ThroughputBytes; }; using TLtcHistoPtr = std::shared_ptr<TLtcHisto>; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_histograms.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_histograms.cpp index a979f84068..d9cce39e9e 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_histograms.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_histograms.cpp @@ -5,7 +5,7 @@ namespace NKikimr { THistograms::THistograms( const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - TPDiskCategory::EDeviceType type) + TPDiskCategory::EDeviceType type) { for (const auto& item : { std::make_pair(&VGetAsyncLatencyHistogram, "GetAsync" ), diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_histograms.h b/ydb/core/blobstorage/vdisk/common/vdisk_histograms.h index 57fabec221..9b83a65546 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_histograms.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_histograms.h @@ -14,7 +14,7 @@ namespace NKikimr { public: THistograms( const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters, - TPDiskCategory::EDeviceType type); + TPDiskCategory::EDeviceType type); const NVDiskMon::TLtcHistoPtr &GetHistogram(NKikimrBlobStorage::EGetHandleClass handleClass) const; const NVDiskMon::TLtcHistoPtr &GetHistogram(NKikimrBlobStorage::EPutHandleClass handleClass) const; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_log.h b/ydb/core/blobstorage/vdisk/common/vdisk_log.h index 
2088aa2994..c308e9a7a0 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_log.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_log.h @@ -14,9 +14,9 @@ namespace NKikimr { // Log Prefix //////////////////////////////////////////////////////////////////////////// class TVDiskContext; - Y_PRINTF_FORMAT(2, 3) + Y_PRINTF_FORMAT(2, 3) TString AppendVDiskLogPrefix(const TIntrusivePtr<TVDiskContext> &vctx, const char *c, ...); - Y_PRINTF_FORMAT(2, 3) + Y_PRINTF_FORMAT(2, 3) TString AppendVDiskLogPrefix(const TString &prefix, const char *c, ...); struct TVDiskID; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h b/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h index 2164bc04e6..aa1bff8f84 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h @@ -3,7 +3,7 @@ #include "defs.h" #include <ydb/core/protos/node_whiteboard.pb.h> - + namespace NKikimr { namespace NMonGroup { @@ -145,7 +145,7 @@ public: COUNTER_INIT(DskUsedBytes, false); COUNTER_INIT(HugeUsedChunks, false); COUNTER_INIT(HugeCanBeFreedChunks, false); - COUNTER_INIT(HugeLockedChunks, false); + COUNTER_INIT(HugeLockedChunks, false); } COUNTER_DEF(DskOutOfSpace); @@ -155,7 +155,7 @@ public: // huge heap chunks COUNTER_DEF(HugeUsedChunks); // chunks used by huge heap COUNTER_DEF(HugeCanBeFreedChunks); // number of chunks that can be freed after defragmentation - COUNTER_DEF(HugeLockedChunks); + COUNTER_DEF(HugeLockedChunks); }; /////////////////////////////////////////////////////////////////////////////////// @@ -339,33 +339,33 @@ public: // TVDiskStateGroup /////////////////////////////////////////////////////////////////////////////////// class TVDiskStateGroup: public TBase { - std::array<NMonitoring::TDynamicCounters::TCounterPtr, NKikimrWhiteboard::EVDiskState_MAX + 1> VDiskStates; - NMonitoring::TDynamicCounters::TCounterPtr CurrentState; - + std::array<NMonitoring::TDynamicCounters::TCounterPtr, 
NKikimrWhiteboard::EVDiskState_MAX + 1> VDiskStates; + NMonitoring::TDynamicCounters::TCounterPtr CurrentState; + public: GROUP_CONSTRUCTOR(TVDiskStateGroup) { - // depracated, only for compatibility - TString name = "VDiskState"; - CurrentState = GroupCounters->GetCounter(name, false); - *CurrentState = NKikimrWhiteboard::Initial; - - for (size_t i = NKikimrWhiteboard::EVDiskState_MIN; i <= NKikimrWhiteboard::EVDiskState_MAX; ++i) { - VDiskStates[i] = GroupCounters->GetCounter(name + "_" + NKikimrWhiteboard::EVDiskState_Name(i), false); - } + // depracated, only for compatibility + TString name = "VDiskState"; + CurrentState = GroupCounters->GetCounter(name, false); + *CurrentState = NKikimrWhiteboard::Initial; + + for (size_t i = NKikimrWhiteboard::EVDiskState_MIN; i <= NKikimrWhiteboard::EVDiskState_MAX; ++i) { + VDiskStates[i] = GroupCounters->GetCounter(name + "_" + NKikimrWhiteboard::EVDiskState_Name(i), false); + } COUNTER_INIT(VDiskLocalRecoveryState, false); } - void VDiskState(NKikimrWhiteboard::EVDiskState s) { - *VDiskStates[*CurrentState] = 0; - *CurrentState = s; - *VDiskStates[s] = 1; - } - - NKikimrWhiteboard::EVDiskState VDiskState() const { - return static_cast<NKikimrWhiteboard::EVDiskState>(CurrentState->Val()); - } - + void VDiskState(NKikimrWhiteboard::EVDiskState s) { + *VDiskStates[*CurrentState] = 0; + *CurrentState = s; + *VDiskStates[s] = 1; + } + + NKikimrWhiteboard::EVDiskState VDiskState() const { + return static_cast<NKikimrWhiteboard::EVDiskState>(CurrentState->Val()); + } + COUNTER_DEF(VDiskLocalRecoveryState); }; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.cpp index cc3f802a57..cba8aef896 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.cpp @@ -18,22 +18,22 @@ namespace NKikimr { } } - NKikimrWhiteboard::EFlag TOutOfSpaceState::ToWhiteboardFlag(const ESpaceColor color) { - switch 
(color) { - case TSpaceColor::GREEN: - case TSpaceColor::CYAN: - return NKikimrWhiteboard::EFlag::Green; + NKikimrWhiteboard::EFlag TOutOfSpaceState::ToWhiteboardFlag(const ESpaceColor color) { + switch (color) { + case TSpaceColor::GREEN: + case TSpaceColor::CYAN: + return NKikimrWhiteboard::EFlag::Green; case TSpaceColor::LIGHT_YELLOW: - case TSpaceColor::YELLOW: - case TSpaceColor::LIGHT_ORANGE: - return NKikimrWhiteboard::EFlag::Yellow; - case TSpaceColor::ORANGE: - return NKikimrWhiteboard::EFlag::Orange; - case TSpaceColor::RED: - case TSpaceColor::BLACK: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: - return NKikimrWhiteboard::EFlag::Red; + case TSpaceColor::YELLOW: + case TSpaceColor::LIGHT_ORANGE: + return NKikimrWhiteboard::EFlag::Yellow; + case TSpaceColor::ORANGE: + return NKikimrWhiteboard::EFlag::Orange; + case TSpaceColor::RED: + case TSpaceColor::BLACK: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: + return NKikimrWhiteboard::EFlag::Red; } } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.h b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.h index 2c14c9e9b8..28c351b5c7 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace.h @@ -9,9 +9,9 @@ namespace NKikimr { - using TSpaceColor = NKikimrBlobStorage::TPDiskSpaceColor; - using ESpaceColor = TSpaceColor::E; - + using TSpaceColor = NKikimrBlobStorage::TPDiskSpaceColor; + using ESpaceColor = TSpaceColor::E; + //////////////////////////////////////////////////////////////////////////// // TOutOfSpaceState -- global state for disk space availability //////////////////////////////////////////////////////////////////////////// @@ -19,26 
+19,26 @@ namespace NKikimr { public: TOutOfSpaceState(ui32 totalVDisks, ui32 selfOrderNum); - static NKikimrWhiteboard::EFlag ToWhiteboardFlag(const ESpaceColor color); + static NKikimrWhiteboard::EFlag ToWhiteboardFlag(const ESpaceColor color); // update flags for vdisk with vdiskOrderNum void Update(ui32 vdiskOrderNum, NPDisk::TStatusFlags flags); - NKikimrWhiteboard::EFlag GlobalWhiteboardFlag() const { - return ToWhiteboardFlag(GetGlobalColor()); - } - - NKikimrWhiteboard::EFlag LocalWhiteboardFlag() const { - return ToWhiteboardFlag(GetLocalColor()); - } - - ESpaceColor GetGlobalColor() const { - return StatusFlagToSpaceColor(static_cast<NPDisk::TStatusFlags>(AtomicGet(GlobalFlags))); + NKikimrWhiteboard::EFlag GlobalWhiteboardFlag() const { + return ToWhiteboardFlag(GetGlobalColor()); + } + + NKikimrWhiteboard::EFlag LocalWhiteboardFlag() const { + return ToWhiteboardFlag(GetLocalColor()); + } + + ESpaceColor GetGlobalColor() const { + return StatusFlagToSpaceColor(static_cast<NPDisk::TStatusFlags>(AtomicGet(GlobalFlags))); + } + + ESpaceColor GetLocalColor() const { + return StatusFlagToSpaceColor(GetLocalStatusFlags()); } - ESpaceColor GetLocalColor() const { - return StatusFlagToSpaceColor(GetLocalStatusFlags()); - } - // update state with flags received from local PDisk void UpdateLocal(NPDisk::TStatusFlags flags) { Update(SelfOrderNum, flags); diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace_ut.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace_ut.cpp index 46588a1ca8..e59a7c607a 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_outofspace_ut.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_outofspace_ut.cpp @@ -15,28 +15,28 @@ namespace NKikimr { Y_UNIT_TEST(TestLocal) { TOutOfSpaceState state(8, 0); - UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); + UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); NPDisk::TStatusFlags flags = NKikimrBlobStorage::StatusIsValid; state.UpdateLocal(flags); - 
UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); + UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); UNIT_ASSERT_EQUAL(state.GetLocalStatusFlags(), flags); UNIT_ASSERT_EQUAL(state.GetGlobalStatusFlags().Flags, flags); } Y_UNIT_TEST(TestGlobal) { TOutOfSpaceState state(8, 3); - UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); + UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::GREEN); NPDisk::TStatusFlags flags = NKikimrBlobStorage::StatusIsValid; for (int i = 0; i < 8; ++i) { state.Update(0, flags); } state.Update(5, flags | NKikimrBlobStorage::StatusDiskSpaceRed); - UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::RED); + UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::RED); state.Update(4, flags | NKikimrBlobStorage::StatusDiskSpaceOrange); state.Update(5, flags | NKikimrBlobStorage::StatusDiskSpaceLightYellowMove); - UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::ORANGE); + UNIT_ASSERT_EQUAL(state.GetGlobalColor(), TSpaceColor::ORANGE); } } diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_recoverylogwriter.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_recoverylogwriter.cpp index ee69b8f865..f0ec9c15e4 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_recoverylogwriter.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_recoverylogwriter.cpp @@ -5,8 +5,8 @@ namespace NKikimr { -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + //////////////////////////////////////////////////////////////////////////////// // TRecoveryLogWriter -- it makes all records to recovery log go sequentially // according to lsns @@ -26,7 +26,7 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); , Bytes() {} - TItem(const TString &prefix, const TString& name, TIntrusivePtr<NMonitoring::TDynamicCounters> mon) { + TItem(const TString &prefix, const TString& name, TIntrusivePtr<NMonitoring::TDynamicCounters> mon) { Msgs = mon->GetCounter(prefix + name + "Msgs", true); Bytes = mon->GetCounter(prefix + name 
+ "Bytes", true); } @@ -41,21 +41,21 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); TCounters(TIntrusivePtr<NMonitoring::TDynamicCounters> mon) { auto group = mon->GetSubgroup("subsystem", "logrecs"); - Counters.reserve(static_cast<size_t>(TLogSignature::Max)); + Counters.reserve(static_cast<size_t>(TLogSignature::Max)); TString prefix("Log"); - for (int i = static_cast<int>(TLogSignature::First); - i < static_cast<int>(TLogSignature::Max); i++) { - TLogSignature s(i); - if (s == TLogSignature::First) { + for (int i = static_cast<int>(TLogSignature::First); + i < static_cast<int>(TLogSignature::Max); i++) { + TLogSignature s(i); + if (s == TLogSignature::First) { Counters.emplace_back(TItem()); } else { - Counters.emplace_back(TItem(prefix, s.ToString(), group)); + Counters.emplace_back(TItem(prefix, s.ToString(), group)); } } } - void Update(TLogSignature signature, i64 size) { + void Update(TLogSignature signature, i64 size) { Counters[static_cast<int>(signature)].Update(size); } }; @@ -128,19 +128,19 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); void Handle(NPDisk::TEvLog::TPtr &ev, const TActorContext &ctx) { ui64 lsnSegmentStart = ev->Get()->LsnSegmentStart; ui64 lsn = ev->Get()->Lsn; - LWTRACK(VDiskRecoveryLogWriterVPutIsRecieved, ev->Get()->Orbit, Owner, lsn); - TLogSignature signature = ev->Get()->Signature.GetUnmasked(); - Y_VERIFY(TLogSignature::First < signature && signature < TLogSignature::Max); + LWTRACK(VDiskRecoveryLogWriterVPutIsRecieved, ev->Get()->Orbit, Owner, lsn); + TLogSignature signature = ev->Get()->Signature.GetUnmasked(); + Y_VERIFY(TLogSignature::First < signature && signature < TLogSignature::Max); i64 msgSize = ev->Get()->ApproximateSize(); // count written bytes *LsmLogBytesWritten += msgSize; // update generic counters - Counters.Update(signature, msgSize); + Counters.Update(signature, msgSize); std::unique_ptr<IEventHandle> converted(ev->Forward(YardID).Release()); if (lsnSegmentStart == CurSentLsn + 1) { // rewrite and send message; - 
LWTRACK(VDiskRecoveryLogWriterVPutIsSent, converted->Get<NPDisk::TEvLog>()->Orbit, Owner, lsn); + LWTRACK(VDiskRecoveryLogWriterVPutIsSent, converted->Get<NPDisk::TEvLog>()->Orbit, Owner, lsn); ctx.ExecutorThread.Send(converted.release()); CurSentLsn = lsn; // proceed with elements waiting in the queue @@ -161,13 +161,13 @@ LWTRACE_USING(BLOBSTORAGE_PROVIDER); << "Logs.back().Lsn# " << logs->Logs.back()->Lsn); for (auto &log : logs->Logs) { LWTRACK(VDiskRecoveryLogWriterVPutIsRecieved, log->Orbit, Owner, log->Lsn); - TLogSignature signature = log->Signature.GetUnmasked(); - Y_VERIFY(TLogSignature::First < signature && signature < TLogSignature::Max); + TLogSignature signature = log->Signature.GetUnmasked(); + Y_VERIFY(TLogSignature::First < signature && signature < TLogSignature::Max); i64 msgSize = log->ApproximateSize(); // count written bytes *LsmLogBytesWritten += msgSize; // update generic counters - Counters.Update(signature, msgSize); + Counters.Update(signature, msgSize); LWTRACK(VDiskRecoveryLogWriterVPutIsSent, log->Orbit, Owner, lsn); } std::unique_ptr<IEventHandle> converted(ev->Forward(YardID).Release()); diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_response.cpp b/ydb/core/blobstorage/vdisk/common/vdisk_response.cpp index e8af1dd09b..384931d305 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_response.cpp +++ b/ydb/core/blobstorage/vdisk/common/vdisk_response.cpp @@ -2,7 +2,7 @@ #include "vdisk_events.h" #include <ydb/core/blobstorage/base/wilson_events.h> #include <ydb/core/base/interconnect_channels.h> -#include <util/system/datetime.h> +#include <util/system/datetime.h> namespace NKikimr { diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp index 7e43614654..4916a05941 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp @@ -45,22 +45,22 @@ namespace NKikimr { // that can be freed to number of chunks used 
by VDisk //////////////////////////////////////////////////////////////////////////// bool HugeHeapDefragmentationRequired( - const TOutOfSpaceState& oos, - ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks) { + const TOutOfSpaceState& oos, + ui32 hugeCanBeFreedChunks, + ui32 hugeTotalChunks) { if (hugeCanBeFreedChunks < 10) return false; - double percentOfGarbage = static_cast<double>(hugeCanBeFreedChunks) / hugeTotalChunks; + double percentOfGarbage = static_cast<double>(hugeCanBeFreedChunks) / hugeTotalChunks; - if (oos.GetLocalColor() > TSpaceColor::CYAN) { - return percentOfGarbage >= 0.02; - } else if (oos.GetLocalColor() > TSpaceColor::GREEN) { - return percentOfGarbage >= 0.15; - } else { - return percentOfGarbage >= 0.30; - } + if (oos.GetLocalColor() > TSpaceColor::CYAN) { + return percentOfGarbage >= 0.02; + } else if (oos.GetLocalColor() > TSpaceColor::GREEN) { + return percentOfGarbage >= 0.15; + } else { + return percentOfGarbage >= 0.30; + } } //////////////////////////////////////////////////////////////////////////// @@ -208,7 +208,7 @@ namespace NKikimr { std::shared_ptr<TDefragCtx> DCtx; TIntrusivePtr<TBlobStorageGroupInfo> GInfo; - ui64 TotalDefragRuns = 0; + ui64 TotalDefragRuns = 0; bool InProgress = false; std::deque<TTask> WaitQueue; TActiveActors ActiveActors; @@ -232,7 +232,7 @@ namespace NKikimr { } Sublog.Log() << "Defrag quantum started\n"; - ++TotalDefragRuns; + ++TotalDefragRuns; InProgress = true; ActiveActors.Insert(RunInBatchPool(ctx, CreateDefragQuantumActor(DCtx, GInfo->GetVDiskId(DCtx->VCtx->ShortSelfVDisk), @@ -350,16 +350,16 @@ namespace NKikimr { TABLED() {str << WaitQueue.size(); } } TABLER() { - TABLED() {str << "DCtx->RunDefragBySchedule";} - TABLED() {str << DCtx->RunDefragBySchedule;} - } - TABLER() { - TABLED() {str << "TotalDefragRuns";} - TABLED() {str << TotalDefragRuns;} - } - TABLER() { + TABLED() {str << "DCtx->RunDefragBySchedule";} + TABLED() {str << DCtx->RunDefragBySchedule;} + } + TABLER() { + TABLED() {str 
<< "TotalDefragRuns";} + TABLED() {str << TotalDefragRuns;} + } + TABLER() { TABLED() {str << "FreeSpaceShare/Threshold";} - TABLED() {str << DCtx->VCtx->GetOutOfSpaceState().GetFreeSpaceShare();} + TABLED() {str << DCtx->VCtx->GetOutOfSpaceState().GetFreeSpaceShare();} } TABLER() { TABLED() {str << "CanBeFreed/Used Huge Heap Chunks";} @@ -371,7 +371,7 @@ namespace NKikimr { TABLER() { TABLED() {str << "VDisk Used Chunks";} TABLED() { - str << DCtx->VCtx->GetOutOfSpaceState().GetLocalUsedChunks(); + str << DCtx->VCtx->GetOutOfSpaceState().GetLocalUsedChunks(); } } } diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h index edb8644d3e..f59ecee374 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h @@ -43,9 +43,9 @@ namespace NKikimr { // Making decision to start compaction //////////////////////////////////////////////////////////////////////////// bool HugeHeapDefragmentationRequired( - const TOutOfSpaceState& oos, - ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks); + const TOutOfSpaceState& oos, + ui32 hugeCanBeFreedChunks, + ui32 hugeTotalChunks); //////////////////////////////////////////////////////////////////////////// // VDISK DEFRAG ACTOR CREATOR diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp index eb41632391..ebcfec5619 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp @@ -12,24 +12,24 @@ namespace NKikimr { Y_UNIT_TEST_SUITE(TVDiskDefrag) { Y_UNIT_TEST(HugeHeapDefragmentationRequired) { { - TOutOfSpaceState oos(1, 0); - ui32 hugeCanBeFreedChunks = 9; - ui32 hugeUsedChunks = 20; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); - UNIT_ASSERT(!defrag); + TOutOfSpaceState oos(1, 0); + ui32 hugeCanBeFreedChunks = 9; + ui32 hugeUsedChunks = 
20; + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + UNIT_ASSERT(!defrag); } { - TOutOfSpaceState oos(1, 0); + TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 200; - ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + ui32 hugeUsedChunks = 1000; + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); UNIT_ASSERT(!defrag); } { - TOutOfSpaceState oos(1, 0); - ui32 hugeCanBeFreedChunks = 301; - ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + TOutOfSpaceState oos(1, 0); + ui32 hugeCanBeFreedChunks = 301; + ui32 hugeUsedChunks = 1000; + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); UNIT_ASSERT(defrag); } } diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp index 5992b20d79..30d2aeece4 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp @@ -289,7 +289,7 @@ namespace NKikimr { "COMMIT: type# HugeChunkAllocator msg# %s", commitRecord.ToString().data())); ctx.Send(HugeKeeperCtx->LoggerId, new NPDisk::TEvLog(HugeKeeperCtx->PDiskCtx->Dsk->Owner, - HugeKeeperCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureHugeBlobAllocChunk, + HugeKeeperCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureHugeBlobAllocChunk, commitRecord, data, TLsnSeg(Lsn, Lsn), nullptr)); // commit changes to the persistent state at once @@ -367,7 +367,7 @@ namespace NKikimr { // send log message ctx.Send(HugeKeeperCtx->LoggerId, new NPDisk::TEvLog(HugeKeeperCtx->PDiskCtx->Dsk->Owner, - HugeKeeperCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureHugeBlobFreeChunk, + HugeKeeperCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureHugeBlobFreeChunk, commitRecord, 
data, TLsnSeg(Lsn, Lsn), nullptr)); TThis::Become(&TThis::StateFunc); } @@ -430,7 +430,7 @@ namespace NKikimr { TLsnSeg seg(EntryPointLsn, EntryPointLsn); ctx.Send(HugeKeeperCtx->LoggerId, new NPDisk::TEvLog(HugeKeeperCtx->PDiskCtx->Dsk->Owner, HugeKeeperCtx->PDiskCtx->Dsk->OwnerRound, - TLogSignature::SignatureHugeBlobEntryPoint, commitRecord, Serialized, seg, nullptr)); + TLogSignature::SignatureHugeBlobEntryPoint, commitRecord, Serialized, seg, nullptr)); TThis::Become(&TThis::StateFunc); } @@ -785,13 +785,13 @@ namespace NKikimr { }; switch (msg->Signature) { - case TLogSignature::SignatureHullLogoBlobsDB: + case TLogSignature::SignatureHullLogoBlobsDB: checkAndSet(State.Pers->LogPos.LogoBlobsDbSlotDelLsn); break; - case TLogSignature::SignatureHullBlocksDB: + case TLogSignature::SignatureHullBlocksDB: checkAndSet(State.Pers->LogPos.BlocksDbSlotDelLsn); break; - case TLogSignature::SignatureHullBarriersDB: + case TLogSignature::SignatureHullBarriersDB: checkAndSet(State.Pers->LogPos.BarriersDbSlotDelLsn); break; default: @@ -913,7 +913,7 @@ namespace NKikimr { // update mon counters HugeKeeperCtx->DskOutOfSpaceGroup.HugeUsedChunks() = stat.CurrentlyUsedChunks; HugeKeeperCtx->DskOutOfSpaceGroup.HugeCanBeFreedChunks() = stat.CanBeFreedChunks; - HugeKeeperCtx->DskOutOfSpaceGroup.HugeLockedChunks() = stat.LockedChunks.size(); + HugeKeeperCtx->DskOutOfSpaceGroup.HugeLockedChunks() = stat.LockedChunks.size(); // update global stat HugeKeeperCtx->VCtx->GetHugeHeapFragmentation().Set(stat.CurrentlyUsedChunks, stat.CanBeFreedChunks); } diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h index 18c5af9eec..66f3c04959 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h @@ -118,10 +118,10 @@ namespace NKikimr { public: const TDiskPartVec HugeBlobs; const ui64 DeletionLsn; - const TLogSignature Signature; // identifies database 
we send update for + const TLogSignature Signature; // identifies database we send update for TEvHullFreeHugeSlots(TDiskPartVec &&hugeBlobs, ui64 deletionLsn, - TLogSignature signature) + TLogSignature signature) : HugeBlobs(std::move(hugeBlobs)) , DeletionLsn(deletionLsn) , Signature(signature) @@ -129,7 +129,7 @@ namespace NKikimr { TString ToString() const { TStringStream str; - str << "{" << Signature.ToString() + str << "{" << Signature.ToString() << " DelLsn# " << DeletionLsn << " Slots# " << HugeBlobs.ToString() << "}"; return str.Str(); diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge_ut.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge_ut.cpp index 0efb94623b..ed64a07678 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge_ut.cpp +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge_ut.cpp @@ -28,7 +28,7 @@ namespace NKikimr { auto counters = MakeIntrusive<NMonitoring::TDynamicCounters>(); auto info = MakeIntrusive<TBlobStorageGroupInfo>(TBlobStorageGroupType::Erasure4Plus2Block); auto vctx = MakeIntrusive<TVDiskContext>(TActorId(), info->PickTopology(), counters, TVDiskID(0, 1, 0, 0, 0), - nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); + nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); std::unique_ptr<THullHugeKeeperPersState> state( new THullHugeKeeperPersState(vctx, chunkSize, appendBlockSize, minHugeBlobInBytes, milestoneHugeBlobInBytes, maxBlobInBytes, diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedefs.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedefs.h index dd93d22d29..9bb8e64c08 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedefs.h +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedefs.h @@ -45,19 +45,19 @@ namespace NKikimr { struct THeapStat { ui32 CurrentlyUsedChunks = 0; ui32 CanBeFreedChunks = 0; - std::vector<ui32> LockedChunks; + std::vector<ui32> LockedChunks; THeapStat() = default; - THeapStat(ui32 currentlyUsedChunks, ui32 canBeFreedChunks, 
std::vector<ui32> lockedChunks) + THeapStat(ui32 currentlyUsedChunks, ui32 canBeFreedChunks, std::vector<ui32> lockedChunks) : CurrentlyUsedChunks(currentlyUsedChunks) , CanBeFreedChunks(canBeFreedChunks) - , LockedChunks(std::move(lockedChunks)) + , LockedChunks(std::move(lockedChunks)) {} THeapStat &operator+=(const THeapStat &s) { CurrentlyUsedChunks += s.CurrentlyUsedChunks; CanBeFreedChunks += s.CanBeFreedChunks; - LockedChunks.insert(LockedChunks.end(), s.LockedChunks.begin(), s.LockedChunks.end()); + LockedChunks.insert(LockedChunks.end(), s.LockedChunks.begin(), s.LockedChunks.end()); return *this; } diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedelete.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedelete.h index 8faee1e6b0..f0f6ebc326 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedelete.h +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedelete.h @@ -56,9 +56,9 @@ namespace NKikimr { struct TRemovedHugeBlobsQueueItem { ui64 RecordLsn; TDiskPartVec RemovedHugeBlobs; - TLogSignature Signature; + TLogSignature Signature; - TRemovedHugeBlobsQueueItem(ui64 recordLsn, TDiskPartVec&& removedHugeBlobs, TLogSignature signature) + TRemovedHugeBlobsQueueItem(ui64 recordLsn, TDiskPartVec&& removedHugeBlobs, TLogSignature signature) : RecordLsn(recordLsn) , RemovedHugeBlobs(std::move(removedHugeBlobs)) , Signature(signature) @@ -86,7 +86,7 @@ namespace NKikimr { // this function is called every time when compaction is about to commit new entrypoint containing at least // one removed huge blob; recordLsn is allocated LSN of this entrypoint void Update(ui64 recordLsn, TDiskPartVec&& removedHugeBlobs, const TActorContext& ctx, - const TActorId& hugeKeeperId, TLogSignature signature) { + const TActorId& hugeKeeperId, TLogSignature signature) { Y_VERIFY(recordLsn > LastDeletionLsn); LastDeletionLsn = recordLsn; RemovedHugeBlobsQueue.emplace_back(recordLsn, std::move(removedHugeBlobs), signature); diff --git 
a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp index 6ee5f1548b..e60200de98 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp @@ -158,7 +158,7 @@ namespace NKikimr { } else if ((it = LockedChunks.find(chunkId)) != LockedChunks.end()) { return freeFoundSlot(LockedChunks, "LockedChunks"); } else { - // chunk is neither in FreeSpace nor in LockedChunks + // chunk is neither in FreeSpace nor in LockedChunks TDynBitMap mask; mask.Reserve(SlotsInChunk); mask.Reset(0, SlotsInChunk); @@ -194,11 +194,11 @@ namespace NKikimr { ui32 fullyFilledChunks = slotsToChunks(AllocatedSlots - usedSlotsInFreeSpace, SlotsInChunk); ui32 currentlyUsedChunks = usedChunksInFreeSpace + fullyFilledChunks; - std::vector<ui32> lockedChunks; - for (auto& x : LockedChunks) { - lockedChunks.push_back(x.first); - } - return THeapStat(currentlyUsedChunks, canBeFreedChunks, std::move(lockedChunks)); + std::vector<ui32> lockedChunks; + for (auto& x : LockedChunks) { + lockedChunks.push_back(x.first); + } + return THeapStat(currentlyUsedChunks, canBeFreedChunks, std::move(lockedChunks)); } bool TChain::RecoveryModeAllocate(const NPrivate::TChunkSlot &id) { @@ -258,9 +258,9 @@ namespace NKikimr { ::Load(s, FreeSpace); FreeSlotsInFreeSpace = 0; for (const auto &[chunkId, mask] : FreeSpace) { - // all 1 in mask -- free slots - // 0 - slot is in use - FreeSlotsInFreeSpace += mask.Count(); + // all 1 in mask -- free slots + // 0 - slot is in use + FreeSlotsInFreeSpace += mask.Count(); } } @@ -289,14 +289,14 @@ namespace NKikimr { void TChain::RenderHtml(IOutputStream &str) const { auto output = [&str] (const TFreeSpace &c) { for (const auto &x : c) { - size_t freeSlots = 0; + size_t freeSlots = 0; for (size_t i = 0; i < x.second.Size(); i++) { if (x.second.Test(i)) - ++freeSlots; + ++freeSlots; + } + if (freeSlots) { + str << " [" << x.first 
<< " " << freeSlots << "]"; } - if (freeSlots) { - str << " [" << x.first << " " << freeSlots << "]"; - } } }; @@ -565,7 +565,7 @@ namespace NKikimr { TABLEHEAD() { TABLER() { TABLEH() {str << "Chain";} - TABLEH() {str << "Reserved: [ChunkIdx, FreeSlotsInChunk]";} + TABLEH() {str << "Reserved: [ChunkIdx, FreeSlotsInChunk]";} } } TABLEBODY() { diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap_ut.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap_ut.cpp index 8c7e08fc35..638269fdfd 100644 --- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap_ut.cpp +++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap_ut.cpp @@ -59,7 +59,7 @@ namespace NKikimr { res = chain.Allocate(&id); UNIT_ASSERT_EQUAL(res, false); - UNIT_ASSERT_EQUAL(chain.GetStat(), THeapStat(1, 0, {})); + UNIT_ASSERT_EQUAL(chain.GetStat(), THeapStat(1, 0, {})); STR << "All allocated\n"; } diff --git a/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hulldefs.h b/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hulldefs.h index a1adbab314..4719a39e72 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hulldefs.h +++ b/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hulldefs.h @@ -18,7 +18,7 @@ namespace NKikimr { template <class TKey> - TLogSignature PDiskSignatureForHullDbKey(); + TLogSignature PDiskSignatureForHullDbKey(); /////////////////////////////////////////////////////////////////////////////////////// // TDiskDataExtractor diff --git a/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hullsatisfactionrank.cpp b/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hullsatisfactionrank.cpp index aa16283674..74ed554356 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hullsatisfactionrank.cpp +++ b/ydb/core/blobstorage/vdisk/hulldb/base/blobstorage_hullsatisfactionrank.cpp @@ -59,7 +59,7 @@ namespace NKikimr { if (!msg) { msg = std::make_unique<NPDisk::TEvConfigureScheduler>(PDiskCtx->Dsk->Owner, PDiskCtx->Dsk->OwnerRound); 
} - msg->SchedulerCfg.FreshWeight = status.Weight; + msg->SchedulerCfg.FreshWeight = status.Weight; } // level status = LevelWeight.Update(LevelRank.GetRank()); @@ -67,7 +67,7 @@ namespace NKikimr { if (!msg) { msg = std::make_unique<NPDisk::TEvConfigureScheduler>(PDiskCtx->Dsk->Owner, PDiskCtx->Dsk->OwnerRound); } - msg->SchedulerCfg.CompWeight = status.Weight; + msg->SchedulerCfg.CompWeight = status.Weight; } // send msg if any if (msg) { diff --git a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_barrier.h b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_barrier.h index 9615fdba66..4afce21c05 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_barrier.h +++ b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_barrier.h @@ -112,8 +112,8 @@ namespace NKikimr { // PDiskSignatureForHullDbKey ///////////////////////////////////////////////////////////////////////// template <> - inline TLogSignature PDiskSignatureForHullDbKey<TKeyBarrier>() { - return TLogSignature::SignatureHullBarriersDB; + inline TLogSignature PDiskSignatureForHullDbKey<TKeyBarrier>() { + return TLogSignature::SignatureHullBarriersDB; } ///////////////////////////////////////////////////////////////////////// diff --git a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_block.h b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_block.h index cff9a97a15..72e38257f2 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_block.h +++ b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_block.h @@ -72,8 +72,8 @@ namespace NKikimr { // PDiskSignatureForHullDbKey ///////////////////////////////////////////////////////////////////////// template <> - inline TLogSignature PDiskSignatureForHullDbKey<TKeyBlock>() { - return TLogSignature::SignatureHullBlocksDB; + inline TLogSignature PDiskSignatureForHullDbKey<TKeyBlock>() { + return TLogSignature::SignatureHullBlocksDB; } ///////////////////////////////////////////////////////////////////////// diff --git 
a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_logoblob.h b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_logoblob.h index 1fa2c2ab77..95d32d80f4 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_logoblob.h +++ b/ydb/core/blobstorage/vdisk/hulldb/base/hullbase_logoblob.h @@ -85,8 +85,8 @@ namespace NKikimr { // PDiskSignatureForHullDbKey ///////////////////////////////////////////////////////////////////////// template <> - inline TLogSignature PDiskSignatureForHullDbKey<TKeyLogoBlob>() { - return TLogSignature::SignatureHullLogoBlobsDB; + inline TLogSignature PDiskSignatureForHullDbKey<TKeyLogoBlob>() { + return TLogSignature::SignatureHullLogoBlobsDB; } ///////////////////////////////////////////////////////////////////////// diff --git a/ydb/core/blobstorage/vdisk/hulldb/base/hullds_ut.h b/ydb/core/blobstorage/vdisk/hulldb/base/hullds_ut.h index 564ca58b5e..b0d875a060 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/base/hullds_ut.h +++ b/ydb/core/blobstorage/vdisk/hulldb/base/hullds_ut.h @@ -13,7 +13,7 @@ namespace NKikimr { , CompWorthReadSize(compWorthReadSize) , GroupInfo(TBlobStorageGroupType::ErasureMirror3, 2, 4) , VCtx(new TVDiskContext(TActorId(), GroupInfo.PickTopology(), new NMonitoring::TDynamicCounters(), - TVDiskID(), nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN)) + TVDiskID(), nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN)) , HullCtx( new THullCtx( VCtx, diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_balance.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_balance.h index 9aba27b457..c98cdf82e1 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_balance.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_balance.h @@ -235,7 +235,7 @@ namespace NKikimr { LOG_DEBUG(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: TBalancePartiallySortedLevels::CalculateRank: %s", - 
PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ToString().data())); } @@ -553,7 +553,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Balance: action# %s timeSpent# %s RankThreshold# %e ranks# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data(), RankThreshold, ranks.ToString().data())); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_delsst.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_delsst.h index e69c5e5a08..eda1d87053 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_delsst.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_delsst.h @@ -41,7 +41,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: DelSst: action# %s timeSpent# %s sstsToDelete# %" PRIu32, - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data(), SstToDelete)); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_lazy.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_lazy.h index d477a106c8..5b0178873e 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_lazy.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_lazy.h @@ -305,7 +305,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Balance: action# %s timeSpent# %s ScoreThreshold# %e", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime 
- startTime).ToString().data(), ScoreThreshold)); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_promote.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_promote.h index 8966ff5566..b880e68731 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_promote.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_promote.h @@ -46,7 +46,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: PromoteSsts: action# %s timeSpent# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data())); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_ratio.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_ratio.h index 8647f69284..fe2a48fc9e 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_ratio.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_ratio.h @@ -42,7 +42,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: StorageRatio: timeSpent# %s stat# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), (finishTime - startTime).ToString().data(), stat.ToString().data())); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_selector.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_selector.h index 3a06616956..f269d15b95 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_selector.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_selector.h @@ -108,7 +108,7 @@ namespace NKikimr { LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Selector actor: action# %s timeSpent# 
%s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data())); TThis::Die(ctx); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_space.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_space.h index 81685ee534..6014216174 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_space.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_space.h @@ -46,7 +46,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: FreeSpace: action# %s timeSpent# %s candidate# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data(), Candidate.ToString().data())); } diff --git a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_squeeze.h b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_squeeze.h index 72cf74e14c..bf264be3f2 100644 --- a/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_squeeze.h +++ b/ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_squeeze.h @@ -47,7 +47,7 @@ namespace NKikimr { LOG_INFO(*HullCtx->VCtx->ActorSystem, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: FreeSpace: action# %s timeSpent# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), ActionToStr(action), (finishTime - startTime).ToString().data())); } diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_buildslice.h b/ydb/core/blobstorage/vdisk/hullop/blobstorage_buildslice.h index a6630c0565..2a3fec23c2 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_buildslice.h +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_buildslice.h @@ -1,13 +1,13 @@ #pragma once 
-#include "defs.h" +#include "defs.h" #include <ydb/core/blobstorage/vdisk/hulldb/hull_ds_all.h> #include <ydb/core/blobstorage/vdisk/hulldb/generic/hullds_leveledssts.h> #include <ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_defs.h> - -namespace NKikimr { - - //////////////////////////////////////////////////////////////////////////// + +namespace NKikimr { + + //////////////////////////////////////////////////////////////////////////// // THullOpUtil -- utils for compaction //////////////////////////////////////////////////////////////////////////// template <class TKey, class TMemRec> @@ -27,26 +27,26 @@ namespace NKikimr { using TLeveledSstsIterator = typename TLeveledSsts::TIterator; using TSortedLevel = ::NKikimr::TSortedLevel<TKey, TMemRec>; - // Find removed huge blobs + // Find removed huge blobs static TDiskPartVec FindRemovedHugeBlobsAfterLevelCompaction( const TActorContext &ctx, TSliceSstIterator oldSliceIt, const TLevelSlice *newSlice); - + static TDiskPartVec FindRemovedHugeBlobsAfterFreshCompaction( const TActorContext &ctx, const TIntrusivePtr<TFreshSegment> &freshSeg, const TOrderedLevelSegmentsPtr &segVec); - - struct TBuiltSlice { - // new slice built from the previous one and changes provided - TLevelSlicePtr NewSlice; - // calculated removed huge blobs based on slice comparison, - // for debug purposes only - TDiskPartVec RemovedHugeBlobs; - }; - - // Build new slice after compaction from previous slice and modifications provided + + struct TBuiltSlice { + // new slice built from the previous one and changes provided + TLevelSlicePtr NewSlice; + // calculated removed huge blobs based on slice comparison, + // for debug purposes only + TDiskPartVec RemovedHugeBlobs; + }; + + // Build new slice after compaction from previous slice and modifications provided static TBuiltSlice BuildSlice( const TIntrusivePtr<TVDiskContext> &vctx, const TActorContext &ctx, @@ -54,11 +54,11 @@ namespace NKikimr { const TLevelSlice *slice, const 
TCompactionTask &ctask, bool findRemovedHugeBlobs); - - }; - -//#define HULL_COMPACT_APPLY - + + }; + +//#define HULL_COMPACT_APPLY + template <class TKey, class TMemRec> typename THullOpUtil<TKey, TMemRec>::TBuiltSlice THullOpUtil<TKey, TMemRec>::BuildSlice( @@ -69,305 +69,305 @@ namespace NKikimr { const TCompactionTask &ctask, bool findRemovedHugeBlobs) { - Y_UNUSED(ctx); // for log in debug + Y_UNUSED(ctx); // for log in debug // initialize iterator for adding/removing TLeveledSstsIterator addIt(&ctask.GetSstsToAdd()); addIt.SeekToFirst(); TLeveledSstsIterator delIt(&ctask.GetSstsToDelete()); delIt.SeekToFirst(); // create a new slice - TLevelSlicePtr res(new TLevelSlice(settings, slice->Ctx)); - - ui32 levelsSize = slice->SortedLevels.size(); - if (ctask.Action == NHullComp::ActCompactSsts && - ctask.CompactSsts.TargetLevel != ui32(-1) && - ctask.CompactSsts.TargetLevel > levelsSize) { - // resize levels - levelsSize = ctask.CompactSsts.TargetLevel; - } - - res->SortedLevels.reserve(levelsSize); - for (ui32 j = 0; j < levelsSize; j++) { - // update/copy LastCompactedKey - TKey lastCompactedKey; - if (ctask.Action == NHullComp::ActCompactSsts && j == ctask.CompactSsts.TargetLevel) { - lastCompactedKey = ctask.CompactSsts.LastCompactedKey; - } else if (j < slice->SortedLevels.size()) { - lastCompactedKey = slice->SortedLevels[j].LastCompactedKey; - } else { - lastCompactedKey = TKey::First(); - } - res->SortedLevels.push_back(TSortedLevel(lastCompactedKey)); - } - - // prepare iterators - // NOTE: we use current number of ssts for Level 0 (slice->Level0CurSstsNum()). 
Number of ssts at Level 0 - // could change during compaction, but we have to use the actual value to avoid loosing - // Level 0 ssts we got while compaction was running - TSliceSstIterator sliceIt(slice, slice->Level0CurSstsNum()); - sliceIt.SeekToFirst(); - - auto checkOrder = [](auto iter) { - if (iter.Valid()) { - auto prev = iter.Get(); - for (iter.Next(); iter.Valid(); iter.Next()) { - auto current = iter.Get(); - if (current < prev) { - return false; - } else { - prev = current; - } - } - } - return true; - }; - auto dump = [](auto iter) { - TStringStream str; - str << "{"; - bool first = true; - for (; iter.Valid(); iter.Next()) { - if (first) { - first = false; - } else { - str << " "; - } - str << iter.Get().ToString(); - } - str << "}"; - return str.Str(); - }; - Y_VERIFY(checkOrder(addIt), "addIt# %s", dump(addIt).data()); - Y_VERIFY(checkOrder(delIt), "delIt# %s", dump(delIt).data()); - Y_VERIFY(checkOrder(sliceIt), "sliceIt# %s", dump(sliceIt).data()); - -#ifdef HULL_COMPACT_APPLY - TStringStream debugOutput; - { - debugOutput << "COMPACTION_APPLY\n"; - debugOutput << " ORIG\n" << slice->ToString(" "); - debugOutput << " CHANGES\n DELETE: "; - TLeveledSstsIterator d(delIt); - d.SeekToFirst(); - while (d.Valid()) { - debugOutput << "[Level: " << d.Get().Level << " " << d.Get().SstPtr->ChunksToString() << "]"; - d.Next(); - } - debugOutput << "\n ADD: "; - TLeveledSstsIterator v(addIt); - v.SeekToFirst(); - while (v.Valid()) { - debugOutput << "[Level: " << v.Get().Level << " " << v.Get().SstPtr->ChunksToString() << "]"; - v.Next(); - } - debugOutput << "\n COMMANDS\n"; - } -#endif - - // merge - while (sliceIt.Valid() && addIt.Valid()) { - if (sliceIt.Get() < addIt.Get()) { -#ifdef HULL_COMPACT_APPLY - debugOutput << " SLICE_LESS(1) sliceIt: " << sliceIt.Get().ToString() << "\n"; -#endif - if (delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())) { - delIt.Next(); - } else { - TLevelSstPtr item = sliceIt.Get(); - res->Put(item); -#ifdef 
HULL_COMPACT_APPLY - debugOutput << " Put Item(1): " << item.ToString() << "\n"; -#endif - } - sliceIt.Next(); - } else if (addIt.Get() < sliceIt.Get()) { - TLevelSstPtr item = addIt.Get(); - res->Put(item); -#ifdef HULL_COMPACT_APPLY - debugOutput << " VEC_LESS(1) sliceIt: " << item.ToString() << "\n"; - debugOutput << " Put Item(2): " << item.ToString() << "\n"; -#endif - addIt.Next(); - } else { - // do smth -#ifdef HULL_COMPACT_APPLY - debugOutput << " BOTH sliceIt: " << sliceIt.Get().ToString() << " addIt: " - << addIt.Get().ToString() << "\n"; -#endif - Y_VERIFY(delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())); - delIt.Next(); - sliceIt.Next(); - TLevelSstPtr item = addIt.Get(); - res->Put(item); -#ifdef HULL_COMPACT_APPLY - debugOutput << " Put Item(3): " << item.ToString() << "\n"; -#endif - addIt.Next(); - } - } - - while (sliceIt.Valid()) { -#ifdef HULL_COMPACT_APPLY - debugOutput << " SLICE_LESS(2) sliceIt: " << sliceIt.Get().ToString() << "\n"; -#endif - if (delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())) { - delIt.Next(); - } else { - TLevelSstPtr item = sliceIt.Get(); - res->Put(item); -#ifdef HULL_COMPACT_APPLY - debugOutput << " Put Item(4): " << item.ToString() << "\n"; -#endif - } - sliceIt.Next(); - } - - while (addIt.Valid()) { - TLevelSstPtr item = addIt.Get(); - res->Put(item); -#ifdef HULL_COMPACT_APPLY - debugOutput << " VEC_LESS(2) sliceIt: " << item.ToString() << "\n"; - debugOutput << " Put Item(5): " << item.ToString() << "\n"; -#endif - addIt.Next(); - } - - Y_VERIFY(!sliceIt.Valid()); - Y_VERIFY(!addIt.Valid()); - Y_VERIFY(!delIt.Valid()); // ensure we didn't miss something - -#ifdef HULL_COMPACT_APPLY - debugOutput << " RESULT\n" << res->ToString(" "); + TLevelSlicePtr res(new TLevelSlice(settings, slice->Ctx)); + + ui32 levelsSize = slice->SortedLevels.size(); + if (ctask.Action == NHullComp::ActCompactSsts && + ctask.CompactSsts.TargetLevel != ui32(-1) && + ctask.CompactSsts.TargetLevel > levelsSize) { + // resize 
levels + levelsSize = ctask.CompactSsts.TargetLevel; + } + + res->SortedLevels.reserve(levelsSize); + for (ui32 j = 0; j < levelsSize; j++) { + // update/copy LastCompactedKey + TKey lastCompactedKey; + if (ctask.Action == NHullComp::ActCompactSsts && j == ctask.CompactSsts.TargetLevel) { + lastCompactedKey = ctask.CompactSsts.LastCompactedKey; + } else if (j < slice->SortedLevels.size()) { + lastCompactedKey = slice->SortedLevels[j].LastCompactedKey; + } else { + lastCompactedKey = TKey::First(); + } + res->SortedLevels.push_back(TSortedLevel(lastCompactedKey)); + } + + // prepare iterators + // NOTE: we use current number of ssts for Level 0 (slice->Level0CurSstsNum()). Number of ssts at Level 0 + // could change during compaction, but we have to use the actual value to avoid loosing + // Level 0 ssts we got while compaction was running + TSliceSstIterator sliceIt(slice, slice->Level0CurSstsNum()); + sliceIt.SeekToFirst(); + + auto checkOrder = [](auto iter) { + if (iter.Valid()) { + auto prev = iter.Get(); + for (iter.Next(); iter.Valid(); iter.Next()) { + auto current = iter.Get(); + if (current < prev) { + return false; + } else { + prev = current; + } + } + } + return true; + }; + auto dump = [](auto iter) { + TStringStream str; + str << "{"; + bool first = true; + for (; iter.Valid(); iter.Next()) { + if (first) { + first = false; + } else { + str << " "; + } + str << iter.Get().ToString(); + } + str << "}"; + return str.Str(); + }; + Y_VERIFY(checkOrder(addIt), "addIt# %s", dump(addIt).data()); + Y_VERIFY(checkOrder(delIt), "delIt# %s", dump(delIt).data()); + Y_VERIFY(checkOrder(sliceIt), "sliceIt# %s", dump(sliceIt).data()); + +#ifdef HULL_COMPACT_APPLY + TStringStream debugOutput; + { + debugOutput << "COMPACTION_APPLY\n"; + debugOutput << " ORIG\n" << slice->ToString(" "); + debugOutput << " CHANGES\n DELETE: "; + TLeveledSstsIterator d(delIt); + d.SeekToFirst(); + while (d.Valid()) { + debugOutput << "[Level: " << d.Get().Level << " " << 
d.Get().SstPtr->ChunksToString() << "]"; + d.Next(); + } + debugOutput << "\n ADD: "; + TLeveledSstsIterator v(addIt); + v.SeekToFirst(); + while (v.Valid()) { + debugOutput << "[Level: " << v.Get().Level << " " << v.Get().SstPtr->ChunksToString() << "]"; + v.Next(); + } + debugOutput << "\n COMMANDS\n"; + } +#endif + + // merge + while (sliceIt.Valid() && addIt.Valid()) { + if (sliceIt.Get() < addIt.Get()) { +#ifdef HULL_COMPACT_APPLY + debugOutput << " SLICE_LESS(1) sliceIt: " << sliceIt.Get().ToString() << "\n"; +#endif + if (delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())) { + delIt.Next(); + } else { + TLevelSstPtr item = sliceIt.Get(); + res->Put(item); +#ifdef HULL_COMPACT_APPLY + debugOutput << " Put Item(1): " << item.ToString() << "\n"; +#endif + } + sliceIt.Next(); + } else if (addIt.Get() < sliceIt.Get()) { + TLevelSstPtr item = addIt.Get(); + res->Put(item); +#ifdef HULL_COMPACT_APPLY + debugOutput << " VEC_LESS(1) sliceIt: " << item.ToString() << "\n"; + debugOutput << " Put Item(2): " << item.ToString() << "\n"; +#endif + addIt.Next(); + } else { + // do smth +#ifdef HULL_COMPACT_APPLY + debugOutput << " BOTH sliceIt: " << sliceIt.Get().ToString() << " addIt: " + << addIt.Get().ToString() << "\n"; +#endif + Y_VERIFY(delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())); + delIt.Next(); + sliceIt.Next(); + TLevelSstPtr item = addIt.Get(); + res->Put(item); +#ifdef HULL_COMPACT_APPLY + debugOutput << " Put Item(3): " << item.ToString() << "\n"; +#endif + addIt.Next(); + } + } + + while (sliceIt.Valid()) { +#ifdef HULL_COMPACT_APPLY + debugOutput << " SLICE_LESS(2) sliceIt: " << sliceIt.Get().ToString() << "\n"; +#endif + if (delIt.Valid() && sliceIt.Get().IsSameSst(delIt.Get())) { + delIt.Next(); + } else { + TLevelSstPtr item = sliceIt.Get(); + res->Put(item); +#ifdef HULL_COMPACT_APPLY + debugOutput << " Put Item(4): " << item.ToString() << "\n"; +#endif + } + sliceIt.Next(); + } + + while (addIt.Valid()) { + TLevelSstPtr item = addIt.Get(); + 
res->Put(item); +#ifdef HULL_COMPACT_APPLY + debugOutput << " VEC_LESS(2) sliceIt: " << item.ToString() << "\n"; + debugOutput << " Put Item(5): " << item.ToString() << "\n"; +#endif + addIt.Next(); + } + + Y_VERIFY(!sliceIt.Valid()); + Y_VERIFY(!addIt.Valid()); + Y_VERIFY(!delIt.Valid()); // ensure we didn't miss something + +#ifdef HULL_COMPACT_APPLY + debugOutput << " RESULT\n" << res->ToString(" "); LOG_DEBUG(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(vctx, "%s", ~debugOutput.Str())); -#else +#else LOG_DEBUG(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(vctx, "Changes to Hull applied")); -#endif - - TSliceSstIterator resIt(res.Get(), res->Level0CurSstsNum()); - resIt.SeekToFirst(); - Y_VERIFY(checkOrder(resIt), "resIt# %s", dump(resIt).data()); - - - // additional check - TDiskPartVec removedHugeBlobs; - if (findRemovedHugeBlobs) { - removedHugeBlobs = FindRemovedHugeBlobsAfterLevelCompaction(ctx, sliceIt, res.Get()); - } - - return TBuiltSlice{ res, removedHugeBlobs }; - } - +#endif + + TSliceSstIterator resIt(res.Get(), res->Level0CurSstsNum()); + resIt.SeekToFirst(); + Y_VERIFY(checkOrder(resIt), "resIt# %s", dump(resIt).data()); + + + // additional check + TDiskPartVec removedHugeBlobs; + if (findRemovedHugeBlobs) { + removedHugeBlobs = FindRemovedHugeBlobsAfterLevelCompaction(ctx, sliceIt, res.Get()); + } + + return TBuiltSlice{ res, removedHugeBlobs }; + } + template <class TKey, class TMemRec> - TDiskPartVec + TDiskPartVec THullOpUtil<TKey, TMemRec>::FindRemovedHugeBlobsAfterLevelCompaction( const TActorContext &ctx, TSliceSstIterator oldSliceIt, const TLevelSlice *newSlice) { - Y_UNUSED(ctx); - TSet<TDiskPart> hugeBlobs; - - // find all huge blobs in slice and call func on each - auto traverseHugeBlobs = [] (TSliceSstIterator it, auto func) { - it.SeekToFirst(); - while (it.Valid()) { - TLevelSstPtr p = it.Get(); - - TMemIterator c(p.SstPtr.Get()); - c.SeekToFirst(); - while (c.Valid()) { - TBlobType::EType type = c->MemRec.GetType(); - if (type == 
TBlobType::HugeBlob || type == TBlobType::ManyHugeBlobs) { - TDiskDataExtractor extr; - c->MemRec.GetDiskData(&extr, p.SstPtr->GetOutbound()); - for (const TDiskPart *hb = extr.Begin; hb != extr.End; ++hb) { - func(*hb); - } - } - c.Next(); - } - - it.Next(); - } - }; - - auto addHugeBlob = [&hugeBlobs] (const TDiskPart &part) { - bool inserted = hugeBlobs.insert(part).second; - Y_VERIFY(inserted); - }; - - auto removeHugeBlob = [&hugeBlobs] (const TDiskPart &part) { - auto num = hugeBlobs.erase(part); - Y_VERIFY(num == 1, "num=%u", unsigned(num)); - }; - - - // collect all huge blobs from old slice into hugeBlobs set - traverseHugeBlobs(oldSliceIt, addHugeBlob); - - // in hugeBlobs leave only those blobs, that do not present in new slice - TSliceSstIterator newSliceIt(newSlice, newSlice->Level0CurSstsNum()); - traverseHugeBlobs(newSliceIt, removeHugeBlob); - - // put from the set to a vector - TDiskPartVec result; - result.Reserve(hugeBlobs.size()); - for (const auto &x : hugeBlobs) { - result.PushBack(x); - } - return result; - } - + Y_UNUSED(ctx); + TSet<TDiskPart> hugeBlobs; + + // find all huge blobs in slice and call func on each + auto traverseHugeBlobs = [] (TSliceSstIterator it, auto func) { + it.SeekToFirst(); + while (it.Valid()) { + TLevelSstPtr p = it.Get(); + + TMemIterator c(p.SstPtr.Get()); + c.SeekToFirst(); + while (c.Valid()) { + TBlobType::EType type = c->MemRec.GetType(); + if (type == TBlobType::HugeBlob || type == TBlobType::ManyHugeBlobs) { + TDiskDataExtractor extr; + c->MemRec.GetDiskData(&extr, p.SstPtr->GetOutbound()); + for (const TDiskPart *hb = extr.Begin; hb != extr.End; ++hb) { + func(*hb); + } + } + c.Next(); + } + + it.Next(); + } + }; + + auto addHugeBlob = [&hugeBlobs] (const TDiskPart &part) { + bool inserted = hugeBlobs.insert(part).second; + Y_VERIFY(inserted); + }; + + auto removeHugeBlob = [&hugeBlobs] (const TDiskPart &part) { + auto num = hugeBlobs.erase(part); + Y_VERIFY(num == 1, "num=%u", unsigned(num)); + }; + + + // 
collect all huge blobs from old slice into hugeBlobs set + traverseHugeBlobs(oldSliceIt, addHugeBlob); + + // in hugeBlobs leave only those blobs, that do not present in new slice + TSliceSstIterator newSliceIt(newSlice, newSlice->Level0CurSstsNum()); + traverseHugeBlobs(newSliceIt, removeHugeBlob); + + // put from the set to a vector + TDiskPartVec result; + result.Reserve(hugeBlobs.size()); + for (const auto &x : hugeBlobs) { + result.PushBack(x); + } + return result; + } + template <class TKey, class TMemRec> - TDiskPartVec + TDiskPartVec THullOpUtil<TKey, TMemRec>::FindRemovedHugeBlobsAfterFreshCompaction( const TActorContext &ctx, const TIntrusivePtr<TFreshSegment> &freshSeg, const TOrderedLevelSegmentsPtr &segVec) { - Y_UNUSED(ctx); - TSet<TDiskPart> hugeBlobs; - - // fill in hugeBlobs with blobs from fresh segment - freshSeg->GetHugeBlobs(hugeBlobs); - - - // find all huge blobs in SST and call func on each - auto traverseHugeBlobs = [] (const TLevelSegmentPtr &seg, auto func) { - TMemIterator c(seg.Get()); - c.SeekToFirst(); - while (c.Valid()) { - TBlobType::EType type = c->MemRec.GetType(); - if (type == TBlobType::HugeBlob || type == TBlobType::ManyHugeBlobs) { - TDiskDataExtractor extr; - c->MemRec.GetDiskData(&extr, seg->GetOutbound()); - for (const TDiskPart *hb = extr.Begin; hb != extr.End; ++hb) { - func(*hb); - } - } - c.Next(); - } - }; - - // remove huge blob from the set - auto removeHugeBlob = [&hugeBlobs] (const TDiskPart &part) { - auto num = hugeBlobs.erase(part); - Y_VERIFY(num == 1, "num=%u", unsigned(num)); - }; - - // leave only deleted huge blobs in hugeBlobs - if (segVec) { - for (auto &seg : segVec->Segments) { - traverseHugeBlobs(seg, removeHugeBlob); - } - } - - // put from the set to a vector - TDiskPartVec result; - result.Reserve(hugeBlobs.size()); - for (const auto &x : hugeBlobs) { - result.PushBack(x); - } - return result; - } - + Y_UNUSED(ctx); + TSet<TDiskPart> hugeBlobs; + + // fill in hugeBlobs with blobs from fresh 
segment + freshSeg->GetHugeBlobs(hugeBlobs); + + + // find all huge blobs in SST and call func on each + auto traverseHugeBlobs = [] (const TLevelSegmentPtr &seg, auto func) { + TMemIterator c(seg.Get()); + c.SeekToFirst(); + while (c.Valid()) { + TBlobType::EType type = c->MemRec.GetType(); + if (type == TBlobType::HugeBlob || type == TBlobType::ManyHugeBlobs) { + TDiskDataExtractor extr; + c->MemRec.GetDiskData(&extr, seg->GetOutbound()); + for (const TDiskPart *hb = extr.Begin; hb != extr.End; ++hb) { + func(*hb); + } + } + c.Next(); + } + }; + + // remove huge blob from the set + auto removeHugeBlob = [&hugeBlobs] (const TDiskPart &part) { + auto num = hugeBlobs.erase(part); + Y_VERIFY(num == 1, "num=%u", unsigned(num)); + }; + + // leave only deleted huge blobs in hugeBlobs + if (segVec) { + for (auto &seg : segVec->Segments) { + traverseHugeBlobs(seg, removeHugeBlob); + } + } + + // put from the set to a vector + TDiskPartVec result; + result.Reserve(hugeBlobs.size()); + for (const auto &x : hugeBlobs) { + result.PushBack(x); + } + return result; + } + } // NKikimr diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp index 395d25bef9..33a8e1153b 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp @@ -312,7 +312,7 @@ namespace NKikimr { // value -- this is not tolerable LOG_CRIT(ctx, NKikimrServices::BS_HULLRECS, VDISKP(HullDs->HullCtx->VCtx->VDiskLogPrefix, - "Db# Barriers ValidateGCCmd: incorrect collect cmd: tabletID# %" PRIu64 + "Db# Barriers ValidateGCCmd: incorrect collect cmd: tabletID# %" PRIu64 " key# %s existing barrier# %" PRIu32 ":%" PRIu32 " new barrier# %" PRIu32 ":%" PRIu32, tabletID, newKey.ToString().data(), memRec.CollectGen, memRec.CollectStep, collectGeneration, collectStep)); diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.cpp 
b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.cpp index 5841fa6741..e0118b3acf 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.cpp @@ -1,4 +1,4 @@ -#include "blobstorage_hullactor.h" +#include "blobstorage_hullactor.h" #include "blobstorage_hullcommit.h" #include "blobstorage_hullcompact.h" #include "blobstorage_buildslice.h" @@ -6,10 +6,10 @@ #include <ydb/core/blobstorage/vdisk/hulldb/compstrat/hulldb_compstrat_selector.h> #include <ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugedelete.h> #include <ydb/core/blobstorage/vdisk/hulldb/hulldb_bulksst_add.h> - -namespace NKikimr { - - //////////////////////////////////////////////////////////////////////////// + +namespace NKikimr { + + //////////////////////////////////////////////////////////////////////////// // TFullCompactionState //////////////////////////////////////////////////////////////////////////// struct TFullCompactionState { @@ -104,7 +104,7 @@ namespace NKikimr { LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(hullCtx->VCtx->VDiskLogPrefix, - "%s: fresh scheduled", PDiskSignatureForHullDbKey<TKey>().ToString().data())); + "%s: fresh scheduled", PDiskSignatureForHullDbKey<TKey>().ToString().data())); Y_VERIFY(lastLsn <= rtCtx->LsnMngr->GetConfirmedLsnForHull(), "Last fresh lsn MUST be confirmed; lastLsn# %" PRIu64 " confirmed# %" PRIu64, @@ -115,77 +115,77 @@ namespace NKikimr { } //////////////////////////////////////////////////////////////////////////// - // TLevelIndexActor. We run it on the same mailbox as Skeleton, - // it is used for commits and compaction scheduling - //////////////////////////////////////////////////////////////////////////// + // TLevelIndexActor. 
We run it on the same mailbox as Skeleton, + // it is used for commits and compaction scheduling + //////////////////////////////////////////////////////////////////////////// template <class TKey, class TMemRec> class TLevelIndexActor : public TActorBootstrapped<TLevelIndexActor<TKey, TMemRec>> { - typedef ::NKikimr::THullChange<TKey, TMemRec> THullChange; + typedef ::NKikimr::THullChange<TKey, TMemRec> THullChange; typedef ::NKikimr::TFreshAppendixCompactionDone<TKey, TMemRec> TFreshAppendixCompactionDone; typedef ::NKikimr::TLevelIndexActor<TKey, TMemRec> TThis; - typedef ::NKikimr::NHullComp::TTask<TKey, TMemRec> TCompactionTask; + typedef ::NKikimr::NHullComp::TTask<TKey, TMemRec> TCompactionTask; typedef ::NKikimr::NHullComp::TSelectorActor<TKey, TMemRec> TSelectorActor; - typedef ::NKikimr::NHullComp::TSelected<TKey, TMemRec> TSelected; - - typedef ::NKikimr::TLevelSlice<TKey, TMemRec> TLevelSlice; - typedef TIntrusivePtr<TLevelSlice> TLevelSlicePtr; - typedef typename TLevelSlice::TForwardIterator TLevelSliceForwardIterator; + typedef ::NKikimr::NHullComp::TSelected<TKey, TMemRec> TSelected; + + typedef ::NKikimr::TLevelSlice<TKey, TMemRec> TLevelSlice; + typedef TIntrusivePtr<TLevelSlice> TLevelSlicePtr; + typedef typename TLevelSlice::TForwardIterator TLevelSliceForwardIterator; typedef ::NKikimr::THullCompaction<TKey, TMemRec, TLevelSliceForwardIterator> TLevelCompaction; - typedef ::NKikimr::TOrderedLevelSegments<TKey, TMemRec> TOrderedLevelSegments; - typedef TIntrusivePtr<TOrderedLevelSegments> TOrderedLevelSegmentsPtr; - typedef ::NKikimr::TLevelSegment<TKey, TMemRec> TLevelSegment; - typedef ::NKikimr::TLeveledSsts<TKey, TMemRec> TLeveledSsts; - typedef typename TLeveledSsts::TIterator TLeveledSstsIterator; + typedef ::NKikimr::TOrderedLevelSegments<TKey, TMemRec> TOrderedLevelSegments; + typedef TIntrusivePtr<TOrderedLevelSegments> TOrderedLevelSegmentsPtr; + typedef ::NKikimr::TLevelSegment<TKey, TMemRec> TLevelSegment; + typedef 
::NKikimr::TLeveledSsts<TKey, TMemRec> TLeveledSsts; + typedef typename TLeveledSsts::TIterator TLeveledSstsIterator; typedef ::NKikimr::TAsyncLevelCommitter<TKey, TMemRec> TAsyncLevelCommitter; typedef ::NKikimr::TAsyncFreshCommitter<TKey, TMemRec> TAsyncFreshCommitter; typedef ::NKikimr::TAsyncAdvanceLsnCommitter<TKey, TMemRec> TAsyncAdvanceLsnCommitter; typedef ::NKikimr::TAsyncReplSstCommitter<TKey, TMemRec> TAsyncReplSstCommitter; - + using TRunTimeCtx = TLevelIndexRunTimeCtx<TKey, TMemRec>; using THullOpUtil = ::NKikimr::THullOpUtil<TKey, TMemRec>; - - // - // StateNoComp -> StateCompPolicyAtWork -> StateCompInProgress -> StateWaitCommit -+ - // ^ ^ | | ^ | - // | | | | | | - // | +----------------+ +---------------------------------------+ | - // +--------------------------------------------------------------------------+ - + + // + // StateNoComp -> StateCompPolicyAtWork -> StateCompInProgress -> StateWaitCommit -+ + // ^ ^ | | ^ | + // | | | | | | + // | +----------------+ +---------------------------------------+ | + // +--------------------------------------------------------------------------+ + TIntrusivePtr<TVDiskConfig> Config; - TIntrusivePtr<THullDs> HullDs; + TIntrusivePtr<THullDs> HullDs; std::shared_ptr<THullLogCtx> HullLogCtx; std::shared_ptr<TRunTimeCtx> RTCtx; std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> SyncLogFirstLsnToKeep; - NHullComp::TBoundariesConstPtr Boundaries; - THullDbCommitterCtxPtr HullDbCommitterCtx; + NHullComp::TBoundariesConstPtr Boundaries; + THullDbCommitterCtxPtr HullDbCommitterCtx; std::unique_ptr<TCompactionTask> CompactionTask; - bool AdvanceCommitInProgress = false; - TActiveActors &ActiveActors; - NMonGroup::TLsmAllLevelsStat LevelStat; + bool AdvanceCommitInProgress = false; + TActiveActors &ActiveActors; + NMonGroup::TLsmAllLevelsStat LevelStat; TFullCompactionState FullCompactionState; bool CompactionScheduled = false; TInstant NextCompactionWakeup; - - friend class TActorBootstrapped<TThis>; - - void 
Bootstrap(const TActorContext &ctx) { - TThis::Become(&TThis::StateFunc); - RTCtx->LevelIndex->UpdateLevelStat(LevelStat); - ScheduleCompaction(ctx); - } - + + friend class TActorBootstrapped<TThis>; + + void Bootstrap(const TActorContext &ctx) { + TThis::Become(&TThis::StateFunc); + RTCtx->LevelIndex->UpdateLevelStat(LevelStat); + ScheduleCompaction(ctx); + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // RunLevelCompactionSelector runs TSelectorActor which selects what to compact. // returns true, if selector has been started, false otherwise bool RunLevelCompactionSelector(const TActorContext &ctx) { - // if compaction is in progress, return + // if compaction is in progress, return if (RTCtx->LevelIndex->GetCompState() != TLevelIndexBase::StateNoComp || !Config->LevelCompaction) { return false; - } - + } + //////////////////////// CHOOSE WHAT TO COMPACT /////////////////////////////// - RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateCompPolicyAtWork); + RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateCompPolicyAtWork); auto barriersSnap = HullDs->Barriers->GetIndexSnapshot(); auto levelSnap = RTCtx->LevelIndex->GetIndexSnapshot(); const double rateThreshold = Config->HullCompLevelRateThreshold; @@ -194,10 +194,10 @@ namespace NKikimr { auto selector = std::make_unique<TSelectorActor>(HullDs->HullCtx, params, std::move(levelSnap), std::move(barriersSnap), ctx.SelfID, std::move(CompactionTask)); auto aid = RunInBatchPool(ctx, selector.release()); - ActiveActors.Insert(aid); + ActiveActors.Insert(aid); return true; } - + void ScheduleCompactionWakeup(const TActorContext& ctx) { NextCompactionWakeup = ctx.Now() + Config->HullCompSchedulingInterval; if (!CompactionScheduled) { @@ -224,260 +224,260 @@ namespace NKikimr { } } - void RunLevelCompaction(const TActorContext &ctx, TVector<TOrderedLevelSegmentsPtr> &vec) { - 
RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateCompInProgress); - - // set up lsns + find out number of elements to merge - ui64 firstLsn = ui64(-1); - ui64 lastLsn = 0; + void RunLevelCompaction(const TActorContext &ctx, TVector<TOrderedLevelSegmentsPtr> &vec) { + RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateCompInProgress); + + // set up lsns + find out number of elements to merge + ui64 firstLsn = ui64(-1); + ui64 lastLsn = 0; ui64 mergeElementsApproximation = 0; - for (const auto &seg : vec) { - firstLsn = Min(firstLsn, seg->GetFirstLsn()); - lastLsn = Max(lastLsn, seg->GetLastLsn()); + for (const auto &seg : vec) { + firstLsn = Min(firstLsn, seg->GetFirstLsn()); + lastLsn = Max(lastLsn, seg->GetLastLsn()); mergeElementsApproximation += seg->Elements(); - } - + } + // prepare snapshots auto barriersSnap = HullDs->Barriers->GetIndexSnapshot(); auto levelSnap = RTCtx->LevelIndex->GetIndexSnapshot(); - // set up iterator - TLevelSliceForwardIterator it(HullDs->HullCtx, vec); - it.SeekToFirst(); - + // set up iterator + TLevelSliceForwardIterator it(HullDs->HullCtx, vec); + it.SeekToFirst(); + std::unique_ptr<TLevelCompaction> compaction(new TLevelCompaction( HullDs->HullCtx, RTCtx, nullptr, nullptr, std::move(barriersSnap), std::move(levelSnap), mergeElementsApproximation, it, firstLsn, lastLsn, TDuration::Minutes(2), {})); NActors::TActorId actorId = RunInBatchPool(ctx, compaction.release()); - ActiveActors.Insert(actorId); - } - - void Handle(typename TSelected::TPtr &ev, const TActorContext &ctx) { + ActiveActors.Insert(actorId); + } + + void Handle(typename TSelected::TPtr &ev, const TActorContext &ctx) { ActiveActors.Erase(ev->Sender); - Y_VERIFY(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateCompPolicyAtWork); - RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateNoComp); - - NHullComp::EAction action = ev->Get()->Action; + Y_VERIFY(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateCompPolicyAtWork); + 
RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateNoComp); + + NHullComp::EAction action = ev->Get()->Action; CompactionTask = std::move(ev->Get()->CompactionTask); - - if (action != NHullComp::ActNothing) { - // log out decision + + if (action != NHullComp::ActNothing) { + // log out decision LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(HullDs->HullCtx->VCtx, "%s: selected compaction %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), - CompactionTask->ToString().data())); - } - + PDiskSignatureForHullDbKey<TKey>().ToString().data(), + CompactionTask->ToString().data())); + } + FullCompactionState.Compacted(ctx, CompactionTask->FullCompactionInfo); - switch (action) { - case NHullComp::ActNothing: { - // nothing to merge, try later + switch (action) { + case NHullComp::ActNothing: { + // nothing to merge, try later ScheduleCompactionWakeup(ctx); - // for now, update storage ratio as it may have changed - UpdateStorageRatio(RTCtx->LevelIndex->CurSlice); - break; - } - case NHullComp::ActDeleteSsts: { + // for now, update storage ratio as it may have changed + UpdateStorageRatio(RTCtx->LevelIndex->CurSlice); + break; + } + case NHullComp::ActDeleteSsts: { Y_VERIFY(CompactionTask->GetSstsToAdd().Empty() && !CompactionTask->GetSstsToDelete().Empty()); ApplyCompactionResult(ctx, {}, {}); - break; - } - case NHullComp::ActMoveSsts: { + break; + } + case NHullComp::ActMoveSsts: { Y_VERIFY(!CompactionTask->GetSstsToAdd().Empty() && !CompactionTask->GetSstsToDelete().Empty()); ApplyCompactionResult(ctx, {}, {}); - break; - } - case NHullComp::ActCompactSsts: { - // start compaction - LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, + break; + } + case NHullComp::ActCompactSsts: { + // start compaction + LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(HullDs->HullCtx->VCtx, "%s: level scheduled", - PDiskSignatureForHullDbKey<TKey>().ToString().data())); + PDiskSignatureForHullDbKey<TKey>().ToString().data())); RunLevelCompaction(ctx, 
CompactionTask->CompactSsts.CompactionChains); - break; - } - default: - Y_FAIL("Unexpected case"); - } - - RTCtx->LevelIndex->UpdateLevelStat(LevelStat); - } - - void CalculateStorageRatio(TLevelSlicePtr slice) { - NHullComp::TSstRatio total; - - TLevelSliceSnapshot<TKey, TMemRec> sliceSnap(slice, slice->Level0CurSstsNum()); - typename TLevelSliceSnapshot<TKey, TMemRec>::TSstIterator it(&sliceSnap); - it.SeekToFirst(); - while (it.Valid()) { - if (NHullComp::TSstRatioPtr ratio = it.Get().SstPtr->StorageRatio.Get()) { - total += *ratio; - } - it.Next(); - } - - slice->LastPublishedRatio = total; - } - - void UpdateStorageRatio(TLevelSlicePtr slice) { - NHullComp::TSstRatio prev(slice->LastPublishedRatio); - CalculateStorageRatio(slice); - HullDs->HullCtx->UpdateSpaceCounters(prev, slice->LastPublishedRatio); - } - - void CheckRemovedHugeBlobs(const TActorContext &ctx, - const TDiskPartVec &calcVec, - const TDiskPartVec &checkVec, - bool level) const { + break; + } + default: + Y_FAIL("Unexpected case"); + } + + RTCtx->LevelIndex->UpdateLevelStat(LevelStat); + } + + void CalculateStorageRatio(TLevelSlicePtr slice) { + NHullComp::TSstRatio total; + + TLevelSliceSnapshot<TKey, TMemRec> sliceSnap(slice, slice->Level0CurSstsNum()); + typename TLevelSliceSnapshot<TKey, TMemRec>::TSstIterator it(&sliceSnap); + it.SeekToFirst(); + while (it.Valid()) { + if (NHullComp::TSstRatioPtr ratio = it.Get().SstPtr->StorageRatio.Get()) { + total += *ratio; + } + it.Next(); + } + + slice->LastPublishedRatio = total; + } + + void UpdateStorageRatio(TLevelSlicePtr slice) { + NHullComp::TSstRatio prev(slice->LastPublishedRatio); + CalculateStorageRatio(slice); + HullDs->HullCtx->UpdateSpaceCounters(prev, slice->LastPublishedRatio); + } + + void CheckRemovedHugeBlobs(const TActorContext &ctx, + const TDiskPartVec &calcVec, + const TDiskPartVec &checkVec, + bool level) const { if (Config->CheckHugeBlobs) { - TVector<TDiskPart> v1 = calcVec.Vec; - TVector<TDiskPart> v2 = checkVec.Vec; - 
Sort(v1.begin(), v1.end()); - Sort(v2.begin(), v2.end()); - if (v1 != v2) { - LOG_CRIT(ctx, NKikimrServices::BS_HULLCOMP, + TVector<TDiskPart> v1 = calcVec.Vec; + TVector<TDiskPart> v2 = checkVec.Vec; + Sort(v1.begin(), v1.end()); + Sort(v2.begin(), v2.end()); + if (v1 != v2) { + LOG_CRIT(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(HullDs->HullCtx->VCtx, "HUGE BLOBS REMOVAL INCONSISTENCY: ctask# %s level# %s" - " calcVec# %s checkVec# %s", CompactionTask->ToString().data(), - (level ? "true" : "false"), calcVec.ToString().data(), - checkVec.ToString().data())); - } - } - } - + " calcVec# %s checkVec# %s", CompactionTask->ToString().data(), + (level ? "true" : "false"), calcVec.ToString().data(), + checkVec.ToString().data())); + } + } + } + void ApplyCompactionResult( const TActorContext &ctx, TVector<ui32> chunksAdded, TVector<ui32> reservedChunksLeft) { - // create new slice - RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateWaitCommit); - - // apply TCompactionTask (i.e. create a new slice) + // create new slice + RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateWaitCommit); + + // apply TCompactionTask (i.e. 
create a new slice) bool checkHugeBlobs = Config->CheckHugeBlobs; - TLevelSlicePtr prevSlice = std::move(RTCtx->LevelIndex->CurSlice); + TLevelSlicePtr prevSlice = std::move(RTCtx->LevelIndex->CurSlice); typename THullOpUtil::TBuiltSlice cs = THullOpUtil::BuildSlice(HullDs->HullCtx->VCtx, ctx, RTCtx->LevelIndex->Settings, prevSlice.Get(), *CompactionTask, checkHugeBlobs); - RTCtx->LevelIndex->CurSlice = std::move(cs.NewSlice); - // check huge blobs - if (checkHugeBlobs) { + RTCtx->LevelIndex->CurSlice = std::move(cs.NewSlice); + // check huge blobs + if (checkHugeBlobs) { CheckRemovedHugeBlobs(ctx, CompactionTask->GetHugeBlobsToDelete(), cs.RemovedHugeBlobs, true); LogRemovedHugeBlobs(ctx, CompactionTask->GetHugeBlobsToDelete(), true); - } - - // this flag is set if there are other users of this slice - bool prevSliceActive = prevSlice.RefCount() != 1; - - // delete list, includes previous ChunksToDelete and reserved chunks - TVector<ui32> deleteChunks(std::move(prevSlice->ChunksToDelete)); - deleteChunks.insert(deleteChunks.end(), reservedChunksLeft.begin(), reservedChunksLeft.end()); - - // select the vector where to put freed chunks to; if we have an active snapshot, then we preserve chunks and - // put them to ChunksToDelete in order to remove them at next commit; otherwise we can delete them immediately - TVector<ui32>& freedChunksSink = prevSliceActive ? 
RTCtx->LevelIndex->CurSlice->ChunksToDelete : deleteChunks; - - // only delete chunks if we actually delete SST's from yard; otherwise it is move operation, we delete them from one - // level and put to another + } + + // this flag is set if there are other users of this slice + bool prevSliceActive = prevSlice.RefCount() != 1; + + // delete list, includes previous ChunksToDelete and reserved chunks + TVector<ui32> deleteChunks(std::move(prevSlice->ChunksToDelete)); + deleteChunks.insert(deleteChunks.end(), reservedChunksLeft.begin(), reservedChunksLeft.end()); + + // select the vector where to put freed chunks to; if we have an active snapshot, then we preserve chunks and + // put them to ChunksToDelete in order to remove them at next commit; otherwise we can delete them immediately + TVector<ui32>& freedChunksSink = prevSliceActive ? RTCtx->LevelIndex->CurSlice->ChunksToDelete : deleteChunks; + + // only delete chunks if we actually delete SST's from yard; otherwise it is move operation, we delete them from one + // level and put to another if (CompactionTask->CollectDeletedSsts()) { TLeveledSstsIterator delIt(&CompactionTask->GetSstsToDelete()); for (delIt.SeekToFirst(); delIt.Valid(); delIt.Next()) { - const TLevelSegment& seg = *delIt.Get().SstPtr; + const TLevelSegment& seg = *delIt.Get().SstPtr; seg.FillInChunkIds(freedChunksSink); if (seg.Info.IsCreatedByRepl()) { // mark it out-of-index to schedule deletion from the bulk formed segments table prevSlice->BulkFormedSegments.RemoveSstFromIndex(seg.GetEntryPoint()); - } - } - } - - // transfer and update storage ratio to the new slice - CalculateStorageRatio(RTCtx->LevelIndex->CurSlice); - HullDs->HullCtx->UpdateSpaceCounters(prevSlice->LastPublishedRatio, - RTCtx->LevelIndex->CurSlice->LastPublishedRatio); - - // apply compaction to bulk-formed SSTables; it produces a set of bulk-formed segments suitable for saving - // in new slice containing only needed entries + } + } + } + + // transfer and update storage 
ratio to the new slice + CalculateStorageRatio(RTCtx->LevelIndex->CurSlice); + HullDs->HullCtx->UpdateSpaceCounters(prevSlice->LastPublishedRatio, + RTCtx->LevelIndex->CurSlice->LastPublishedRatio); + + // apply compaction to bulk-formed SSTables; it produces a set of bulk-formed segments suitable for saving + // in new slice containing only needed entries prevSlice->BulkFormedSegments.ApplyCompactionResult(RTCtx->LevelIndex->CurSlice->BulkFormedSegments, freedChunksSink); - - // manage recovery log LSN to keep: - // we can't advance LsnToKeep until the prev snapshot dies, - // since we need to be able to read the rest of the log for remote recovery - RTCtx->LevelIndex->PrevEntryPointLsn = RTCtx->LevelIndex->CurEntryPointLsn; // keep everything for prev snapshot - - // run level committer + + // manage recovery log LSN to keep: + // we can't advance LsnToKeep until the prev snapshot dies, + // since we need to be able to read the rest of the log for remote recovery + RTCtx->LevelIndex->PrevEntryPointLsn = RTCtx->LevelIndex->CurEntryPointLsn; // keep everything for prev snapshot + + // run level committer TDiskPartVec removedHugeBlobs(CompactionTask->ExtractHugeBlobsToDelete()); auto committer = std::make_unique<TAsyncLevelCommitter>(HullLogCtx, HullDbCommitterCtx, RTCtx->LevelIndex, ctx.SelfID, std::move(chunksAdded), std::move(deleteChunks), std::move(removedHugeBlobs), prevSliceActive); TActorId committerID = ctx.RegisterWithSameMailbox(committer.release()); - ActiveActors.Insert(committerID); - - if (prevSliceActive) { - // notify LIActor when previous slice is not used anymore - prevSlice->SetUpCommitter(ctx.ExecutorThread.ActorSystem, committerID); - } - - // drop prev slice, some snapshot can still have a pointer to it - prevSlice.Drop(); - - // free used resources - CompactionTask->Clear(); - } - - void LogRemovedHugeBlobs(const TActorContext &ctx, const TDiskPartVec &vec, bool level) const { - for (const auto &x : vec) { - LOG_DEBUG(ctx, 
NKikimrServices::BS_HULLHUGE, + ActiveActors.Insert(committerID); + + if (prevSliceActive) { + // notify LIActor when previous slice is not used anymore + prevSlice->SetUpCommitter(ctx.ExecutorThread.ActorSystem, committerID); + } + + // drop prev slice, some snapshot can still have a pointer to it + prevSlice.Drop(); + + // free used resources + CompactionTask->Clear(); + } + + void LogRemovedHugeBlobs(const TActorContext &ctx, const TDiskPartVec &vec, bool level) const { + for (const auto &x : vec) { + LOG_DEBUG(ctx, NKikimrServices::BS_HULLHUGE, VDISKP(HullDs->HullCtx->VCtx, "%s: LogRemovedHugeBlobs: one slot: addr# %s level# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), - x.ToString().data(), (level ? "true" : "false"))); - } - } - - void Handle(typename THullChange::TPtr &ev, const TActorContext &ctx) { - ActiveActors.Erase(ev->Sender); - THullChange *msg = ev->Get(); - - // NOTE: when we run committer (Fresh or Level) we allocate Lsn and - // perform LevelIndex serialization in this handler to _guarantee_ order - // of log messages - - // handle commit msg differently - if (msg->FreshSegment) { - TStringStream dbg; - dbg << "{commiter# fresh" - << " firtsLsn# "<< msg->FreshSegment->GetFirstLsn() - << " lastLsn# " << msg->FreshSegment->GetLastLsn() - << "}"; - - // update compacted lsn - const ui64 lastLsnFromFresh = msg->FreshSegment->GetLastLsn(); - if (lastLsnFromFresh > 0) - RTCtx->LevelIndex->UpdateCompactedLsn(lastLsnFromFresh); - // check huge blobs + PDiskSignatureForHullDbKey<TKey>().ToString().data(), + x.ToString().data(), (level ? 
"true" : "false"))); + } + } + + void Handle(typename THullChange::TPtr &ev, const TActorContext &ctx) { + ActiveActors.Erase(ev->Sender); + THullChange *msg = ev->Get(); + + // NOTE: when we run committer (Fresh or Level) we allocate Lsn and + // perform LevelIndex serialization in this handler to _guarantee_ order + // of log messages + + // handle commit msg differently + if (msg->FreshSegment) { + TStringStream dbg; + dbg << "{commiter# fresh" + << " firtsLsn# "<< msg->FreshSegment->GetFirstLsn() + << " lastLsn# " << msg->FreshSegment->GetLastLsn() + << "}"; + + // update compacted lsn + const ui64 lastLsnFromFresh = msg->FreshSegment->GetLastLsn(); + if (lastLsnFromFresh > 0) + RTCtx->LevelIndex->UpdateCompactedLsn(lastLsnFromFresh); + // check huge blobs if (Config->CheckHugeBlobs) { TDiskPartVec checkVec = THullOpUtil::FindRemovedHugeBlobsAfterFreshCompaction( ctx, msg->FreshSegment, msg->SegVec); - CheckRemovedHugeBlobs(ctx, msg->FreedHugeBlobs, checkVec, false); - LogRemovedHugeBlobs(ctx, msg->FreedHugeBlobs, false); - } - // remove fresh segment - RTCtx->LevelIndex->FreshCompactionSstCreated(std::move(msg->FreshSegment)); - - // put new sstable into zero level - if (msg->SegVec.Get()) { - for (auto &seg : msg->SegVec->Segments) - RTCtx->LevelIndex->InsertSstAtLevel0(seg, HullDs->HullCtx); - } - - // run fresh committer + CheckRemovedHugeBlobs(ctx, msg->FreedHugeBlobs, checkVec, false); + LogRemovedHugeBlobs(ctx, msg->FreedHugeBlobs, false); + } + // remove fresh segment + RTCtx->LevelIndex->FreshCompactionSstCreated(std::move(msg->FreshSegment)); + + // put new sstable into zero level + if (msg->SegVec.Get()) { + for (auto &seg : msg->SegVec->Segments) + RTCtx->LevelIndex->InsertSstAtLevel0(seg, HullDs->HullCtx); + } + + // run fresh committer auto committer = std::make_unique<TAsyncFreshCommitter>(HullLogCtx, HullDbCommitterCtx, RTCtx->LevelIndex, ctx.SelfID, std::move(msg->CommitChunks), std::move(msg->ReservedChunks), std::move(msg->FreedHugeBlobs), 
dbg.Str()); auto aid = ctx.RegisterWithSameMailbox(committer.release()); - ActiveActors.Insert(aid); - } else { - Y_VERIFY(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateCompInProgress); - + ActiveActors.Insert(aid); + } else { + Y_VERIFY(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateCompInProgress); + CompactionTask->CompactSsts.CompactionFinished(std::move(msg->SegVec), std::move(msg->FreedHugeBlobs), msg->Aborted); - + if (msg->Aborted) { // if the compaction was aborted, ensure there was no index change Y_VERIFY(CompactionTask->GetSstsToAdd().Empty()); Y_VERIFY(CompactionTask->GetSstsToDelete().Empty()); @@ -489,11 +489,11 @@ namespace NKikimr { } ApplyCompactionResult(ctx, std::move(msg->CommitChunks), std::move(msg->ReservedChunks)); - } - - RTCtx->LevelIndex->UpdateLevelStat(LevelStat); - } - + } + + RTCtx->LevelIndex->UpdateLevelStat(LevelStat); + } + void Handle(typename TFreshAppendixCompactionDone::TPtr& ev, const TActorContext& ctx) { auto newJob = ev->Get()->Job.ApplyCompactionResult(); if (!newJob.Empty()) { @@ -501,31 +501,31 @@ namespace NKikimr { } } - void Handle(TEvAddBulkSst::TPtr& ev, const TActorContext& ctx) { - TEvAddBulkSst *msg = ev->Get(); - const auto oneAddition = msg->Essence.EnsureOnlyOneSst<TKey, TMemRec>(); - - // move level-0 SSTable segment into uncommitted set and spawn committer actor - Y_VERIFY(oneAddition.Sst->IsLoaded()); - RTCtx->LevelIndex->UncommittedReplSegments.push_back(oneAddition.Sst); - + void Handle(TEvAddBulkSst::TPtr& ev, const TActorContext& ctx) { + TEvAddBulkSst *msg = ev->Get(); + const auto oneAddition = msg->Essence.EnsureOnlyOneSst<TKey, TMemRec>(); + + // move level-0 SSTable segment into uncommitted set and spawn committer actor + Y_VERIFY(oneAddition.Sst->IsLoaded()); + RTCtx->LevelIndex->UncommittedReplSegments.push_back(oneAddition.Sst); + auto actor = std::make_unique<TAsyncReplSstCommitter>(HullLogCtx, HullDbCommitterCtx, RTCtx->LevelIndex, ctx.SelfID, 
std::move(msg->ChunksToCommit), std::move(msg->ReservedChunks), oneAddition.Sst, oneAddition.RecsNum, msg->NotifyId); auto aid = ctx.RegisterWithSameMailbox(actor.release()); - ActiveActors.Insert(aid); - } - - void Handle(THullCommitFinished::TPtr &ev, const TActorContext &ctx) { - ActiveActors.Erase(ev->Sender); - switch (ev->Get()->Type) { - case THullCommitFinished::CommitLevel: - Y_VERIFY_DEBUG(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateWaitCommit); - RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateNoComp); - RTCtx->LevelIndex->PrevEntryPointLsn = ui64(-1); - ScheduleCompaction(ctx); - break; - case THullCommitFinished::CommitFresh: + ActiveActors.Insert(aid); + } + + void Handle(THullCommitFinished::TPtr &ev, const TActorContext &ctx) { + ActiveActors.Erase(ev->Sender); + switch (ev->Get()->Type) { + case THullCommitFinished::CommitLevel: + Y_VERIFY_DEBUG(RTCtx->LevelIndex->GetCompState() == TLevelIndexBase::StateWaitCommit); + RTCtx->LevelIndex->SetCompState(TLevelIndexBase::StateNoComp); + RTCtx->LevelIndex->PrevEntryPointLsn = ui64(-1); + ScheduleCompaction(ctx); + break; + case THullCommitFinished::CommitFresh: ProcessFreshOnlyCompactQ(ctx); // to avoid deadlock with emerg queue if (FullCompactionState.Enabled()) { @@ -533,65 +533,65 @@ namespace NKikimr { } else { CompactFreshSegmentIfRequired<TKey, TMemRec>(HullDs, RTCtx, ctx); } - break; - case THullCommitFinished::CommitAdvanceLsn: - AdvanceCommitInProgress = false; - break; - case THullCommitFinished::CommitReplSst: - break; - default: - Y_FAIL("Unexpected case"); - } - - // notify skeleton about finished compaction - ctx.Send(RTCtx->SkeletonId, new TEvCompactionFinished()); - - // notify HullLogCutterNotifier - ctx.Send(RTCtx->GetLogNotifierActorId(), new TEvents::TEvCompleted()); - } - - void Handle(NPDisk::TEvCutLog::TPtr &ev, const TActorContext &ctx) { - const ui64 freeUpToLsn = ev->Get()->FreeUpToLsn; - RTCtx->SetFreeUpToLsn(freeUpToLsn); - // we check if we need to start 
fresh compaction, FreeUpToLsn influence our decision + break; + case THullCommitFinished::CommitAdvanceLsn: + AdvanceCommitInProgress = false; + break; + case THullCommitFinished::CommitReplSst: + break; + default: + Y_FAIL("Unexpected case"); + } + + // notify skeleton about finished compaction + ctx.Send(RTCtx->SkeletonId, new TEvCompactionFinished()); + + // notify HullLogCutterNotifier + ctx.Send(RTCtx->GetLogNotifierActorId(), new TEvents::TEvCompleted()); + } + + void Handle(NPDisk::TEvCutLog::TPtr &ev, const TActorContext &ctx) { + const ui64 freeUpToLsn = ev->Get()->FreeUpToLsn; + RTCtx->SetFreeUpToLsn(freeUpToLsn); + // we check if we need to start fresh compaction, FreeUpToLsn influence our decision const bool freshCompStarted = CompactFreshSegmentIfRequired<TKey, TMemRec>(HullDs, RTCtx, ctx); - // just for valid info output to the log - bool moveEntryPointStarted = false; - if (!freshCompStarted && !AdvanceCommitInProgress) { - // move entry point if required - const ui64 entryPoint = Min(RTCtx->LevelIndex->CurEntryPointLsn, RTCtx->LevelIndex->PrevEntryPointLsn); - if (entryPoint == ui64(-1) || freeUpToLsn > entryPoint) { - TStringStream dbg; - dbg << "{commiter# advance" - << " entryPoint# "<< entryPoint - << " freeUpToLsn# " << freeUpToLsn - << "}"; + // just for valid info output to the log + bool moveEntryPointStarted = false; + if (!freshCompStarted && !AdvanceCommitInProgress) { + // move entry point if required + const ui64 entryPoint = Min(RTCtx->LevelIndex->CurEntryPointLsn, RTCtx->LevelIndex->PrevEntryPointLsn); + if (entryPoint == ui64(-1) || freeUpToLsn > entryPoint) { + TStringStream dbg; + dbg << "{commiter# advance" + << " entryPoint# "<< entryPoint + << " freeUpToLsn# " << freeUpToLsn + << "}"; auto aid = ctx.RegisterWithSameMailbox(new TAsyncAdvanceLsnCommitter(HullLogCtx, HullDbCommitterCtx, - RTCtx->LevelIndex, ctx.SelfID, dbg.Str())); - ActiveActors.Insert(aid); - AdvanceCommitInProgress = true; - moveEntryPointStarted = true; - } - } 
- - // if we don't start compaction we notify HullLogCutterNotifier; we need it at least for - // process startup to initialize LogCutter; - // anyway we don't get NPDisk::TEvCutLog too often, once per log chunk written - bool justNotifyLogCutter = false; - if (!freshCompStarted && !AdvanceCommitInProgress && !moveEntryPointStarted) { - // notify HullLogCutterNotifier - justNotifyLogCutter = true; - ctx.Send(RTCtx->GetLogNotifierActorId(), new TEvents::TEvCompleted()); - } - - LOG_DEBUG(ctx, NKikimrServices::BS_LOGCUTTER, + RTCtx->LevelIndex, ctx.SelfID, dbg.Str())); + ActiveActors.Insert(aid); + AdvanceCommitInProgress = true; + moveEntryPointStarted = true; + } + } + + // if we don't start compaction we notify HullLogCutterNotifier; we need it at least for + // process startup to initialize LogCutter; + // anyway we don't get NPDisk::TEvCutLog too often, once per log chunk written + bool justNotifyLogCutter = false; + if (!freshCompStarted && !AdvanceCommitInProgress && !moveEntryPointStarted) { + // notify HullLogCutterNotifier + justNotifyLogCutter = true; + ctx.Send(RTCtx->GetLogNotifierActorId(), new TEvents::TEvCompleted()); + } + + LOG_DEBUG(ctx, NKikimrServices::BS_LOGCUTTER, VDISKP(HullDs->HullCtx->VCtx, "TLevelIndexActor::Handle(NPDisk::TEvCutLog): freshCompStarted# %d" - " moveEntryPointStarted# %d justNotifyLogCutter# %d freeUpToLsn# %" PRIu64 - " CurEntryPointLsn# %" PRIu64 " PrevEntryPointLsn# %" PRIu64, - int(freshCompStarted), int(moveEntryPointStarted), int(justNotifyLogCutter), - freeUpToLsn, RTCtx->LevelIndex->CurEntryPointLsn, RTCtx->LevelIndex->PrevEntryPointLsn)); - } - + " moveEntryPointStarted# %d justNotifyLogCutter# %d freeUpToLsn# %" PRIu64 + " CurEntryPointLsn# %" PRIu64 " PrevEntryPointLsn# %" PRIu64, + int(freshCompStarted), int(moveEntryPointStarted), int(justNotifyLogCutter), + freeUpToLsn, RTCtx->LevelIndex->CurEntryPointLsn, RTCtx->LevelIndex->PrevEntryPointLsn)); + } + std::deque<std::pair<ui64, TEvHullCompact::TPtr>> 
FreshOnlyCompactQ; void Handle(TEvHullCompact::TPtr &ev, const TActorContext &ctx) { @@ -629,29 +629,29 @@ namespace NKikimr { } } - void HandlePoison(const TEvents::TEvPoisonPill::TPtr &ev, const TActorContext &ctx) { - Y_UNUSED(ev); - ActiveActors.KillAndClear(ctx); - TThis::Die(ctx); - } - - STRICT_STFUNC(StateFunc, - HFunc(THullCommitFinished, Handle) - HFunc(NPDisk::TEvCutLog, Handle) + void HandlePoison(const TEvents::TEvPoisonPill::TPtr &ev, const TActorContext &ctx) { + Y_UNUSED(ev); + ActiveActors.KillAndClear(ctx); + TThis::Die(ctx); + } + + STRICT_STFUNC(StateFunc, + HFunc(THullCommitFinished, Handle) + HFunc(NPDisk::TEvCutLog, Handle) HFunc(TEvHullCompact, Handle) CFunc(TEvents::TSystem::Wakeup, HandleWakeup) - HTemplFunc(THullChange, Handle) + HTemplFunc(THullChange, Handle) HTemplFunc(TFreshAppendixCompactionDone, Handle) - HTemplFunc(TEvAddBulkSst, Handle) - HTemplFunc(TSelected, Handle) - HFunc(TEvents::TEvPoisonPill, HandlePoison) - ) - - public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::BS_ASYNC_LEVEL_INDEX; - } - + HTemplFunc(TEvAddBulkSst, Handle) + HTemplFunc(TSelected, Handle) + HFunc(TEvents::TEvPoisonPill, HandlePoison) + ) + + public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::BS_ASYNC_LEVEL_INDEX; + } + TLevelIndexActor( TIntrusivePtr<TVDiskConfig> config, TIntrusivePtr<THullDs> hullDs, @@ -659,60 +659,60 @@ namespace NKikimr { TActorId loggerId, std::shared_ptr<TRunTimeCtx> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep) - : TActorBootstrapped<TThis>() + : TActorBootstrapped<TThis>() , Config(std::move(config)) - , HullDs(std::move(hullDs)) + , HullDs(std::move(hullDs)) , HullLogCtx(std::move(hullLogCtx)) - , RTCtx(std::move(rtCtx)) + , RTCtx(std::move(rtCtx)) , SyncLogFirstLsnToKeep(std::move(syncLogFirstLsnToKeep)) - , Boundaries(new 
NHullComp::TBoundaries(RTCtx->PDiskCtx->Dsk->ChunkSize, + , Boundaries(new NHullComp::TBoundaries(RTCtx->PDiskCtx->Dsk->ChunkSize, Config->HullCompLevel0MaxSstsAtOnce, Config->HullCompSortedPartsNum, Config->Level0UseDreg)) - , HullDbCommitterCtx(new THullDbCommitterCtx(RTCtx->PDiskCtx, - HullDs->HullCtx, - RTCtx->LsnMngr, + , HullDbCommitterCtx(new THullDbCommitterCtx(RTCtx->PDiskCtx, + HullDs->HullCtx, + RTCtx->LsnMngr, loggerId, HullLogCtx->HugeKeeperId)) - , CompactionTask(new TCompactionTask) - , ActiveActors(RTCtx->LevelIndex->ActorCtx->ActiveActors) - , LevelStat(HullDs->HullCtx->VCtx->VDiskCounters) - {} - }; - - NActors::IActor* CreateLogoBlobsActor( + , CompactionTask(new TCompactionTask) + , ActiveActors(RTCtx->LevelIndex->ActorCtx->ActiveActors) + , LevelStat(HullDs->HullCtx->VCtx->VDiskCounters) + {} + }; + + NActors::IActor* CreateLogoBlobsActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyLogoBlob, TMemRecLogoBlob>> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep) { - + return new TLevelIndexActor<TKeyLogoBlob, TMemRecLogoBlob>( config, hullDs, hullLogCtx, loggerId, rtCtx, syncLogFirstLsnToKeep); - } - - NActors::IActor* CreateBlocksActor( + } + + NActors::IActor* CreateBlocksActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyBlock, TMemRecBlock>> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep) { - + return new TLevelIndexActor<TKeyBlock, TMemRecBlock>( config, hullDs, hullLogCtx, loggerId, rtCtx, syncLogFirstLsnToKeep); - } - - NActors::IActor* CreateBarriersActor( + } + + NActors::IActor* CreateBarriersActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> 
hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyBarrier, TMemRecBarrier>> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep) { - + return new TLevelIndexActor<TKeyBarrier, TMemRecBarrier>( config, hullDs, hullLogCtx, loggerId, rtCtx, syncLogFirstLsnToKeep); - } -} + } +} diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.h b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.h index c2f2de8be1..5749a70ecd 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.h +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullactor.h @@ -103,25 +103,25 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// // Run an actor for every database //////////////////////////////////////////////////////////////////////////// - NActors::IActor* CreateLogoBlobsActor( + NActors::IActor* CreateLogoBlobsActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyLogoBlob, TMemRecLogoBlob>> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep); - NActors::IActor* CreateBlocksActor( + NActors::IActor* CreateBlocksActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyBlock, TMemRecBlock>> rtCtx, std::shared_ptr<NSyncLog::TSyncLogFirstLsnToKeep> syncLogFirstLsnToKeep); - NActors::IActor* CreateBarriersActor( + NActors::IActor* CreateBarriersActor( TIntrusivePtr<TVDiskConfig> config, - TIntrusivePtr<THullDs> hullDs, + TIntrusivePtr<THullDs> hullDs, std::shared_ptr<THullLogCtx> hullLogCtx, TActorId loggerId, std::shared_ptr<TLevelIndexRunTimeCtx<TKeyBarrier, 
TMemRecBarrier>> rtCtx, diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcommit.h b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcommit.h index 1221693c5e..8b094e893b 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcommit.h +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcommit.h @@ -107,7 +107,7 @@ namespace NKikimr { LOG_INFO(ctx, NKikimrServices::BS_SKELETON, VDISKP(HullLogCtx->VCtx->VDiskLogPrefix, "commit %s signature# %s CommitChunks# %s" " DeleteChunks# %s", THullCommitFinished::TypeToString(NotifyType), - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), FormatList(CommitRecord.CommitChunks).data(), FormatList(CommitRecord.DeleteChunks).data())); } diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcompact.h b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcompact.h index 6ab4d8a66e..fca2f00daa 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcompact.h +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hullcompact.h @@ -96,7 +96,7 @@ namespace NKikimr { LOG_INFO(ctx, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Compaction job (%" PRIu64 ") started: fresh# %s freedHugeBlobs# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), CompactionID, + PDiskSignatureForHullDbKey<TKey>().ToString().data(), CompactionID, (FreshSegment ? "true" : "false"), Worker.GetFreedHugeBlobs().ToString().data())); // bool debug output of brs @@ -277,7 +277,7 @@ namespace NKikimr { LOG_LOG(ctx, IsAborting ? NLog::PRI_ERROR : NLog::PRI_INFO, NKikimrServices::BS_HULLCOMP, VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Compaction job (%" PRIu64 ") finished (freedHugeBlobs): fresh# %s freedHugeBlobs# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), CompactionID, + PDiskSignatureForHullDbKey<TKey>().ToString().data(), CompactionID, (FreshSegment ? 
"true" : "false"), Worker.GetFreedHugeBlobs().ToString().data())); msg->FreedHugeBlobs = IsAborting ? TDiskPartVec() : Worker.GetFreedHugeBlobs(); @@ -295,7 +295,7 @@ namespace NKikimr { VDISKP(HullCtx->VCtx->VDiskLogPrefix, "%s: Compaction job (%" PRIu64 ") finished: fresh# %s chunks# %" PRIu32 " stat# %s " "gcmpStat# %s IsAborting# %s", - PDiskSignatureForHullDbKey<TKey>().ToString().data(), + PDiskSignatureForHullDbKey<TKey>().ToString().data(), CompactionID, (FreshSegment ? "true" : "false"), ui32(msg->CommitChunks.size()), Worker.Statistics.ToString().data(), Gcmp->GetStat().ToString().data(), IsAborting ? "true" : "false")); diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.cpp b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.cpp index ecf5d9f699..bbdf962ce5 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.cpp @@ -43,7 +43,7 @@ namespace NKikimr { }; std::unique_ptr<NPDisk::TEvLog> CreateHullUpdate(const std::shared_ptr<THullLogCtx> &hullLogCtx, - TLogSignature signature, + TLogSignature signature, const TString &data, TLsnSeg seg, void *cookie, @@ -65,7 +65,7 @@ namespace NKikimr { std::unique_ptr<NPDisk::TEvLog> CreateHullUpdate(const std::shared_ptr<THullLogCtx> &hullLogCtx, - TLogSignature signature, + TLogSignature signature, const NPDisk::TCommitRecord &commitRecord, const TString &data, TLsnSeg seg, diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.h b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.h index f20a3faf7d..6e6416b830 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.h +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hulllog.h @@ -13,7 +13,7 @@ namespace NKikimr { // to recovery log ///////////////////////////////////////////////////////////////////////////////////////// std::unique_ptr<NPDisk::TEvLog> CreateHullUpdate(const std::shared_ptr<THullLogCtx> &hullLogCtx, - TLogSignature signature, + 
TLogSignature signature, const TString &data, TLsnSeg seg, void *cookie, @@ -21,7 +21,7 @@ namespace NKikimr { std::unique_ptr<TEvHullHugeBlobLogged> hugeKeeperNotice); std::unique_ptr<NPDisk::TEvLog> CreateHullUpdate(const std::shared_ptr<THullLogCtx> &hullLogCtx, - TLogSignature signature, + TLogSignature signature, const NPDisk::TCommitRecord &commitRecord, const TString &data, TLsnSeg seg, diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_readbatch_ut.cpp b/ydb/core/blobstorage/vdisk/hullop/blobstorage_readbatch_ut.cpp index b035813ad4..2ec3c228f0 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_readbatch_ut.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_readbatch_ut.cpp @@ -79,7 +79,7 @@ Y_UNIT_TEST_SUITE(ReadBatcher) { ui32 index = rng() % pendingReads.size(); std::unique_ptr<NPDisk::TEvChunkRead> msg = std::move(pendingReads[index]); pendingReads.erase(pendingReads.begin() + index); - NPDisk::TEvChunkReadResult result(NKikimrProto::OK, msg->ChunkIdx, msg->Offset, msg->Cookie, 0, ""); + NPDisk::TEvChunkReadResult result(NKikimrProto::OK, msg->ChunkIdx, msg->Offset, msg->Cookie, 0, ""); UNIT_ASSERT(msg->Offset + msg->Size <= chunkSize); result.Data.SetData(chunks.at(msg->ChunkIdx).substr(msg->Offset, msg->Size)); batcher.Apply(&result); diff --git a/ydb/core/blobstorage/vdisk/hullop/hullop_compactfreshappendix.cpp b/ydb/core/blobstorage/vdisk/hullop/hullop_compactfreshappendix.cpp index 40905cdf49..e6e2c89573 100644 --- a/ydb/core/blobstorage/vdisk/hullop/hullop_compactfreshappendix.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/hullop_compactfreshappendix.cpp @@ -29,7 +29,7 @@ namespace NKikimr { auto endTime = TAppData::TimeProvider->Now(); LOG_INFO_S(ctx, NKikimrServices::BS_HULLCOMP, VCtx->VDiskLogPrefix - << PDiskSignatureForHullDbKey<TKey>().ToString().data() + << PDiskSignatureForHullDbKey<TKey>().ToString().data() << ": FreshAppendix Compaction Job finished: duration# " << (endTime - startTime)); ctx.Send(Recipient, new 
TFreshAppendixCompactionDone(std::move(Job))); diff --git a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.cpp b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.cpp index 6d09d5b45f..f75654db66 100644 --- a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.cpp +++ b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.cpp @@ -167,7 +167,7 @@ namespace NKikimr { OutputCounters(str, " ", "", ""); str << " StartingPoints# {"; for (const auto &x : StartingPoints) { - str << "[" << x.first.ToString() + str << "[" << x.first.ToString() << " " << x.second << "]"; } str << "}"; @@ -217,7 +217,7 @@ namespace NKikimr { RecoveryLogLastLsn = r.Lsn; } - void TLocalRecoveryInfo::SetStartingPoint(TLogSignature signature, ui64 lsn) { + void TLocalRecoveryInfo::SetStartingPoint(TLogSignature signature, ui64 lsn) { bool success = StartingPoints.insert(TSignatureToLsn::value_type(signature, lsn)).second; Y_VERIFY(success); } diff --git a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.h b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.h index eaf0944126..7414907347 100644 --- a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.h +++ b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_defs.h @@ -103,7 +103,7 @@ namespace NKikimr { // lsn we starting with after local recovery and lsn shift ui64 RecoveredLogStartLsn = 0; // found starting points - using TSignatureToLsn = TMap<TLogSignature, ui64>; + using TSignatureToLsn = TMap<TLogSignature, ui64>; TSignatureToLsn StartingPoints; TInstant LocalRecoveryStartTime; @@ -268,7 +268,7 @@ namespace NKikimr { void Output(IOutputStream &str) const; void OutputHtml(IOutputStream &str) const; TString ToString() const; - void SetStartingPoint(TLogSignature signature, ui64 lsn); + void SetStartingPoint(TLogSignature signature, ui64 lsn); void HandleReadLogResult(const NPDisk::TEvReadLogResult::TResults &results); void SetRecoveredLogStartLsn(ui64 lsn); void 
CheckConsistency(); diff --git a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_logreplay.cpp b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_logreplay.cpp index 89a2244ac8..b9f9460b40 100644 --- a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_logreplay.cpp +++ b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_logreplay.cpp @@ -807,72 +807,72 @@ namespace NKikimr { RecoveredLsn = record.Lsn; switch (record.Signature) { - case TLogSignature::SignatureLogoBlob: + case TLogSignature::SignatureLogoBlob: LocRecCtx->RecovInfo->DispatchSignatureLogoBlob(record); return HandleLogoBlob(ctx, record); - case TLogSignature::SignatureBlock: + case TLogSignature::SignatureBlock: LocRecCtx->RecovInfo->DispatchSignatureBlock(record); return HandleBlock(ctx, record); - case TLogSignature::SignatureGC: + case TLogSignature::SignatureGC: LocRecCtx->RecovInfo->DispatchSignatureGC(record); return HandleGC(ctx, record); - case TLogSignature::SignatureSyncLogIdx: + case TLogSignature::SignatureSyncLogIdx: LocRecCtx->RecovInfo->DispatchSignatureSyncLogIdx(record); return EDispatchStatus::Success; // entry point, already handled - case TLogSignature::SignatureHullLogoBlobsDB: + case TLogSignature::SignatureHullLogoBlobsDB: LocRecCtx->RecovInfo->DispatchSignatureHullLogoBlobsDB(record); return HandleHugeSlotsDelLogoBlobsDB(ctx, record); - case TLogSignature::SignatureHullBlocksDB: + case TLogSignature::SignatureHullBlocksDB: LocRecCtx->RecovInfo->DispatchSignatureHullBlocksDB(record); // entry point already handled, take care of huge slots return HandleHugeSlotsDelBlocksDB(ctx, record); - case TLogSignature::SignatureHullBarriersDB: + case TLogSignature::SignatureHullBarriersDB: LocRecCtx->RecovInfo->DispatchSignatureHullBarriersDB(record); // entry point already handled, take care of huge slots return HandleHugeSlotsDelBarriersDB(ctx, record); - case TLogSignature::SignatureHullCutLog: + case TLogSignature::SignatureHullCutLog: 
LocRecCtx->RecovInfo->DispatchSignatureHullCutLog(record); return EDispatchStatus::Success; - case TLogSignature::SignatureLocalSyncData: + case TLogSignature::SignatureLocalSyncData: LocRecCtx->RecovInfo->DispatchSignatureLocalSyncData(record); return HandleSyncData(ctx, record); - case TLogSignature::SignatureSyncerState: + case TLogSignature::SignatureSyncerState: LocRecCtx->RecovInfo->DispatchSignatureSyncerState(record); return EDispatchStatus::Success; // entry point, already handled - case TLogSignature::SignatureHandoffDelLogoBlob: + case TLogSignature::SignatureHandoffDelLogoBlob: LocRecCtx->RecovInfo->DispatchSignatureHandoffDelLogoBlob(record); return HandleHandoffDel(ctx, record); - case TLogSignature::SignatureHugeBlobAllocChunk: + case TLogSignature::SignatureHugeBlobAllocChunk: LocRecCtx->RecovInfo->DispatchSignatureHugeBlobAllocChunk(record); return HandleHugeBlobAllocChunk(ctx, record); - case TLogSignature::SignatureHugeBlobFreeChunk: + case TLogSignature::SignatureHugeBlobFreeChunk: LocRecCtx->RecovInfo->DispatchSignatureHugeBlobFreeChunk(record); return HandleHugeBlobFreeChunk(ctx, record); - case TLogSignature::SignatureHugeBlobEntryPoint: + case TLogSignature::SignatureHugeBlobEntryPoint: LocRecCtx->RecovInfo->DispatchSignatureHugeBlobEntryPoint(record); return HandleHugeBlobEntryPoint(ctx, record); - case TLogSignature::SignatureHugeLogoBlob: + case TLogSignature::SignatureHugeLogoBlob: LocRecCtx->RecovInfo->DispatchSignatureHugeLogoBlob(record); return HandleHugeLogoBlob(ctx, record); - case TLogSignature::SignatureLogoBlobOpt: + case TLogSignature::SignatureLogoBlobOpt: LocRecCtx->RecovInfo->DispatchSignatureLogoBlobOpt(record); return HandleOptLogoBlob(ctx, record); - case TLogSignature::SignaturePhantomBlobs: + case TLogSignature::SignaturePhantomBlobs: LocRecCtx->RecovInfo->DispatchSignaturePhantomBlobs(record); return HandlePhantomLogoBlobs(ctx, record); - case TLogSignature::SignatureAnubisOsirisPut: + case 
TLogSignature::SignatureAnubisOsirisPut: LocRecCtx->RecovInfo->DispatchSignatureAnubisOsirisPut(record); return HandleAnubisOsirisPut(ctx, record); - case TLogSignature::SignatureAddBulkSst: + case TLogSignature::SignatureAddBulkSst: LocRecCtx->RecovInfo->DispatchSignatureAddBulkSst(record); return HandleAddBulkSst(ctx, record); case TLogSignature::SignatureScrub: LocRecCtx->RecovInfo->DispatchSignatureScrub(record); return HandleScrub(ctx, record); - case TLogSignature::Max: + case TLogSignature::Max: break; } - Y_FAIL_S("Unexpected case: " << record.Signature.ToString()); + Y_FAIL_S("Unexpected case: " << record.Signature.ToString()); } void VerifyOwnedChunks(const TActorContext& ctx) { diff --git a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_public.cpp b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_public.cpp index 4608d17d87..b9bdafdbb1 100644 --- a/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_public.cpp +++ b/ydb/core/blobstorage/vdisk/localrecovery/localrecovery_public.cpp @@ -60,7 +60,7 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// class TDatabaseLocalRecovery : public TActorBootstrapped<TDatabaseLocalRecovery> { friend class TActorBootstrapped<TDatabaseLocalRecovery>; - using TStartingPoints = TMap<TLogSignature, NPDisk::TLogRecord>; + using TStartingPoints = TMap<TLogSignature, NPDisk::TLogRecord>; using THullSegLoadedLogoBlob = THullSegLoaded<TLogoBlobsSst>; TIntrusivePtr<TVDiskConfig> Config; @@ -257,7 +257,7 @@ namespace NKikimr { NKikimrVDiskData::THullDbEntryPoint pb; const bool good = THullDbSignatureRoutines::Parse(pb, data, explanation); if (!good) { - TString dbtype = TLogSignature(signature).ToString(); + TString dbtype = TLogSignature(signature).ToString(); explanation = "Entry point for Hull (" + dbtype + ") check failed: " + explanation; SignalErrorAndDie(ctx, NKikimrProto::ERROR, explanation); return false; @@ -274,7 +274,7 @@ namespace NKikimr { bool 
InitLogoBlobsMetabase(const TStartingPoints &startingPoints, const TActorContext &ctx) { using TLoader = TLevelIndexLoader<TKeyLogoBlob, TMemRecLogoBlob, EHullDbType::LogoBlobs>; - const int signature = TLogSignature::SignatureHullLogoBlobsDB; + const int signature = TLogSignature::SignatureHullLogoBlobsDB; return InitMetabase<TLogoBlobsDs, TLoader, signature>( startingPoints, LocRecCtx->HullDbRecovery->GetHullDs()->LogoBlobs, @@ -288,7 +288,7 @@ namespace NKikimr { bool InitBlocksMetabase(const TStartingPoints &startingPoints, const TActorContext &ctx) { using TLoader = TLevelIndexLoader<TKeyBlock, TMemRecBlock, EHullDbType::Blocks>; - const int signature = TLogSignature::SignatureHullBlocksDB; + const int signature = TLogSignature::SignatureHullBlocksDB; return InitMetabase<TBlocksDs, TLoader, signature>( startingPoints, LocRecCtx->HullDbRecovery->GetHullDs()->Blocks, @@ -302,7 +302,7 @@ namespace NKikimr { bool InitBarriersMetabase(const TStartingPoints &startingPoints, const TActorContext &ctx) { using TLoader = TLevelIndexLoader<TKeyBarrier, TMemRecBarrier, EHullDbType::Barriers>; - const int signature = TLogSignature::SignatureHullBarriersDB; + const int signature = TLogSignature::SignatureHullBarriersDB; return InitMetabase<TBarriersDs, TLoader, signature>( startingPoints, LocRecCtx->HullDbRecovery->GetHullDs()->Barriers, @@ -316,7 +316,7 @@ namespace NKikimr { bool InitSyncLogData(const TStartingPoints &startingPoints, const TActorContext &ctx) { TStartingPoints::const_iterator it; - it = startingPoints.find(TLogSignature::SignatureSyncLogIdx); + it = startingPoints.find(TLogSignature::SignatureSyncLogIdx); TString entryPoint; ui64 entryPointLsn = 0; @@ -360,7 +360,7 @@ namespace NKikimr { bool InitSyncer(const TStartingPoints &startingPoints, const TActorContext &ctx) { TStartingPoints::const_iterator it; - it = startingPoints.find(TLogSignature::SignatureSyncerState); + it = startingPoints.find(TLogSignature::SignatureSyncerState); if (it == 
startingPoints.end()) { // create an empty DB LocRecCtx->RecovInfo->EmptySyncer = true; @@ -404,7 +404,7 @@ namespace NKikimr { LOG_DEBUG(ctx, BS_HULLHUGE, msg); }; TStartingPoints::const_iterator it; - it = startingPoints.find(TLogSignature::SignatureHugeBlobEntryPoint); + it = startingPoints.find(TLogSignature::SignatureHugeBlobEntryPoint); if (it == startingPoints.end()) { LocRecCtx->RecovInfo->EmptyHuge = true; @@ -508,12 +508,12 @@ namespace NKikimr { ui32(x.first), x.second.ToString().data())); LocRecCtx->RecovInfo->SetStartingPoint(x.first, x.second.Lsn); switch (x.first) { - case TLogSignature::SignatureSyncLogIdx: - case TLogSignature::SignatureHullLogoBlobsDB: - case TLogSignature::SignatureHullBlocksDB: - case TLogSignature::SignatureHullBarriersDB: - case TLogSignature::SignatureSyncerState: - case TLogSignature::SignatureHugeBlobEntryPoint: + case TLogSignature::SignatureSyncLogIdx: + case TLogSignature::SignatureHullLogoBlobsDB: + case TLogSignature::SignatureHullBlocksDB: + case TLogSignature::SignatureHullBarriersDB: + case TLogSignature::SignatureSyncerState: + case TLogSignature::SignatureHugeBlobEntryPoint: case TLogSignature::SignatureScrub: break; diff --git a/ydb/core/blobstorage/vdisk/repl/blobstorage_hullreplwritesst_ut.cpp b/ydb/core/blobstorage/vdisk/repl/blobstorage_hullreplwritesst_ut.cpp index e1534cdea0..cacb140593 100644 --- a/ydb/core/blobstorage/vdisk/repl/blobstorage_hullreplwritesst_ut.cpp +++ b/ydb/core/blobstorage/vdisk/repl/blobstorage_hullreplwritesst_ut.cpp @@ -13,7 +13,7 @@ std::shared_ptr<TReplCtx> CreateReplCtx(TVector<TVDiskID>& vdisks, const TIntrus auto vdiskCfg = MakeIntrusive<TVDiskConfig>(baseInfo); auto counters = MakeIntrusive<NMonitoring::TDynamicCounters>(); auto vctx = MakeIntrusive<TVDiskContext>(TActorId(), info->PickTopology(), counters, TVDiskID(0, 1, 0, 0, 0), - nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); + nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); auto hugeBlobCtx = 
std::make_shared<THugeBlobCtx>(512u << 10u, nullptr); auto dsk = MakeIntrusive<TPDiskParams>(ui8(1), 1u, 128u << 20, 4096u, 0u, 1000000000u, 1000000000u, 65536u, 65536u, 65536u); auto pdiskCtx = std::make_shared<TPDiskCtx>(dsk, TActorId()); @@ -31,7 +31,7 @@ std::shared_ptr<TReplCtx> CreateReplCtx(TVector<TVDiskID>& vdisks, const TIntrus TVDiskContextPtr CreateVDiskContext(const TBlobStorageGroupInfo& info) { return MakeIntrusive<TVDiskContext>(TActorId(), info.PickTopology(), new NMonitoring::TDynamicCounters(), TVDiskID(), - nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); + nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); } TIntrusivePtr<THullCtx> CreateHullCtx(const TBlobStorageGroupInfo& info, ui32 chunkSize, ui32 compWorthReadSize) { diff --git a/ydb/core/blobstorage/vdisk/repl/blobstorage_replrecoverymachine_ut.cpp b/ydb/core/blobstorage/vdisk/repl/blobstorage_replrecoverymachine_ut.cpp index 96fab8dafc..3ff7efb53e 100644 --- a/ydb/core/blobstorage/vdisk/repl/blobstorage_replrecoverymachine_ut.cpp +++ b/ydb/core/blobstorage/vdisk/repl/blobstorage_replrecoverymachine_ut.cpp @@ -65,7 +65,7 @@ namespace NKikimr { auto vdiskCfg = MakeIntrusive<TVDiskConfig>(baseInfo); auto counters = MakeIntrusive<NMonitoring::TDynamicCounters>(); auto vctx = MakeIntrusive<TVDiskContext>(TActorId(), info->PickTopology(), counters, TVDiskID(0, 1, 0, 0, 0), - nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); + nullptr, TPDiskCategory::DEVICE_TYPE_UNKNOWN); auto hugeBlobCtx = std::make_shared<THugeBlobCtx>(512u << 10u, nullptr); auto replCtx = std::make_shared<TReplCtx>( vctx, diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp index 1bdbd3dcad..96b813d317 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp @@ -341,7 +341,7 @@ namespace NKikimr { CreateLoggedRec(seg, confirmSyncLogAlso, id, ingress, std::move(buffer), 
std::move(result), sender, cookie)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureLogoBlobOpt, dataToWrite, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureLogoBlobOpt, dataToWrite, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log logMsg->Orbit = std::move(orbit); @@ -755,7 +755,7 @@ namespace NKikimr { Db->HugeKeeperID, ev)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureHugeLogoBlob, dataToWrite, seg, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureHugeLogoBlob, dataToWrite, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -787,7 +787,7 @@ namespace NKikimr { new TLoggedRecDelLogoBlobDataSyncLog(seg, confirmSyncLogAlso, std::move(result), ev->Sender, ev->Cookie)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureHandoffDelLogoBlob, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureHandoffDelLogoBlob, serializedLogRecord, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -943,7 +943,7 @@ namespace NKikimr { issuerGuid, std::move(result), ev->Sender, ev->Cookie)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureBlock, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureBlock, ev->GetChainBuffer()->GetString(), seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -1044,7 
+1044,7 @@ namespace NKikimr { intptr_t loggedRecId = LoggedRecsVault.Put(new TLoggedRecVCollectGarbage(seg, true, ingress, std::move(result), ev)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureGC, data, seg, loggedRecCookie, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureGC, data, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -1362,7 +1362,7 @@ namespace NKikimr { intptr_t loggedRecId = LoggedRecsVault.Put(new TLoggedRecLocalSyncData(seg, false, std::move(result), ev)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureLocalSyncData, data, seg, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureLocalSyncData, data, seg, loggedRecCookie, nullptr, nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -1433,7 +1433,7 @@ namespace NKikimr { intptr_t loggedRecId = LoggedRecsVault.Put(new TLoggedRecAnubisOsirisPut(seg, true, insert, std::move(result), ev)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureAnubisOsirisPut, data, seg, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignatureAnubisOsirisPut, data, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -1503,9 +1503,9 @@ namespace NKikimr { << " Marker# BSVS26"); TRope buf = std::move(msg->Data); - const ui64 bufSize = buf.GetSize(); + const ui64 bufSize = buf.GetSize(); Y_VERIFY(bufSize <= Config->MaxLogoBlobDataSize && HugeBlobCtx->IsHugeBlob(VCtx->Top->GType, id.FullID()), - "TEvRecoveredHugeBlob: blob is too small/huge bufSize# %zu", 
bufSize); + "TEvRecoveredHugeBlob: blob is too small/huge bufSize# %zu", bufSize); UpdatePDiskWriteBytes(bufSize); auto oosStatus = VCtx->GetOutOfSpaceState().GetGlobalStatusFlags(); @@ -1549,7 +1549,7 @@ namespace NKikimr { intptr_t loggedRecId = LoggedRecsVault.Put(new TLoggedRecPhantoms(seg, true, ev)); void *loggedRecCookie = reinterpret_cast<void *>(loggedRecId); // create log msg - auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignaturePhantomBlobs, data, seg, + auto logMsg = CreateHullUpdate(HullLogCtx, TLogSignature::SignaturePhantomBlobs, data, seg, loggedRecCookie, std::move(syncLogMsg), nullptr); // send prepared message to recovery log ctx.Send(Db->LoggerID, logMsg.release()); @@ -1582,7 +1582,7 @@ namespace NKikimr { void SkeletonIsUpAndRunning(const TActorContext &ctx, bool runRepl = false) { Become(&TThis::StateNormal); - VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::OK); + VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::OK); LOG_INFO_S(ctx, BS_SKELETON, VCtx->VDiskLogPrefix << "SKELETON IS UP AND RUNNING" << " Marker# BSVS28"); // notify SkeletonFront @@ -1604,7 +1604,7 @@ namespace NKikimr { NKikimrWhiteboard::EVDiskState state) { Become(&TThis::StateDatabaseError); - VDiskMonGroup.VDiskState(state); + VDiskMonGroup.VDiskState(state); // notify SkeletonFront auto msg = std::make_unique<TEvFrontRecoveryStatus>(phase, NKikimrProto::ERROR, @@ -1767,7 +1767,7 @@ namespace NKikimr { if (Config->RunDefrag) { auto defragCtx = std::make_shared<TDefragCtx>(VCtx, HugeBlobCtx, PDiskCtx, ctx.SelfID, - Db->HugeKeeperID, true); + Db->HugeKeeperID, true); DefragId = ctx.Register(CreateDefragActor(defragCtx, GInfo)); ActiveActors.Insert(DefragId); // keep forever } @@ -1794,7 +1794,7 @@ namespace NKikimr { if (Config->RunSyncer && !Config->BaseInfo.DonorMode) { // switch to syncronization step Become(&TThis::StateSyncGuidRecovery); - VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::SyncGuidRecovery); + 
VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::SyncGuidRecovery); // create syncer context auto sc = MakeIntrusive<TSyncerContext>(VCtx, Db->LsnMngr, @@ -2043,13 +2043,13 @@ namespace NKikimr { Y_VERIFY_DEBUG(msg->Owner == PDiskCtx->Dsk->Owner); Y_VERIFY(!CutLogDelayedMsg); LOG_DEBUG_S(ctx, BS_LOGCUTTER, VCtx->VDiskLogPrefix - << "Handle " << msg->ToString() + << "Handle " << msg->ToString() << " actorid# " << ctx.SelfID.ToString() << " Marker# BSVS33"); SpreadCutLog(std::move(msg), ctx); } else { LOG_DEBUG_S(ctx, BS_LOGCUTTER, VCtx->VDiskLogPrefix - << "Handle " << msg->ToString() + << "Handle " << msg->ToString() << " DELAYED actorid# " << ctx.SelfID.ToString() << " Marker# BSVS34"); CutLogDelayedMsg = std::move(msg); @@ -2089,8 +2089,8 @@ namespace NKikimr { } LOG_DEBUG_S(ctx, BS_LOGCUTTER, VCtx->VDiskLogPrefix - << "SpreadCutLog: Handle " << msg->ToString() - << " DELAYED; counter# " << counter + << "SpreadCutLog: Handle " << msg->ToString() + << " DELAYED; counter# " << counter << " actorid# " << ctx.SelfID.ToString() << " Marker# BSVS35"); } @@ -2100,7 +2100,7 @@ namespace NKikimr { // completion local recovery void DeliverDelayedCutLogIfAny(const TActorContext &ctx) { LOG_DEBUG_S(ctx, BS_LOGCUTTER, VCtx->VDiskLogPrefix - << "DeliverDelayedCutLogIfAny: hasMsg# " << (CutLogDelayedMsg ? "true" : "false") + << "DeliverDelayedCutLogIfAny: hasMsg# " << (CutLogDelayedMsg ? 
"true" : "false") << " actorid# " << ctx.SelfID.ToString() << " Marker# BSVS36"); diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonerr.h b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonerr.h index 076fdb8cd4..7e899b3236 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonerr.h +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonerr.h @@ -9,8 +9,8 @@ #include <ydb/core/blobstorage/vdisk/common/vdisk_events.h> #include <ydb/core/blobstorage/vdisk/common/vdisk_response.h> -LWTRACE_USING(BLOBSTORAGE_PROVIDER); - +LWTRACE_USING(BLOBSTORAGE_PROVIDER); + namespace NKikimr { //////////////////////////////////////////////////////////////////////////////////////////// @@ -266,7 +266,7 @@ namespace NKikimr { ev->Get()->GetBufferBytes()); const ui64 vcookie = record.GetCookie(); const ui64 *cookie = record.HasCookie() ? &vcookie : nullptr; - const ui64 bufferSizeBytes = ev->Get()->GetBufferBytes(); + const ui64 bufferSizeBytes = ev->Get()->GetBufferBytes(); const auto handleClass = record.GetHandleClass(); const NVDiskMon::TLtcHistoPtr &histoPtr = vctx->Histograms.GetHistogram(handleClass); const NMonitoring::TDynamicCounters::TCounterPtr &counterPtr = ResultingCounterForEvent(vctx, ev); diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp index c5c1185f5f..08c32a8921 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeletonfront.cpp @@ -657,8 +657,8 @@ namespace NKikimr { void Bootstrap(const TActorContext &ctx) { const auto& baseInfo = Config->BaseInfo; VCtx = MakeIntrusive<TVDiskContext>(ctx.SelfID, GInfo->PickTopology(), VDiskCounters, SelfVDiskId, - ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode, - baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter, + 
ctx.ExecutorThread.ActorSystem, baseInfo.DeviceType, baseInfo.DonorMode, + baseInfo.ReplPDiskReadQuoter, baseInfo.ReplPDiskWriteQuoter, baseInfo.ReplNodeRequestQuoter, baseInfo.ReplNodeResponseQuoter); // create IntQueues @@ -769,7 +769,7 @@ namespace NKikimr { } TABLEBODY() { TABLER() { - auto v = VDiskMonGroup.VDiskState(); + auto v = VDiskMonGroup.VDiskState(); auto s = NKikimrWhiteboard::EVDiskState_Name(v); auto light = ToLightSignal(v); TABLED() {str << "VDisk";} @@ -782,7 +782,7 @@ namespace NKikimr { TABLED() {str << "VDisk LocalDb Recovery";} TABLED() {THtmlLightSignalRenderer(light, s).Output(str);} } - if (VDiskMonGroup.VDiskState() == NKikimrWhiteboard::PDiskError) { + if (VDiskMonGroup.VDiskState() == NKikimrWhiteboard::PDiskError) { TABLER() { TABLED() {str << "Error Details";} TABLED() { @@ -900,9 +900,9 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////// void UpdateWhiteboard(const TActorContext &ctx, bool schedule = true) { // out of space - const auto outOfSpaceFlags = VCtx->GetOutOfSpaceState().LocalWhiteboardFlag(); + const auto outOfSpaceFlags = VCtx->GetOutOfSpaceState().LocalWhiteboardFlag(); // skeleton state - const auto state = VDiskMonGroup.VDiskState(); + const auto state = VDiskMonGroup.VDiskState(); // replicated? 
bool replicated = !ReplMonGroup.ReplUnreplicatedVDisks() && !HasUnreadableBlobs; bool unreplicatedPhantoms = ReplMonGroup.ReplCurrentNumUnrecoveredPhantomBlobs() + @@ -936,15 +936,15 @@ namespace NKikimr { template <class TEventPtr> void DatabaseAccessDeniedHandle(TEventPtr &ev, const TActorContext &ctx) { - LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix - << "Access denied Type# " << Sprintf("0x%08" PRIx32, ev->GetTypeRewrite()) - << " Sender# " << ev->Sender.ToString() - << " OriginScopeId# " << ScopeIdToString(ev->OriginScopeId) + LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + << "Access denied Type# " << Sprintf("0x%08" PRIx32, ev->GetTypeRewrite()) + << " Sender# " << ev->Sender.ToString() + << " OriginScopeId# " << ScopeIdToString(ev->OriginScopeId) << " LocalScopeId# " << ScopeIdToString(AppData(ctx)->LocalScopeId.GetInterconnectScopeId()) << " Marker# BSVSF01"); ++*AccessDeniedMessages; - TInstant now = TAppData::TimeProvider->Now(); - FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); + TInstant now = TAppData::TimeProvider->Now(); + FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); Reply(ev, ctx, NKikimrProto::ERROR, "access denied", now); } @@ -952,7 +952,7 @@ namespace NKikimr { void DatabaseErrorHandle(TEventPtr &ev, const TActorContext &ctx) { SetReceivedTime(ev); TInstant now = TAppData::TimeProvider->Now(); - FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); + FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); Reply(ev, ctx, NKikimrProto::VDISK_ERROR_STATE, "VDisk is in error state", now); // NOTE: VDisk is in StateDatabaseError state, it means recovery failed. // VDisk returns VDISK_ERROR_STATE status to all requests (outside). 
@@ -963,7 +963,7 @@ namespace NKikimr { SetReceivedTime(ev); TInstant now = TAppData::TimeProvider->Now(); NotifyIfNotReady(ev, ctx); - FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); + FillInCostSettingsAndTimestampIfApplicable(ev->Get()->Record, now); Reply(ev, ctx, NKikimrProto::NOTREADY, "VDisk is not ready", now); // NOTE: when database is not ready, we reply with NOTREADY and we do not // pass this message to the Backpressure management subsystem @@ -1006,8 +1006,8 @@ namespace NKikimr { } } - void FillInCostSettingsAndTimestampIfRequired(NKikimrBlobStorage::TMsgQoS *qos, TInstant now) const { - qos->MutableExecTimeStats()->SetReceivedTimestamp(now.GetValue()); + void FillInCostSettingsAndTimestampIfRequired(NKikimrBlobStorage::TMsgQoS *qos, TInstant now) const { + qos->MutableExecTimeStats()->SetReceivedTimestamp(now.GetValue()); if (qos->GetSendMeCostSettings() && CostModel) { CostModel->FillInSettings(*qos->MutableCostSettings()); } @@ -1019,7 +1019,7 @@ namespace NKikimr { CheckEvent(ev, msgName); const ui32 recByteSize = ev->Get()->GetCachedByteSize(); auto &record = ev->Get()->Record; - auto &msgQoS = *record.MutableMsgQoS(); + auto &msgQoS = *record.MutableMsgQoS(); // set up reception time TInstant now = TAppData::TimeProvider->Now(); @@ -1031,7 +1031,7 @@ namespace NKikimr { msgQoS.SetCost(cost); msgQoS.SetIntQueueId(intQueueId); ActorIdToProto(ev->Sender, msgQoS.MutableSenderActorId()); - FillInCostSettingsAndTimestampIfRequired(&msgQoS, now); + FillInCostSettingsAndTimestampIfRequired(&msgQoS, now); // check queue compatibility: it's a contract between BlobStorage Proxy and VDisk, // we don't work if queues are incompatible @@ -1124,11 +1124,11 @@ namespace NKikimr { void Handle(TEvBlobStorage::TEvVPut::TPtr &ev, const TActorContext &ctx) { bool logPutInternalQueue = true; const ui64 cost = CostModel->GetCost(*ev->Get(), &logPutInternalQueue); - - const NKikimrBlobStorage::TEvVPut &record = ev->Get()->Record; - const TLogoBlobID 
blob = LogoBlobIDFromLogoBlobID(record.GetBlobID()); - LWTRACK(VDiskSkeletonFrontVPutRecieved, ev->Get()->Orbit, VCtx->NodeId, VCtx->GroupId, - VCtx->Top->GetFailDomainOrderNumber(VCtx->ShortSelfVDisk), blob.TabletID(), blob.BlobSize()); + + const NKikimrBlobStorage::TEvVPut &record = ev->Get()->Record; + const TLogoBlobID blob = LogoBlobIDFromLogoBlobID(record.GetBlobID()); + LWTRACK(VDiskSkeletonFrontVPutRecieved, ev->Get()->Orbit, VCtx->NodeId, VCtx->GroupId, + VCtx->Top->GetFailDomainOrderNumber(VCtx->ShortSelfVDisk), blob.TabletID(), blob.BlobSize()); if (logPutInternalQueue) { HandleRequestWithQoS(ctx, ev, "TEvVPut", cost, *IntQueueLogPuts); @@ -1416,15 +1416,15 @@ namespace NKikimr { } void Handle(TEvPDiskErrorStateChange::TPtr &ev, const TActorContext &ctx) { - LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix - << "SkeletonFront: got TEvPDiskErrorStateChange;" + LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + << "SkeletonFront: got TEvPDiskErrorStateChange;" << " state# " << TPDiskErrorState::StateToString(ev->Get()->State) << " Marker# BSVSF03"); // switch skeleton state to PDiskError - SkeletonFrontGroup->ResetCounters(); - VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::PDiskError); + SkeletonFrontGroup->ResetCounters(); + VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::PDiskError); // send poison pill to Skeleton to shutdown it ctx.Send(SkeletonId, new TEvents::TEvPoisonPill()); SkeletonId = {}; @@ -1815,7 +1815,7 @@ namespace NKikimr { auto vdiskCounters = GetServiceCounters(counters, "vdisks"); // add 'storagePool' label - vdiskCounters = vdiskCounters->GetSubgroup("storagePool", cfg->BaseInfo.StoragePoolName); + vdiskCounters = vdiskCounters->GetSubgroup("storagePool", cfg->BaseInfo.StoragePoolName); // add 'group' label const ui32 blobstorageGroupId = info->GroupID; @@ -1827,11 +1827,11 @@ namespace NKikimr { // add 'pdisk' label as a local id of pdisk const ui32 pdiskId = 
cfg->BaseInfo.PDiskId; - vdiskCounters = vdiskCounters->GetSubgroup("pdisk", Sprintf("%09" PRIu32, pdiskId)); + vdiskCounters = vdiskCounters->GetSubgroup("pdisk", Sprintf("%09" PRIu32, pdiskId)); // add 'media' const auto media = cfg->BaseInfo.DeviceType; - vdiskCounters = vdiskCounters->GetSubgroup("media", to_lower(TPDiskCategory::DeviceTypeStr(media, true))); + vdiskCounters = vdiskCounters->GetSubgroup("media", to_lower(TPDiskCategory::DeviceTypeStr(media, true))); return vdiskCounters; } @@ -1897,7 +1897,7 @@ namespace NKikimr { , VDiskMonGroup(VDiskCounters, "subsystem", "state") { ReplMonGroup.ReplUnreplicatedVDisks() = 1; - VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::Initial); + VDiskMonGroup.VDiskState(NKikimrWhiteboard::EVDiskState::Initial); } }; diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_syncfullhandler.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_syncfullhandler.cpp index 3f7212b31e..11f708e717 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_syncfullhandler.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_syncfullhandler.cpp @@ -46,9 +46,9 @@ namespace NKikimr { const ui64 cookie = Ev->Cookie; TSyncState clientSyncState(SyncStateFromSyncState(Record.GetSyncState())); - LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix - << "TVSyncFullHandler: Bootstrap: fromVDisk# " - << VDiskIDFromVDiskID(Record.GetSourceVDiskID()) + LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix + << "TVSyncFullHandler: Bootstrap: fromVDisk# " + << VDiskIDFromVDiskID(Record.GetSourceVDiskID()) << " fromSyncState# " << clientSyncState.ToString() << " Marker# BSVSFH01"); @@ -66,7 +66,7 @@ namespace NKikimr { // check disk guid and start from the beginning if it has changed if (Db->GetVDiskIncarnationGuid() != clientSyncState.Guid) { - LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix + LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix << "TVSyncFullHandler: GUID CHANGED;" << " SourceVDisk# " << 
SourceVDisk << " DbBirthLsn# " << DbBirthLsn @@ -110,9 +110,9 @@ namespace NKikimr { // snapshotLsn is _always_ the last confirmed lsn THullDsSnap fullSnap = Hull->GetIndexSnapshot(); - LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix + LOG_DEBUG_S(ctx, BS_SYNCJOB, Db->VCtx->VDiskLogPrefix << "TVSyncFullHandler: ourConfirmedLsn# " << ConfirmedLsn - << " syncedLsn# " << syncedLsn + << " syncedLsn# " << syncedLsn << " SourceVDisk# " << SourceVDisk << " Marker# BSVSFH03"); diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.cpp index 792f8df319..5592abd5c0 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.cpp @@ -57,10 +57,10 @@ namespace NKikimr { } } - mutable THashMap<ui64, TCell> Stat[EMsgType::Last + 1]; + mutable THashMap<ui64, TCell> Stat[EMsgType::Last + 1]; - TCell &Lookup(EMsgType msgType, ESpaceColor color) { - return Stat[msgType][static_cast<ui64>(color)]; + TCell &Lookup(EMsgType msgType, ESpaceColor color) { + return Stat[msgType][static_cast<ui64>(color)]; } void RenderHtml(IOutputStream &str, const char *tableName, std::function<ui64(const TCell&)> &&func) const { @@ -74,9 +74,9 @@ namespace NKikimr { TABLEHEAD() { TABLER() { TABLEH() {str << "Message";} - for (int j = 0; j < TSpaceColor::E_descriptor()->value_count(); ++j) { - auto color = TSpaceColor::E_descriptor()->value(j)->name(); - TABLEH() {str << color;} + for (int j = 0; j < TSpaceColor::E_descriptor()->value_count(); ++j) { + auto color = TSpaceColor::E_descriptor()->value(j)->name(); + TABLEH() {str << color;} } } } @@ -85,10 +85,10 @@ namespace NKikimr { TABLER() { auto msgType = (EMsgType)i; TABLED() {str << MsgTypeToStr(msgType);} - for (int j = 0; j < TSpaceColor::E_descriptor()->value_count(); ++j) { - auto color = TSpaceColor::E_descriptor()->value(j)->number(); + for (int j = 0; j < 
TSpaceColor::E_descriptor()->value_count(); ++j) { + auto color = TSpaceColor::E_descriptor()->value(j)->number(); TABLED() { - str << func(Stat[i][color]); + str << func(Stat[i][color]); } } } @@ -112,7 +112,7 @@ namespace NKikimr { =============================================================================================== Green | No restrictions. ----------------------------------------------------------------------------------------------- - Yellow | No restrictions, translate Yellow color to tablet and other VDisks in the group. + Yellow | No restrictions, translate Yellow color to tablet and other VDisks in the group. ----------------------------------------------------------------------------------------------- Orange | Disk space for tablet is over. Tablet can boot (i.e. make TEvVPut for discovery | with IgnoreBlock, block generation, delete data via garbage collection commands). @@ -135,26 +135,26 @@ namespace NKikimr { template <typename TPutEventPtr> bool AllowImpl(const TOutOfSpaceLogic &logic, const TActorContext &ctx, TPutEventPtr &ev) { Y_UNUSED(ctx); - auto color = logic.VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = logic.Stat->Lookup(TOutOfSpaceLogic::TStat::Put, color).HandleMsg(ev->Get()->GetCachedByteSize()); - switch (color) { - case TSpaceColor::GREEN: - case TSpaceColor::CYAN: + auto color = logic.VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = logic.Stat->Lookup(TOutOfSpaceLogic::TStat::Put, color).HandleMsg(ev->Get()->GetCachedByteSize()); + switch (color) { + case TSpaceColor::GREEN: + case TSpaceColor::CYAN: case TSpaceColor::LIGHT_YELLOW: - case TSpaceColor::YELLOW: - case TSpaceColor::LIGHT_ORANGE: + case TSpaceColor::YELLOW: + case TSpaceColor::LIGHT_ORANGE: return stat.Allow(); - case TSpaceColor::ORANGE: + case TSpaceColor::ORANGE: { // allow writes with IgnoreBlock=true auto &record = ev->Get()->Record; const bool allow = record.GetIgnoreBlock(); return stat.Pass(allow); } - case TSpaceColor::RED: - case 
TSpaceColor::BLACK: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: + case TSpaceColor::RED: + case TSpaceColor::BLACK: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: return stat.NotAllow(); } } @@ -169,105 +169,105 @@ namespace NKikimr { bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, TEvBlobStorage::TEvVBlock::TPtr &ev) const { Y_UNUSED(ctx); - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::Block, color).HandleMsg(ev->Get()->GetCachedByteSize()); - switch (color) { - case TSpaceColor::GREEN: - case TSpaceColor::CYAN: + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::Block, color).HandleMsg(ev->Get()->GetCachedByteSize()); + switch (color) { + case TSpaceColor::GREEN: + case TSpaceColor::CYAN: case TSpaceColor::LIGHT_YELLOW: - case TSpaceColor::YELLOW: - case TSpaceColor::LIGHT_ORANGE: + case TSpaceColor::YELLOW: + case TSpaceColor::LIGHT_ORANGE: return stat.Allow(); - case TSpaceColor::ORANGE: + case TSpaceColor::ORANGE: { NKikimrBlobStorage::TEvVBlock &record = ev->Get()->Record; const ui64 tabletId = record.GetTabletId(); const bool allow = Hull->HasBlockRecordFor(tabletId); return stat.Pass(allow); } - case TSpaceColor::RED: { + case TSpaceColor::RED: { // FIXME: handle complete removal only return stat.NotAllow(); } - case TSpaceColor::BLACK: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: + case TSpaceColor::BLACK: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: + case 
NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: return stat.NotAllow(); } } bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, TEvBlobStorage::TEvVCollectGarbage::TPtr &ev) const { Y_UNUSED(ctx); - // FIXME: accept hard barriers in red color - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::CollectGarbage, color).HandleMsg(ev->Get()->GetCachedByteSize()); - return stat.Pass(DefaultAllow(color)); + // FIXME: accept hard barriers in red color + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::CollectGarbage, color).HandleMsg(ev->Get()->GetCachedByteSize()); + return stat.Pass(DefaultAllow(color)); } bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, TEvLocalSyncData::TPtr &ev) const { Y_UNUSED(ctx); - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::LocalSyncData, color).HandleMsg(ev->Get()->ByteSize()); - return stat.Pass(DefaultAllow(color)); + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::LocalSyncData, color).HandleMsg(ev->Get()->ByteSize()); + return stat.Pass(DefaultAllow(color)); } bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, TEvAnubisOsirisPut::TPtr &ev) const { - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::AnubisOsirisPut, color).HandleMsg(ev->Get()->ByteSize()); - switch (color) { - case TSpaceColor::GREEN: - case TSpaceColor::CYAN: + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::AnubisOsirisPut, color).HandleMsg(ev->Get()->ByteSize()); + switch (color) { + case TSpaceColor::GREEN: + case TSpaceColor::CYAN: case TSpaceColor::LIGHT_YELLOW: - case TSpaceColor::YELLOW: - case TSpaceColor::LIGHT_ORANGE: + case TSpaceColor::YELLOW: + case TSpaceColor::LIGHT_ORANGE: return stat.Allow(); - case TSpaceColor::ORANGE: + case 
TSpaceColor::ORANGE: { TEvAnubisOsirisPut *msg = ev->Get(); if (msg->IsAnubis()) { - LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix - << "OUT OF SPACE while removing LogoBlob we got from Anubis;" + LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + << "OUT OF SPACE while removing LogoBlob we got from Anubis;" << " LogoBlobId# " << msg->LogoBlobId << " Marker# BSVSOOSL01"); return stat.NotAllow(); } else { // We MUST allow Osiris writes. W/o Osiris we can't work. // There should not be too much of them. - LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix - << "OUT OF SPACE while adding resurrected by Osiris LogoBlob;" + LOG_ERROR_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + << "OUT OF SPACE while adding resurrected by Osiris LogoBlob;" << " FORCING addition: LogoBlobId# " << msg->LogoBlobId << " Marker# BSVSOOSL02"); return stat.Allow(); } } - case TSpaceColor::RED: - case TSpaceColor::BLACK: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: - case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: + case TSpaceColor::RED: + case TSpaceColor::BLACK: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: + case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: return stat.NotAllow(); } } bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, TEvRecoveredHugeBlob::TPtr &ev) const { Y_UNUSED(ctx); - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::RecoveredHugeBlob, color).HandleMsg(ev->Get()->ByteSize()); - return stat.Pass(DefaultAllow(color)); + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::RecoveredHugeBlob, color).HandleMsg(ev->Get()->ByteSize()); + return stat.Pass(DefaultAllow(color)); } bool TOutOfSpaceLogic::Allow(const TActorContext &ctx, 
TEvDetectedPhantomBlob::TPtr &ev) const { Y_UNUSED(ctx); - auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); - auto &stat = Stat->Lookup(TStat::DetectedPhantomBlob, color).HandleMsg(ev->Get()->ByteSize()); - return stat.Pass(DefaultAllow(color)); + auto color = VCtx->GetOutOfSpaceState().GetGlobalColor(); + auto &stat = Stat->Lookup(TStat::DetectedPhantomBlob, color).HandleMsg(ev->Get()->ByteSize()); + return stat.Pass(DefaultAllow(color)); } void TOutOfSpaceLogic::RenderHtml(IOutputStream &str) const { Stat->RenderHtml(str); } - bool TOutOfSpaceLogic::DefaultAllow(ESpaceColor color) const { - return color <= TSpaceColor::ORANGE; + bool TOutOfSpaceLogic::DefaultAllow(ESpaceColor color) const { + return color <= TSpaceColor::ORANGE; } } // NKikimr diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.h b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.h index 0a0290ba77..8def77caa1 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.h +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_logic.h @@ -37,7 +37,7 @@ namespace NKikimr { class TStat; mutable std::unique_ptr<TStat> Stat; - bool DefaultAllow(ESpaceColor color) const; + bool DefaultAllow(ESpaceColor color) const; template <typename TEvPtr> friend bool AllowImpl(const TOutOfSpaceLogic &logic, const TActorContext &ctx, TEvPtr &ev); diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_tracker.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_tracker.cpp index a80cb514b1..8af5bd8dc2 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_tracker.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_oos_tracker.cpp @@ -1,5 +1,5 @@ #include "skeleton_oos_tracker.h" - + #include <ydb/core/blobstorage/base/html.h> #include <ydb/core/blobstorage/vdisk/common/vdisk_context.h> #include <ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h> @@ -29,35 +29,35 @@ namespace NKikimr { friend class TActorBootstrapped<TDskSpaceTrackerActor>; void 
CheckState(const TActorContext &ctx) { - auto zone = VCtx->OutOfSpaceState.GetGlobalColor(); + auto zone = VCtx->OutOfSpaceState.GetGlobalColor(); MonGroup.DskOutOfSpace() = zone; - auto priority = NActors::NLog::PRI_TRACE; - - switch (zone) { - case TSpaceColor::YELLOW: - priority = NActors::NLog::PRI_WARN; - ++YellowZonePeriods; - break; - case TSpaceColor::LIGHT_ORANGE: - case TSpaceColor::ORANGE: - priority = NActors::NLog::PRI_ERROR; - ++OrangeZonePeriods; - break; - case TSpaceColor::RED: - priority = NActors::NLog::PRI_CRIT; - ++RedZonePeriods; - break; - case TSpaceColor::BLACK: - priority = NActors::NLog::PRI_CRIT; - ++BlackZonePeriods; - break; - default: - break; - } - - LOG_LOG_S(ctx, priority, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix - << "TDskSpaceTrackerActor: " << zone << " ZONE" << " Marker# BSVSOOST01"); + auto priority = NActors::NLog::PRI_TRACE; + + switch (zone) { + case TSpaceColor::YELLOW: + priority = NActors::NLog::PRI_WARN; + ++YellowZonePeriods; + break; + case TSpaceColor::LIGHT_ORANGE: + case TSpaceColor::ORANGE: + priority = NActors::NLog::PRI_ERROR; + ++OrangeZonePeriods; + break; + case TSpaceColor::RED: + priority = NActors::NLog::PRI_CRIT; + ++RedZonePeriods; + break; + case TSpaceColor::BLACK: + priority = NActors::NLog::PRI_CRIT; + ++BlackZonePeriods; + break; + default: + break; + } + + LOG_LOG_S(ctx, priority, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + << "TDskSpaceTrackerActor: " << zone << " ZONE" << " Marker# BSVSOOST01"); // send message to PDisk Become(&TThis::AskFunc); ctx.Send(PDiskCtx->PDiskId, @@ -74,7 +74,7 @@ namespace NKikimr { void Handle(NPDisk::TEvCheckSpaceResult::TPtr &ev, const TActorContext &ctx) { const auto *msg = ev->Get(); - LOG_DEBUG_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix + LOG_DEBUG_S(ctx, NKikimrServices::BS_SKELETON, VCtx->VDiskLogPrefix << "TDskSpaceTrackerActor:handle TEvCheckSpaceResult; msg# " << msg->ToString() << " Marker# BSVSOOST02"); @@ -119,20 +119,20 
@@ namespace NKikimr { TABLER() { auto flags = VCtx->OutOfSpaceState.GetLocalStatusFlags(); TABLED() {str << "Local Disk State";} - TABLED() {str << StatusFlagToSpaceColor(flags);} + TABLED() {str << StatusFlagToSpaceColor(flags);} } TABLER() { TABLED() {str << "Global BlobStorage Group State";} - TABLED() {str << StatusFlagToSpaceColor(oosStatus.Flags);} + TABLED() {str << StatusFlagToSpaceColor(oosStatus.Flags);} + } + TABLER() { + TABLED() {str << "Global Whiteboard Flag";} + TABLED() { + auto wb_flag = VCtx->OutOfSpaceState.GlobalWhiteboardFlag(); + THtmlLightSignalRenderer(wb_flag, TStringBuilder() << wb_flag).Output(str); + } } TABLER() { - TABLED() {str << "Global Whiteboard Flag";} - TABLED() { - auto wb_flag = VCtx->OutOfSpaceState.GlobalWhiteboardFlag(); - THtmlLightSignalRenderer(wb_flag, TStringBuilder() << wb_flag).Output(str); - } - } - TABLER() { TABLED() {str << "Local Disk Approximate Free Space Share";} TABLED() {str << oosStatus.ApproximateFreeSpaceShare * 100 << "%";} } diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmultiput_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmultiput_actor.cpp index 6198081741..ac7dcd7d58 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmultiput_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmultiput_actor.cpp @@ -77,7 +77,7 @@ namespace NKikimr { } TInstant now = TAppData::TimeProvider->Now(); - const ui64 bufferSizeBytes = Event->Get()->GetBufferBytes(); + const ui64 bufferSizeBytes = Event->Get()->GetBufferBytes(); auto vMultiPutResult = std::make_unique<TEvBlobStorage::TEvVMultiPutResult>(NKikimrProto::OK, vdisk, cookie, now, Event->Get()->GetCachedByteSize(), &vMultiPutRecord, SkeletonFrontIDPtr, MultiPutResMsgsPtr, nullptr, bufferSizeBytes, std::move(Event->TraceId), IncarnationGuid, TString()); diff --git a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper.cpp b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper.cpp index 
55a800e861..c7893daa0a 100644 --- a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper.cpp +++ b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper.cpp @@ -51,12 +51,12 @@ namespace NKikimr { // just trim log based by TrimTailLsn (which is confirmed lsn from peers) bool PerformTrimTailAction() { - const bool hasToCommit = KeepState.PerformTrimTailAction(); + const bool hasToCommit = KeepState.PerformTrimTailAction(); // we don't need to commit because we either remove mem pages or // schedule to remove some chunks (but they may be used by snapshots, // so wait until TEvSyncLogFreeChunk message) - Y_VERIFY(!hasToCommit); + Y_VERIFY(!hasToCommit); return false; } diff --git a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_committer.cpp b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_committer.cpp index 4786739049..e02b627a69 100644 --- a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_committer.cpp +++ b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_committer.cpp @@ -42,7 +42,7 @@ namespace NKikimr { TLsnSeg seg = SlCtx->LsnMngr->AllocLsnForLocalUse(); // commit msg auto commitMsg = std::make_unique<NPDisk::TEvLog>(SlCtx->PDiskCtx->Dsk->Owner, - SlCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureSyncLogIdx, + SlCtx->PDiskCtx->Dsk->OwnerRound, TLogSignature::SignatureSyncLogIdx, CommitRecord, EntryPointSerializer.GetSerializedData(), seg, nullptr); if (CommitRecord.CommitChunks || CommitRecord.DeleteChunks) { diff --git a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_state.cpp b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_state.cpp index ee4fba86f1..31448b8e46 100644 --- a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_state.cpp +++ b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_state.cpp @@ -369,7 +369,7 @@ namespace NKikimr { VDISKP(VCtx->VDiskLogPrefix, "KEEPER: we've got disk overflow for SyncLog:" " numCurChunks# %" 
PRIu32 " numChunksToAdd# %" PRIu32 - " MaxDiskChunks# %" PRIu32, numCurChunks, numChunksToAdd, MaxDiskChunks)); + " MaxDiskChunks# %" PRIu32, numCurChunks, numChunksToAdd, MaxDiskChunks)); } // trim SyncLog in case of disk overflow diff --git a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_ut.cpp b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_ut.cpp index 1e31ad363b..44808438be 100644 --- a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_ut.cpp +++ b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogkeeper_ut.cpp @@ -76,7 +76,7 @@ namespace NKikimr { new NMonitoring::TDynamicCounters(), TVDiskID(), nullptr, - TPDiskCategory::DEVICE_TYPE_UNKNOWN); + TPDiskCategory::DEVICE_TYPE_UNKNOWN); const ui64 pdiskGuid = 19283489374; const ui32 chunkSize = 512u << 10u; diff --git a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogreader.cpp b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogreader.cpp index 242a26d4f1..2873bcb56c 100644 --- a/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogreader.cpp +++ b/ydb/core/blobstorage/vdisk/synclog/blobstorage_synclogreader.cpp @@ -163,7 +163,7 @@ namespace NKikimr { LOG_DEBUG(ctx, BS_SYNCLOG, VDISKP(SlCtx->VCtx->VDiskLogPrefix, "SYNCLOG REPLY: SourceVDisk# %s guid# %" PRIu64 " lsn# %" PRIu64, - SourceVDisk.ToString().data(), static_cast<ui64>(VDiskIncarnationGuid), lsn)); + SourceVDisk.ToString().data(), static_cast<ui64>(VDiskIncarnationGuid), lsn)); auto result = std::make_unique<TEvBlobStorage::TEvVSyncResult>(status, SelfVDiskId, TSyncState(VDiskIncarnationGuid, lsn), finished, SlCtx->VCtx->GetOutOfSpaceState().GetLocalStatusFlags(), diff --git a/ydb/core/blobstorage/ya.make b/ydb/core/blobstorage/ya.make index 6cfe7dd955..17f379ebf1 100644 --- a/ydb/core/blobstorage/ya.make +++ b/ydb/core/blobstorage/ya.make @@ -44,7 +44,7 @@ RECURSE( dsproxy groupinfo incrhuge - lwtrace_probes + lwtrace_probes nodewarden other pdisk diff --git a/ydb/core/client/client_ut.cpp 
b/ydb/core/client/client_ut.cpp index 841f9121ff..b94c722588 100644 --- a/ydb/core/client/client_ut.cpp +++ b/ydb/core/client/client_ut.cpp @@ -2164,7 +2164,7 @@ Y_UNIT_TEST_SUITE(TClientTest) { const TActorId followerId = runtime.GrabEdgeEvent<TEvTablet::TEvRestored>(edge)->Get()->UserTabletActor; Y_UNUSED(followerId); - + { NTabletPipe::TClientConfig pipeClientConfig; pipeClientConfig.AllowFollower = true; diff --git a/ydb/core/cms/cluster_info_ut.cpp b/ydb/core/cms/cluster_info_ut.cpp index b224b30d60..8b9d3b3d65 100644 --- a/ydb/core/cms/cluster_info_ut.cpp +++ b/ydb/core/cms/cluster_info_ut.cpp @@ -376,7 +376,7 @@ Y_UNIT_TEST_SUITE(TClusterInfoTest) { permission.Deadline = now - TDuration::Seconds(30); cluster.SetNodeState(2, DOWN, MakeSystemStateInfo("1")); UNIT_ASSERT_VALUES_EQUAL(cluster.AddLocks(permission, nullptr), 1); - UNIT_ASSERT_VALUES_EQUAL(cluster.Node(2).State, EState::RESTART); + UNIT_ASSERT_VALUES_EQUAL(cluster.Node(2).State, EState::RESTART); UNIT_ASSERT_VALUES_EQUAL(cluster.Node(2).Lock->ActionDeadline, now + TDuration::Seconds(30)); cluster.ClearNode(1); diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index 4529df7bb1..b458fe672c 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -211,7 +211,7 @@ public: } }; -void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseConfig *config, +void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseConfig *config, ui32 pdisks, ui32 vdiskPerPdisk = 4, const TNodeTenantsMap &tenants = {}) { TGuard<TMutex> guard(TFakeNodeWhiteboardService::Mutex); @@ -226,7 +226,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC auto now = Now(); for (ui32 groupId = 0; groupId < numGroups; ++groupId) { - auto &group = *config->AddGroup(); + auto &group = *config->AddGroup(); group.SetGroupId(groupId); group.SetGroupGeneration(1); if (numNodes >= 8) @@ -267,7 +267,7 @@ void 
GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC pdisk.SetTotalSize(200ULL << 30); pdisk.SetState(NKikimrBlobStorage::TPDiskState::Normal); - auto &pdiskConfig = *config->AddPDisk(); + auto &pdiskConfig = *config->AddPDisk(); pdiskConfig.SetNodeId(nodeId); pdiskConfig.SetPDiskId(pdiskId); pdiskConfig.SetPath("/pdisk.data"); @@ -288,7 +288,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC vdisk.SetVDiskState(NKikimrWhiteboard::OK); vdisk.SetReplicated(true); - auto &vdiskConfig = *config->AddVSlot(); + auto &vdiskConfig = *config->AddVSlot(); vdiskConfig.MutableVSlotId()->SetNodeId(nodeId); vdiskConfig.MutableVSlotId()->SetPDiskId(pdiskId); vdiskConfig.MutableVSlotId()->SetVSlotId(1000 + vdiskIndex); @@ -296,7 +296,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC vdiskConfig.SetGroupGeneration(1); vdiskConfig.SetFailDomainIdx(nodeIndex % 8); - config->MutableGroup(groupId)->AddVSlotId() + config->MutableGroup(groupId)->AddVSlotId() ->CopyFrom(vdiskConfig.GetVSlotId()); } } @@ -447,7 +447,7 @@ TCmsTestEnv::TCmsTestEnv(ui32 nodeCount, TFakeNodeWhiteboardService::Config.MutableResponse()->ClearStatus(); auto &status = *TFakeNodeWhiteboardService::Config.MutableResponse()->AddStatus(); status.SetSuccess(true); - auto *config = status.MutableBaseConfig(); + auto *config = status.MutableBaseConfig(); GenerateExtendedInfo(*this, config, pdisks, 4, tenants); @@ -471,10 +471,10 @@ TCmsTestEnv::TCmsTestEnv(ui32 nodeCount, Sender = AllocateEdgeActor(); - NKikimrCms::TCmsConfig cmsConfig; - cmsConfig.MutableTenantLimits()->SetDisabledNodesRatioLimit(0); - cmsConfig.MutableClusterLimits()->SetDisabledNodesRatioLimit(0); - SetCmsConfig(cmsConfig); + NKikimrCms::TCmsConfig cmsConfig; + cmsConfig.MutableTenantLimits()->SetDisabledNodesRatioLimit(0); + cmsConfig.MutableClusterLimits()->SetDisabledNodesRatioLimit(0); + SetCmsConfig(cmsConfig); } TCmsTestEnv::TCmsTestEnv(ui32 nodeCount, 
diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index 95365f7731..471880e34f 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -115,15 +115,15 @@ struct TCmsSentinelConfig { { TMap<EPDiskState, ui32> stateLimits; // error states - stateLimits[NKikimrBlobStorage::TPDiskState::InitialFormatReadError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::InitialSysLogReadError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::InitialSysLogParseError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::InitialCommonLogReadError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::InitialCommonLogParseError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::CommonLoggerInitError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::OpenFileError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::ChunkQuotaError] = 60; - stateLimits[NKikimrBlobStorage::TPDiskState::DeviceIoError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::InitialFormatReadError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::InitialSysLogReadError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::InitialSysLogParseError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::InitialCommonLogReadError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::InitialCommonLogParseError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::CommonLoggerInitError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::OpenFileError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::ChunkQuotaError] = 60; + stateLimits[NKikimrBlobStorage::TPDiskState::DeviceIoError] = 60; // node online, pdisk missing stateLimits[NKikimrBlobStorage::TPDiskState::Missing] = 60; // node timeout diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index a87dca8c02..638b78fe09 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -57,7 +57,7 @@ void TPDiskStatusComputer::AddState(EPDiskState state) { ++StateCounter; } } else { - PrevState = 
std::exchange(State, state); + PrevState = std::exchange(State, state); StateCounter = 1; } } @@ -72,27 +72,27 @@ EPDiskStatus TPDiskStatusComputer::Compute(EPDiskStatus current, TString& reason const ui32 stateLimit = (it != StateLimits.end()) ? it->second : DefaultStateLimit; if (!stateLimit || StateCounter < stateLimit) { - reason = TStringBuilder() - << " PrevState# " << PrevState - << " State# " << State - << " StateCounter# " << StateCounter - << " current# " << current; - switch (PrevState) { - case NKikimrBlobStorage::TPDiskState::Unknown: - return current; - default: - return EPDiskStatus::INACTIVE; - } + reason = TStringBuilder() + << " PrevState# " << PrevState + << " State# " << State + << " StateCounter# " << StateCounter + << " current# " << current; + switch (PrevState) { + case NKikimrBlobStorage::TPDiskState::Unknown: + return current; + default: + return EPDiskStatus::INACTIVE; + } } reason = TStringBuilder() - << " PrevState# " << PrevState + << " PrevState# " << PrevState << " State# " << State << " StateCounter# " << StateCounter << " StateLimit# " << stateLimit; - PrevState = State; - + PrevState = State; + switch (State) { case NKikimrBlobStorage::TPDiskState::Normal: return EPDiskStatus::ACTIVE; @@ -140,24 +140,24 @@ EPDiskStatus TPDiskStatus::GetStatus() const { return Current; } -bool TPDiskStatus::IsNewStatusGood() const { - TString unused; - switch (Compute(Current, unused)) { - case EPDiskStatus::INACTIVE: - case EPDiskStatus::ACTIVE: - case EPDiskStatus::SPARE: - return true; - - case EPDiskStatus::UNKNOWN: - case EPDiskStatus::FAULTY: - case EPDiskStatus::BROKEN: - case EPDiskStatus::TO_BE_REMOVED: - case EPDiskStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_: - case EPDiskStatus::EDriveStatus_INT_MAX_SENTINEL_DO_NOT_USE_: - return false; - } -} - +bool TPDiskStatus::IsNewStatusGood() const { + TString unused; + switch (Compute(Current, unused)) { + case EPDiskStatus::INACTIVE: + case EPDiskStatus::ACTIVE: + case EPDiskStatus::SPARE: 
+ return true; + + case EPDiskStatus::UNKNOWN: + case EPDiskStatus::FAULTY: + case EPDiskStatus::BROKEN: + case EPDiskStatus::TO_BE_REMOVED: + case EPDiskStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_: + case EPDiskStatus::EDriveStatus_INT_MAX_SENTINEL_DO_NOT_USE_: + return false; + } +} + bool TPDiskStatus::IsChangingAllowed() const { return ChangingAllowed; } @@ -187,13 +187,13 @@ void TPDiskInfo::AddState(EPDiskState state) { Touch(); } -/// TClusterMap +/// TClusterMap -TClusterMap::TClusterMap(TCmsStatePtr state) +TClusterMap::TClusterMap(TCmsStatePtr state) : State(state) -{} +{} -void TClusterMap::AddPDisk(const TPDiskID& id) { +void TClusterMap::AddPDisk(const TPDiskID& id) { Y_VERIFY(State->ClusterInfo->HasNode(id.NodeId)); Y_VERIFY(State->ClusterInfo->HasPDisk(id)); const auto& location = State->ClusterInfo->Node(id.NodeId).Location; @@ -203,18 +203,18 @@ void TClusterMap::AddPDisk(const TPDiskID& id) { ByRack[location.HasKey(TNodeLocation::TKeys::Rack) ? location.GetRackId() : ""].insert(id); } -/// TGuardian - -TGuardian::TGuardian(TCmsStatePtr state, ui32 dataCenterRatio, ui32 roomRatio, ui32 rackRatio) - : TClusterMap(state) - , DataCenterRatio(dataCenterRatio) - , RoomRatio(roomRatio) - , RackRatio(rackRatio) -{ -} - -TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TString& issues, - TPDiskIDSet& disallowed) const { +/// TGuardian + +TGuardian::TGuardian(TCmsStatePtr state, ui32 dataCenterRatio, ui32 roomRatio, ui32 rackRatio) + : TClusterMap(state) + , DataCenterRatio(dataCenterRatio) + , RoomRatio(roomRatio) + , RackRatio(rackRatio) +{ +} + +TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TString& issues, + TPDiskIDSet& disallowed) const { TPDiskIDSet result; TStringBuilder issuesBuilder; @@ -251,10 +251,10 @@ TClusterMap::TPDiskIDSet TGuardian::GetAllowedPDisks(const TClusterMap& all, TSt for (const auto& kv : ByRack) { Y_VERIFY(all.ByRack.contains(kv.first)); - // ignore check if there 
is only one node in a rack - if (kv.second.size() == 1) { - continue; - } + // ignore check if there is only one node in a rack + if (kv.second.size() == 1) { + continue; + } if (kv.first && !CheckRatio(kv, all.ByRack, RackRatio)) { LOG_IGNORED(Rack); disallowed.insert(kv.second.begin(), kv.second.end()); @@ -871,9 +871,9 @@ class TSentinel: public TActorBootstrapped<TSentinel> { return; } - TClusterMap all(CmsState); + TClusterMap all(CmsState); TGuardian changed(CmsState, Config.DataCenterRatio, Config.RoomRatio, Config.RackRatio); - TClusterMap::TPDiskIDSet alwaysAllowed; + TClusterMap::TPDiskIDSet alwaysAllowed; for (auto& pdisk : SentinelState->PDisks) { const TPDiskID& id = pdisk.first; @@ -893,11 +893,11 @@ class TSentinel: public TActorBootstrapped<TSentinel> { all.AddPDisk(id); if (info.IsChanged()) { - if (info.IsNewStatusGood()) { - alwaysAllowed.insert(id); - } else { - changed.AddPDisk(id); - } + if (info.IsNewStatusGood()) { + alwaysAllowed.insert(id); + } else { + changed.AddPDisk(id); + } } else { info.AllowChanging(); } @@ -906,9 +906,9 @@ class TSentinel: public TActorBootstrapped<TSentinel> { TString issues; THashSet<TPDiskID, TPDiskIDHash> disallowed; - TClusterMap::TPDiskIDSet allowed = changed.GetAllowedPDisks(all, issues, disallowed); - Copy(alwaysAllowed.begin(), alwaysAllowed.end(), std::inserter(allowed, allowed.begin())); - for (const TPDiskID& id : allowed) { + TClusterMap::TPDiskIDSet allowed = changed.GetAllowedPDisks(all, issues, disallowed); + Copy(alwaysAllowed.begin(), alwaysAllowed.end(), std::inserter(allowed, allowed.begin())); + for (const TPDiskID& id : allowed) { Y_VERIFY(SentinelState->PDisks.contains(id)); TPDiskInfo& info = SentinelState->PDisks.at(id); diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 3a75e773aa..8aa6934baf 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -30,8 +30,8 @@ private: const ui32& DefaultStateLimit; const TLimitsMap& StateLimits; - 
EPDiskState State = NKikimrBlobStorage::TPDiskState::Unknown; - mutable EPDiskState PrevState = State; + EPDiskState State = NKikimrBlobStorage::TPDiskState::Unknown; + mutable EPDiskState PrevState = State; ui64 StateCounter; }; // TPDiskStatusComputer @@ -46,7 +46,7 @@ public: void ApplyChanges(TString& reason); void ApplyChanges(); EPDiskStatus GetStatus() const; - bool IsNewStatusGood() const; + bool IsNewStatusGood() const; bool IsChangingAllowed() const; void AllowChanging(); @@ -74,22 +74,22 @@ private: }; // TPDiskInfo -class TClusterMap { -public: +class TClusterMap { +public: using TPDiskIDSet = THashSet<TPDiskID, TPDiskIDHash>; using TDistribution = THashMap<TString, TPDiskIDSet>; - TCmsStatePtr State; - TDistribution ByDataCenter; - TDistribution ByRoom; - TDistribution ByRack; - - TClusterMap(TCmsStatePtr state); - - void AddPDisk(const TPDiskID& id); -}; // TClusterMap - -class TGuardian : public TClusterMap { + TCmsStatePtr State; + TDistribution ByDataCenter; + TDistribution ByRoom; + TDistribution ByRack; + + TClusterMap(TCmsStatePtr state); + + void AddPDisk(const TPDiskID& id); +}; // TClusterMap + +class TGuardian : public TClusterMap { static bool CheckRatio(ui32 check, ui32 base, ui32 ratio) { return (check * 100) <= (base * ratio); } @@ -101,7 +101,7 @@ class TGuardian : public TClusterMap { public: explicit TGuardian(TCmsStatePtr state, ui32 dataCenterRatio = 100, ui32 roomRatio = 100, ui32 rackRatio = 100); - TPDiskIDSet GetAllowedPDisks(const TClusterMap& all, TString& issues, TPDiskIDSet& disallowed) const; + TPDiskIDSet GetAllowedPDisks(const TClusterMap& all, TString& issues, TPDiskIDSet& disallowed) const; private: const ui32 DataCenterRatio; diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index a1be2dafc4..62c0fdf309 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -14,7 +14,7 @@ namespace NKikimr { namespace NCmsTest { static constexpr ui32 DefaultStateLimit = 5; -static 
constexpr ui32 DefaultErrorStateLimit = 60; +static constexpr ui32 DefaultErrorStateLimit = 60; auto DefaultStateLimits = NCms::TCmsSentinelConfig::DefaultStateLimits(); static constexpr NCms::EPDiskState ErrorStates[] = { @@ -65,12 +65,12 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { const EPDiskStatus initialStatus = EPDiskStatus::ACTIVE; TPDiskStatus st(initialStatus, DefaultStateLimit, DefaultStateLimits); - for (ui32 i = 1; i < DefaultStateLimits[state]; ++i) { - st.AddState(state); - - UNIT_ASSERT(!st.IsChanged()); - UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), initialStatus); - } + for (ui32 i = 1; i < DefaultStateLimits[state]; ++i) { + st.AddState(state); + + UNIT_ASSERT(!st.IsChanged()); + UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), initialStatus); + } st.AddState(state); UNIT_ASSERT(st.IsChanged()); @@ -79,42 +79,42 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } } - Y_UNIT_TEST(PDiskInactiveAfterStateChange) { - for (const EPDiskState state : ErrorStates) { - const EPDiskStatus initialStatus = EPDiskStatus::ACTIVE; - TPDiskStatus st(initialStatus, DefaultStateLimit, DefaultStateLimits); - - for (ui32 i = 1; i < DefaultStateLimits[state]; ++i) { - st.AddState(state); - - UNIT_ASSERT(!st.IsChanged()); - UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), initialStatus); - } - st.AddState(state); - UNIT_ASSERT(st.IsChanged()); - st.ApplyChanges(); - UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::FAULTY); - - auto it = DefaultStateLimits.find(NKikimrBlobStorage::TPDiskState::Normal); - const ui32 stateLimit = (it != DefaultStateLimits.end()) ? 
it->second : DefaultStateLimit; - for (ui32 i = 1; i < stateLimit; ++i) { - st.AddState(NKikimrBlobStorage::TPDiskState::Normal); - - if (i == 1) { - UNIT_ASSERT(st.IsChanged()); - st.ApplyChanges(); - } else { - UNIT_ASSERT(!st.IsChanged()); - } - UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::INACTIVE); - } - st.AddState(NKikimrBlobStorage::TPDiskState::Normal); - UNIT_ASSERT(st.IsChanged()); - st.ApplyChanges(); - UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::ACTIVE); - } - } - + Y_UNIT_TEST(PDiskInactiveAfterStateChange) { + for (const EPDiskState state : ErrorStates) { + const EPDiskStatus initialStatus = EPDiskStatus::ACTIVE; + TPDiskStatus st(initialStatus, DefaultStateLimit, DefaultStateLimits); + + for (ui32 i = 1; i < DefaultStateLimits[state]; ++i) { + st.AddState(state); + + UNIT_ASSERT(!st.IsChanged()); + UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), initialStatus); + } + st.AddState(state); + UNIT_ASSERT(st.IsChanged()); + st.ApplyChanges(); + UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::FAULTY); + + auto it = DefaultStateLimits.find(NKikimrBlobStorage::TPDiskState::Normal); + const ui32 stateLimit = (it != DefaultStateLimits.end()) ? 
it->second : DefaultStateLimit; + for (ui32 i = 1; i < stateLimit; ++i) { + st.AddState(NKikimrBlobStorage::TPDiskState::Normal); + + if (i == 1) { + UNIT_ASSERT(st.IsChanged()); + st.ApplyChanges(); + } else { + UNIT_ASSERT(!st.IsChanged()); + } + UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::INACTIVE); + } + st.AddState(NKikimrBlobStorage::TPDiskState::Normal); + UNIT_ASSERT(st.IsChanged()); + st.ApplyChanges(); + UNIT_ASSERT_VALUES_EQUAL(st.GetStatus(), EPDiskStatus::ACTIVE); + } + } + Y_UNIT_TEST(PDiskFaultyState) { for (const EPDiskState state : FaultyStates) { const EPDiskStatus initialStatus = EPDiskStatus::ACTIVE; @@ -136,15 +136,15 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { } } - TCmsStatePtr MockCmsState(ui16 numDataCenter, ui16 racksPerDataCenter, ui16 nodesPerRack, bool anyDC, bool anyRack) { + TCmsStatePtr MockCmsState(ui16 numDataCenter, ui16 racksPerDataCenter, ui16 nodesPerRack, bool anyDC, bool anyRack) { TCmsStatePtr state = new TCmsState; state->ClusterInfo = new TClusterInfo; - for (ui64 dc : xrange(numDataCenter)) { - for (ui64 rack : xrange(racksPerDataCenter)) { - for (ui64 node : xrange(nodesPerRack)) { - const ui64 id = (dc << 32) | (rack << 16) | node; - const TString name = TStringBuilder() << "dc_" << dc << "-rack_" << rack << "-node_" << node; + for (ui64 dc : xrange(numDataCenter)) { + for (ui64 rack : xrange(racksPerDataCenter)) { + for (ui64 node : xrange(nodesPerRack)) { + const ui64 id = (dc << 32) | (rack << 16) | node; + const TString name = TStringBuilder() << "dc_" << dc << "-rack_" << rack << "-node_" << node; NActorsInterconnect::TNodeLocation location; if (!anyDC) { @@ -157,12 +157,12 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { state->ClusterInfo->AddNode(TEvInterconnect::TNodeInfo(id, name, name, name, 10000, TNodeLocation(location)), nullptr); - NKikimrBlobStorage::TBaseConfig::TPDisk pdisk; - pdisk.SetNodeId(id); - pdisk.SetPDiskId(0); - pdisk.SetPath("pdisk.data"); - state->ClusterInfo->AddPDisk(pdisk); - } + 
NKikimrBlobStorage::TBaseConfig::TPDisk pdisk; + pdisk.SetNodeId(id); + pdisk.SetPDiskId(0); + pdisk.SetPath("pdisk.data"); + state->ClusterInfo->AddPDisk(pdisk); + } } } @@ -173,7 +173,7 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { UNIT_ASSERT(!anyDC || numDataCenter == 1); for (ui16 nodesPerDataCenter : nodesPerDataCenterVariants) { - TCmsStatePtr state = MockCmsState(numDataCenter, nodesPerDataCenter, 1, anyDC, false); + TCmsStatePtr state = MockCmsState(numDataCenter, nodesPerDataCenter, 1, anyDC, false); TGuardian all(state); TGuardian changed(state, 50); THashSet<TPDiskID, TPDiskIDHash> changedSet; @@ -182,11 +182,11 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { TVector<ui32> changedCount(numDataCenter); for (const auto& node : nodes) { - const ui64 nodeId = node.second->NodeId; + const ui64 nodeId = node.second->NodeId; const TPDiskID id(nodeId, 0); all.AddPDisk(id); - if (changedCount[nodeId >> 32]++ < (nodesPerDataCenter / 2)) { + if (changedCount[nodeId >> 32]++ < (nodesPerDataCenter / 2)) { changed.AddPDisk(id); changedSet.insert(id); } @@ -201,10 +201,10 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { changedCount.assign(numDataCenter, 0); for (const auto& node : nodes) { - const ui64 nodeId = node.second->NodeId; + const ui64 nodeId = node.second->NodeId; const TPDiskID id(nodeId, 0); - if (changedCount[nodeId >> 32]++ < ((nodesPerDataCenter / 2) + 1)) { + if (changedCount[nodeId >> 32]++ < ((nodesPerDataCenter / 2) + 1)) { changed.AddPDisk(id); changedSet.insert(id); } @@ -229,71 +229,71 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { GuardianDataCenterRatio(1, {3, 4, 5}, true); } - void GuardianRackRatio(ui16 numRacks, const TVector<ui16>& nodesPerRackVariants, bool anyRack) { - for (ui16 nodesPerRack : nodesPerRackVariants) { - TCmsStatePtr state = MockCmsState(1, numRacks, nodesPerRack, false, anyRack); - - TGuardian all(state); - TGuardian changed(state, 100, 100, 50); - THashSet<TPDiskID, TPDiskIDHash> changedSet; - - const auto& nodes = 
state->ClusterInfo->AllNodes(); - - TVector<ui32> changedCount(numRacks); - for (const auto& node : nodes) { - const ui64 nodeId = node.second->NodeId; - const TPDiskID id(nodeId, 0); - - all.AddPDisk(id); - if (changedCount[nodeId >> 16]++ < nodesPerRack / 2) { - changed.AddPDisk(id); - changedSet.insert(id); - } - } - - TString issues; - THashSet<TPDiskID, TPDiskIDHash> disallowed; - - UNIT_ASSERT_VALUES_EQUAL(changed.GetAllowedPDisks(all, issues, disallowed), changedSet); - UNIT_ASSERT(disallowed.empty()); - UNIT_ASSERT(issues.empty()); - - changedCount.assign(numRacks, 0); - for (const auto& node : nodes) { - const ui64 nodeId = node.second->NodeId; - const TPDiskID id(nodeId, 0); - - if (changedCount[nodeId >> 16]++ < nodesPerRack / 2 + 1) { - changed.AddPDisk(id); - changedSet.insert(id); - } - } - - disallowed.clear(); - const auto& allowed = changed.GetAllowedPDisks(all, issues, disallowed); - - if (anyRack || nodesPerRack == 1) { - UNIT_ASSERT_VALUES_EQUAL(allowed, changedSet); - UNIT_ASSERT(disallowed.empty()); - UNIT_ASSERT(issues.empty()); - } else { - UNIT_ASSERT_VALUES_EQUAL(allowed, decltype(allowed){}); - UNIT_ASSERT_VALUES_EQUAL(disallowed, changedSet); - UNIT_ASSERT_STRING_CONTAINS(issues, "due to RackRatio"); - } - } - } - - Y_UNIT_TEST(GuardianRackRatio) { - for (int anyRack = 0; anyRack < 2; ++anyRack) { - for (int numRacks = 1; numRacks < 5; ++numRacks) { - GuardianRackRatio(numRacks, {1, 2, 3, 4, 5}, anyRack); - - } - } - } - - + void GuardianRackRatio(ui16 numRacks, const TVector<ui16>& nodesPerRackVariants, bool anyRack) { + for (ui16 nodesPerRack : nodesPerRackVariants) { + TCmsStatePtr state = MockCmsState(1, numRacks, nodesPerRack, false, anyRack); + + TGuardian all(state); + TGuardian changed(state, 100, 100, 50); + THashSet<TPDiskID, TPDiskIDHash> changedSet; + + const auto& nodes = state->ClusterInfo->AllNodes(); + + TVector<ui32> changedCount(numRacks); + for (const auto& node : nodes) { + const ui64 nodeId = node.second->NodeId; + 
const TPDiskID id(nodeId, 0); + + all.AddPDisk(id); + if (changedCount[nodeId >> 16]++ < nodesPerRack / 2) { + changed.AddPDisk(id); + changedSet.insert(id); + } + } + + TString issues; + THashSet<TPDiskID, TPDiskIDHash> disallowed; + + UNIT_ASSERT_VALUES_EQUAL(changed.GetAllowedPDisks(all, issues, disallowed), changedSet); + UNIT_ASSERT(disallowed.empty()); + UNIT_ASSERT(issues.empty()); + + changedCount.assign(numRacks, 0); + for (const auto& node : nodes) { + const ui64 nodeId = node.second->NodeId; + const TPDiskID id(nodeId, 0); + + if (changedCount[nodeId >> 16]++ < nodesPerRack / 2 + 1) { + changed.AddPDisk(id); + changedSet.insert(id); + } + } + + disallowed.clear(); + const auto& allowed = changed.GetAllowedPDisks(all, issues, disallowed); + + if (anyRack || nodesPerRack == 1) { + UNIT_ASSERT_VALUES_EQUAL(allowed, changedSet); + UNIT_ASSERT(disallowed.empty()); + UNIT_ASSERT(issues.empty()); + } else { + UNIT_ASSERT_VALUES_EQUAL(allowed, decltype(allowed){}); + UNIT_ASSERT_VALUES_EQUAL(disallowed, changedSet); + UNIT_ASSERT_STRING_CONTAINS(issues, "due to RackRatio"); + } + } + } + + Y_UNIT_TEST(GuardianRackRatio) { + for (int anyRack = 0; anyRack < 2; ++anyRack) { + for (int numRacks = 1; numRacks < 5; ++numRacks) { + GuardianRackRatio(numRacks, {1, 2, 3, 4, 5}, anyRack); + + } + } + } + + } // TSentinelBaseTests Y_UNIT_TEST_SUITE(TSentinelTests) { @@ -318,8 +318,8 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { } const TString name = TStringBuilder() << "node-" << nodeId; - NActorsInterconnect::TNodeLocation location; - location.SetRack(TStringBuilder() << "rack-" << (nodeId - 1) % 8 + 1); + NActorsInterconnect::TNodeLocation location; + location.SetRack(TStringBuilder() << "rack-" << (nodeId - 1) % 8 + 1); info->AddNode(TEvInterconnect::TNodeInfo(nodeId, name, name, name, 10000, TNodeLocation(location)), nullptr); info->AddPDisk(pdisk); } @@ -332,17 +332,17 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { UNIT_ASSERT(DispatchEvents(options)); } - void 
SetPDiskStateImpl(const TSet<TPDiskID>& ids, EPDiskState state) { - for (const auto& id : ids) { - Y_VERIFY(MockNodes.contains(id.NodeId)); - auto& node = MockNodes.at(id.NodeId); + void SetPDiskStateImpl(const TSet<TPDiskID>& ids, EPDiskState state) { + for (const auto& id : ids) { + Y_VERIFY(MockNodes.contains(id.NodeId)); + auto& node = MockNodes.at(id.NodeId); + + Y_VERIFY(node.PDiskStateInfo.contains(id.DiskId)); + auto& pdisk = node.PDiskStateInfo.at(id.DiskId); - Y_VERIFY(node.PDiskStateInfo.contains(id.DiskId)); - auto& pdisk = node.PDiskStateInfo.at(id.DiskId); + pdisk.SetState(state); + } - pdisk.SetState(state); - } - Send(new IEventHandle(Sentinel, TActorId(), new TEvSentinel::TEvUpdateState)); } @@ -372,7 +372,7 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { EnableScheduleForActor(Sentinel, true); WaitForSentinelBoot(); - SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); + SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); } TPDiskID RandomPDiskID() const { @@ -381,58 +381,58 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { return TPDiskID(pdisk.GetNodeId(), pdisk.GetPDiskId()); } - TSet<TPDiskID> PDisksForRandomRack() const { - auto nodes = State->ClusterInfo->AllNodes(); - size_t idx = RandomNumber(nodes.size() - 1); - auto target = std::next(nodes.begin(), idx)->second; - - TString targetRack = target->Location.HasKey(TNodeLocation::TKeys::Rack) - ? target->Location.GetRackId() - : ""; - - TSet<TPDiskID> res; - for (const auto& [id, info] : nodes) { - TString foundRack = info->Location.HasKey(TNodeLocation::TKeys::Rack) - ? 
info->Location.GetRackId() - : ""; - if (targetRack == foundRack) { - std::copy(info->PDisks.begin(), info->PDisks.end(), std::inserter(res, res.begin())); - } - } - return res; - } - - TSet<TPDiskID> PDisksForRandomNode() const { - auto nodes = State->ClusterInfo->AllNodes(); - size_t idx = RandomNumber(nodes.size() - 1); - - auto info = std::next(nodes.begin(), idx)->second; - Y_VERIFY(info); - return info->PDisks; - } - - void SetPDiskState(const TSet<TPDiskID>& pdisks, EPDiskState state) { - SetPDiskStateImpl(pdisks, state); - + TSet<TPDiskID> PDisksForRandomRack() const { + auto nodes = State->ClusterInfo->AllNodes(); + size_t idx = RandomNumber(nodes.size() - 1); + auto target = std::next(nodes.begin(), idx)->second; + + TString targetRack = target->Location.HasKey(TNodeLocation::TKeys::Rack) + ? target->Location.GetRackId() + : ""; + + TSet<TPDiskID> res; + for (const auto& [id, info] : nodes) { + TString foundRack = info->Location.HasKey(TNodeLocation::TKeys::Rack) + ? info->Location.GetRackId() + : ""; + if (targetRack == foundRack) { + std::copy(info->PDisks.begin(), info->PDisks.end(), std::inserter(res, res.begin())); + } + } + return res; + } + + TSet<TPDiskID> PDisksForRandomNode() const { + auto nodes = State->ClusterInfo->AllNodes(); + size_t idx = RandomNumber(nodes.size() - 1); + + auto info = std::next(nodes.begin(), idx)->second; + Y_VERIFY(info); + return info->PDisks; + } + + void SetPDiskState(const TSet<TPDiskID>& pdisks, EPDiskState state) { + SetPDiskStateImpl(pdisks, state); + TDispatchOptions options; - options.FinalEvents.emplace_back(TEvSentinel::TEvStateUpdated::EventType, 1); + options.FinalEvents.emplace_back(TEvSentinel::TEvStateUpdated::EventType, 1); UNIT_ASSERT(DispatchEvents(options)); } - void SetPDiskState(const TSet<TPDiskID>& pdisks, EPDiskState state, EPDiskStatus expectedStatus) { - SetPDiskStateImpl(pdisks, state); + void SetPDiskState(const TSet<TPDiskID>& pdisks, EPDiskState state, EPDiskStatus expectedStatus) { + 
SetPDiskStateImpl(pdisks, state); bool stateUpdated = false; - struct TPDiskUpdates { - bool UpdateStatusRequested = false; - bool StatusChanged = false; - }; - THashMap<TPDiskID, TPDiskUpdates, TPDiskIDHash> pdiskUpdates; - for (const auto& id : pdisks) { - pdiskUpdates[id] = {};// TPDiskUpdates {false, false}); - } - + struct TPDiskUpdates { + bool UpdateStatusRequested = false; + bool StatusChanged = false; + }; + THashMap<TPDiskID, TPDiskUpdates, TPDiskIDHash> pdiskUpdates; + for (const auto& id : pdisks) { + pdiskUpdates[id] = {};// TPDiskUpdates {false, false}); + } + auto check = [&](IEventHandle& ev) { switch (ev.GetTypeRewrite()) { case TEvSentinel::TEvStateUpdated::EventType: @@ -440,29 +440,29 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { break; case TEvBlobStorage::TEvControllerConfigRequest::EventType: - { + { const auto& request = ev.Get<TEvBlobStorage::TEvControllerConfigRequest>()->Record; UNIT_ASSERT(request.HasRequest()); if (request.GetRequest().CommandSize() && request.GetRequest().GetCommand(0).HasUpdateDriveStatus()) { - const auto& update = request.GetRequest().GetCommand(0).GetUpdateDriveStatus(); - TPDiskID id(update.GetHostKey().GetNodeId(), update.GetPDiskId()); - auto it = pdiskUpdates.find(id); - if (it != pdiskUpdates.end()) { - it->second.UpdateStatusRequested = (expectedStatus == update.GetStatus()); - } + const auto& update = request.GetRequest().GetCommand(0).GetUpdateDriveStatus(); + TPDiskID id(update.GetHostKey().GetNodeId(), update.GetPDiskId()); + auto it = pdiskUpdates.find(id); + if (it != pdiskUpdates.end()) { + it->second.UpdateStatusRequested = (expectedStatus == update.GetStatus()); + } } } break; case TEvSentinel::TEvStatusChanged::EventType: - { + { const auto* event = ev.Get<TEvSentinel::TEvStatusChanged>(); - auto it = pdiskUpdates.find(event->Id); - if (it != pdiskUpdates.end()) { + auto it = pdiskUpdates.find(event->Id); + if (it != pdiskUpdates.end()) { UNIT_ASSERT(event->Success); - it->second.StatusChanged = true; + 
it->second.StatusChanged = true; } } break; @@ -471,13 +471,13 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { break; } - bool allUpdateStatusRequested = true; - bool allStatusChanged = true; - for (const auto& [id, info] : pdiskUpdates) { - allUpdateStatusRequested &= info.UpdateStatusRequested; - allStatusChanged &= info.StatusChanged; - } - return stateUpdated && pdiskUpdates.size() && allUpdateStatusRequested && allStatusChanged; + bool allUpdateStatusRequested = true; + bool allStatusChanged = true; + for (const auto& [id, info] : pdiskUpdates) { + allUpdateStatusRequested &= info.UpdateStatusRequested; + allStatusChanged &= info.StatusChanged; + } + return stateUpdated && pdiskUpdates.size() && allUpdateStatusRequested && allStatusChanged; }; TDispatchOptions options; @@ -506,7 +506,7 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { for (const auto state : reservedStates) { const TPDiskID id = env.RandomPDiskID(); - env.SetPDiskState({id}, state); + env.SetPDiskState({id}, state); } } @@ -516,8 +516,8 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { for (const EPDiskState state : ErrorStates) { const TPDiskID id = env.RandomPDiskID(); - env.SetPDiskState({id}, state, EPDiskStatus::FAULTY); - env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + env.SetPDiskState({id}, state, EPDiskStatus::FAULTY); + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); } } @@ -528,53 +528,53 @@ Y_UNIT_TEST_SUITE(TSentinelTests) { const TPDiskID id = env.RandomPDiskID(); for (ui32 i = 1; i < DefaultStateLimit; ++i) { - env.SetPDiskState({id}, state); + env.SetPDiskState({id}, state); + } + + env.SetPDiskState({id}, state, EPDiskStatus::FAULTY); + env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + } + } + + Y_UNIT_TEST(PDiskRackGuardHalfRack) { + TTestEnv env(16, 4); // 16 nodes are distributed into 8 racks, 2 per rack + + for (const EPDiskState state : ErrorStates) { + auto pdisks = 
env.PDisksForRandomNode(); + + // disks should become INACTIVE immediately after disk is broken + env.SetPDiskState(pdisks, state, EPDiskStatus::INACTIVE); + for (ui32 i = 1; i < DefaultErrorStateLimit - 1; ++i) { + env.SetPDiskState(pdisks, state); } + // for half of rack pdisks is expected to become FAULTY + env.SetPDiskState(pdisks, state, EPDiskStatus::FAULTY); - env.SetPDiskState({id}, state, EPDiskStatus::FAULTY); - env.SetPDiskState({id}, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::INACTIVE); + for (ui32 i = 1; i < DefaultStateLimit - 1; ++i) { + env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal); + } + env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); } } - Y_UNIT_TEST(PDiskRackGuardHalfRack) { - TTestEnv env(16, 4); // 16 nodes are distributed into 8 racks, 2 per rack - - for (const EPDiskState state : ErrorStates) { - auto pdisks = env.PDisksForRandomNode(); - - // disks should become INACTIVE immediately after disk is broken - env.SetPDiskState(pdisks, state, EPDiskStatus::INACTIVE); - for (ui32 i = 1; i < DefaultErrorStateLimit - 1; ++i) { - env.SetPDiskState(pdisks, state); - } - // for half of rack pdisks is expected to become FAULTY - env.SetPDiskState(pdisks, state, EPDiskStatus::FAULTY); - - env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::INACTIVE); - for (ui32 i = 1; i < DefaultStateLimit - 1; ++i) { - env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal); - } - env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); - } - } - - Y_UNIT_TEST(PDiskRackGuardFullRack) { - TTestEnv env(16, 4); // 16 nodes are distributed into 8 racks, 2 per rack - - for (const EPDiskState state : ErrorStates) { - auto pdisks = env.PDisksForRandomRack(); - - // disks should become INACTIVE immediately after disk is broken - 
env.SetPDiskState(pdisks, state, EPDiskStatus::INACTIVE); - for (ui32 i = 1; i < DefaultErrorStateLimit; ++i) { - env.SetPDiskState(pdisks, state); - } - - // for full rack pdisks is not expected to become FAULTY, so they become ACTIVE immediatetly - // after pdisk becomes Normal - env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); - } - } + Y_UNIT_TEST(PDiskRackGuardFullRack) { + TTestEnv env(16, 4); // 16 nodes are distributed into 8 racks, 2 per rack + + for (const EPDiskState state : ErrorStates) { + auto pdisks = env.PDisksForRandomRack(); + + // disks should become INACTIVE immediately after disk is broken + env.SetPDiskState(pdisks, state, EPDiskStatus::INACTIVE); + for (ui32 i = 1; i < DefaultErrorStateLimit; ++i) { + env.SetPDiskState(pdisks, state); + } + + // for full rack pdisks is not expected to become FAULTY, so they become ACTIVE immediatetly + // after pdisk becomes Normal + env.SetPDiskState(pdisks, NKikimrBlobStorage::TPDiskState::Normal, EPDiskStatus::ACTIVE); + } + } } // TSentinelTests } // NCmsTest diff --git a/ydb/core/control/defs.h b/ydb/core/control/defs.h index 98541686e9..0d2ba71fe8 100644 --- a/ydb/core/control/defs.h +++ b/ydb/core/control/defs.h @@ -1,2 +1,2 @@ -#pragma once +#pragma once #include <ydb/core/base/defs.h> diff --git a/ydb/core/control/immediate_control_board_actor.cpp b/ydb/core/control/immediate_control_board_actor.cpp index e4223f2104..bcb4a3344c 100644 --- a/ydb/core/control/immediate_control_board_actor.cpp +++ b/ydb/core/control/immediate_control_board_actor.cpp @@ -1,135 +1,135 @@ -#include "immediate_control_board_actor.h" - +#include "immediate_control_board_actor.h" + #include <ydb/core/mon/mon.h> #include <ydb/core/base/appdata.h> #include <ydb/core/base/counters.h> #include <library/cpp/monlib/dynamic_counters/counters.h> #include <library/cpp/monlib/service/pages/templates.h> - -namespace NKikimr { - -using namespace NActors; - -class TImmediateControlActor : public 
TActorBootstrapped<TImmediateControlActor> { - struct TLogRecord { - TInstant Timestamp; - TString ParamName; - TAtomicBase PrevValue; - TAtomicBase NewValue; - - TLogRecord(TInstant timestamp, TString paramName, TAtomicBase prevValue, TAtomicBase newValue) - : Timestamp(timestamp) - , ParamName(paramName) - , PrevValue(prevValue) - , NewValue(newValue) - {} - - TString TimestampToStr() { - struct tm t_p; - Timestamp.LocalTime(&t_p); - return Sprintf("%4d-%02d-%02d %02d:%02d:%02d", (int)t_p.tm_year + 1900, (int)t_p.tm_mon + 1, - (int)t_p.tm_mday, (int)t_p.tm_hour, (int)t_p.tm_min, (int)t_p.tm_sec); - } - }; - - TMutex Mtx; - TIntrusivePtr<TControlBoard> Board; + +namespace NKikimr { + +using namespace NActors; + +class TImmediateControlActor : public TActorBootstrapped<TImmediateControlActor> { + struct TLogRecord { + TInstant Timestamp; + TString ParamName; + TAtomicBase PrevValue; + TAtomicBase NewValue; + + TLogRecord(TInstant timestamp, TString paramName, TAtomicBase prevValue, TAtomicBase newValue) + : Timestamp(timestamp) + , ParamName(paramName) + , PrevValue(prevValue) + , NewValue(newValue) + {} + + TString TimestampToStr() { + struct tm t_p; + Timestamp.LocalTime(&t_p); + return Sprintf("%4d-%02d-%02d %02d:%02d:%02d", (int)t_p.tm_year + 1900, (int)t_p.tm_mon + 1, + (int)t_p.tm_mday, (int)t_p.tm_hour, (int)t_p.tm_min, (int)t_p.tm_sec); + } + }; + + TMutex Mtx; + TIntrusivePtr<TControlBoard> Board; TVector<TLogRecord> HistoryLog; - - NMonitoring::TDynamicCounters::TCounterPtr HasChanged; - NMonitoring::TDynamicCounters::TCounterPtr ChangedCount; - -public: + + NMonitoring::TDynamicCounters::TCounterPtr HasChanged; + NMonitoring::TDynamicCounters::TCounterPtr ChangedCount; + +public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::IMMEDIATE_CONTROL_BOARD; } - TImmediateControlActor(TIntrusivePtr<TControlBoard> board, - const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) - : Board(board) - 
{ + TImmediateControlActor(TIntrusivePtr<TControlBoard> board, + const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) + : Board(board) + { TIntrusivePtr<NMonitoring::TDynamicCounters> IcbGroup = GetServiceCounters(counters, "utils"); - HasChanged = IcbGroup->GetCounter("Icb/HasChangedContol"); - ChangedCount = IcbGroup->GetCounter("Icb/ChangedControlsCount"); - } - - - void Bootstrap(const TActorContext &ctx) { - auto mon = AppData(ctx)->Mon; - if (mon) { - NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); - mon->RegisterActorPage(actorsMonPage, "icb", "Immediate Control Board", false, - ctx.ExecutorThread.ActorSystem, ctx.SelfID); - } - Become(&TThis::StateFunc); - } - -private: - void HandlePostParams(const TCgiParameters &cgi) { - if (cgi.Has("restoreDefaults")) { - Board->RestoreDefaults(); - HistoryLog.emplace_back(TInstant::Now(), "RestoreDefaults", 0, 0); - *HasChanged = 0; - *ChangedCount = 0; - } - for (const auto ¶m : cgi) { + HasChanged = IcbGroup->GetCounter("Icb/HasChangedContol"); + ChangedCount = IcbGroup->GetCounter("Icb/ChangedControlsCount"); + } + + + void Bootstrap(const TActorContext &ctx) { + auto mon = AppData(ctx)->Mon; + if (mon) { + NMonitoring::TIndexMonPage *actorsMonPage = mon->RegisterIndexPage("actors", "Actors"); + mon->RegisterActorPage(actorsMonPage, "icb", "Immediate Control Board", false, + ctx.ExecutorThread.ActorSystem, ctx.SelfID); + } + Become(&TThis::StateFunc); + } + +private: + void HandlePostParams(const TCgiParameters &cgi) { + if (cgi.Has("restoreDefaults")) { + Board->RestoreDefaults(); + HistoryLog.emplace_back(TInstant::Now(), "RestoreDefaults", 0, 0); + *HasChanged = 0; + *ChangedCount = 0; + } + for (const auto ¶m : cgi) { TAtomicBase newValue = strtoull(param.second.data(), nullptr, 10); - TAtomicBase prevValue = newValue; - bool isDefault = Board->SetValue(param.first, newValue, prevValue); - if (prevValue != newValue) { - HistoryLog.emplace_back(TInstant::Now(), 
param.first, prevValue, newValue); - if (isDefault) { - ChangedCount->Dec(); - } else { - ChangedCount->Inc(); - } - *HasChanged = (ui64)ChangedCount->Val() > 0; - } - } - } - - void Handle(NMon::TEvHttpInfo::TPtr &ev, const TActorContext &ctx) { + TAtomicBase prevValue = newValue; + bool isDefault = Board->SetValue(param.first, newValue, prevValue); + if (prevValue != newValue) { + HistoryLog.emplace_back(TInstant::Now(), param.first, prevValue, newValue); + if (isDefault) { + ChangedCount->Dec(); + } else { + ChangedCount->Inc(); + } + *HasChanged = (ui64)ChangedCount->Val() > 0; + } + } + } + + void Handle(NMon::TEvHttpInfo::TPtr &ev, const TActorContext &ctx) { HTTP_METHOD method = ev->Get()->Request.GetMethod(); - if (method == HTTP_METHOD_POST) { + if (method == HTTP_METHOD_POST) { HandlePostParams(ev->Get()->Request.GetPostParams()); - } - TStringStream str; - str << Board->RenderAsHtml(); - HTML(str) { - str << "<h3>History</h3>"; - TABLE_SORTABLE_CLASS("historyLogTable") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "Timestamp"; } - TABLEH() {str << "Parameter"; } - TABLEH() {str << "PrevValue"; } - TABLEH() {str << "NewValue"; } - } - } - TABLEBODY() { - for (auto &record : HistoryLog) { - TABLER() { - TABLED() { str << record.TimestampToStr(); } - TABLED() { str << record.ParamName; } - TABLED() { str << record.PrevValue; } - TABLED() { str << record.NewValue; } - } - } - } - } - } - ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); - } - - STFUNC(StateFunc) { - switch(ev->GetTypeRewrite()) { - HFunc(NMon::TEvHttpInfo, Handle); - } - } -}; - -NActors::IActor* CreateImmediateControlActor(TIntrusivePtr<TControlBoard> board, - const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) { - return new NKikimr::TImmediateControlActor(board, counters); -} -}; + } + TStringStream str; + str << Board->RenderAsHtml(); + HTML(str) { + str << "<h3>History</h3>"; + TABLE_SORTABLE_CLASS("historyLogTable") { + TABLEHEAD() { + TABLER() { + TABLEH() {str 
<< "Timestamp"; } + TABLEH() {str << "Parameter"; } + TABLEH() {str << "PrevValue"; } + TABLEH() {str << "NewValue"; } + } + } + TABLEBODY() { + for (auto &record : HistoryLog) { + TABLER() { + TABLED() { str << record.TimestampToStr(); } + TABLED() { str << record.ParamName; } + TABLED() { str << record.PrevValue; } + TABLED() { str << record.NewValue; } + } + } + } + } + } + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str())); + } + + STFUNC(StateFunc) { + switch(ev->GetTypeRewrite()) { + HFunc(NMon::TEvHttpInfo, Handle); + } + } +}; + +NActors::IActor* CreateImmediateControlActor(TIntrusivePtr<TControlBoard> board, + const TIntrusivePtr<NMonitoring::TDynamicCounters>& counters) { + return new NKikimr::TImmediateControlActor(board, counters); +} +}; diff --git a/ydb/core/control/immediate_control_board_actor.h b/ydb/core/control/immediate_control_board_actor.h index 541fbc3f0e..0b2cff4cf6 100644 --- a/ydb/core/control/immediate_control_board_actor.h +++ b/ydb/core/control/immediate_control_board_actor.h @@ -1,20 +1,20 @@ -#pragma once -#include "defs.h" -#include "immediate_control_board_impl.h" - +#pragma once +#include "defs.h" +#include "immediate_control_board_impl.h" + #include <library/cpp/monlib/dynamic_counters/counters.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/actor_bootstrapped.h> - -namespace NKikimr { - + +namespace NKikimr { + inline NActors::TActorId MakeIcbId(ui32 node) { - char x[12] = {'i','c','b','_','a','c','t','o','r'}; + char x[12] = {'i','c','b','_','a','c','t','o','r'}; return NActors::TActorId(node, TStringBuf(x, 12)); -} - -class TImmediateControlActor; - -NActors::IActor* CreateImmediateControlActor(TIntrusivePtr<TControlBoard> board, const TIntrusivePtr<NMonitoring::TDynamicCounters> &counters); - -} +} + +class TImmediateControlActor; + +NActors::IActor* CreateImmediateControlActor(TIntrusivePtr<TControlBoard> board, const TIntrusivePtr<NMonitoring::TDynamicCounters> &counters); + +} diff 
--git a/ydb/core/control/immediate_control_board_actor_ut.cpp b/ydb/core/control/immediate_control_board_actor_ut.cpp index ff528005eb..2753077d93 100644 --- a/ydb/core/control/immediate_control_board_actor_ut.cpp +++ b/ydb/core/control/immediate_control_board_actor_ut.cpp @@ -1,7 +1,7 @@ -#include "defs.h" -#include "immediate_control_board_actor.h" -#include "immediate_control_board_wrapper.h" - +#include "defs.h" +#include "immediate_control_board_actor.h" +#include "immediate_control_board_wrapper.h" + #include <library/cpp/actors/interconnect/interconnect.h> #include <ydb/core/mon/mon.h> #include <ydb/core/base/appdata.h> @@ -16,110 +16,110 @@ #include <library/cpp/actors/core/scheduler_basic.h> #include <library/cpp/testing/unittest/registar.h> #include <library/cpp/testing/unittest/tests_data.h> - -#include <util/generic/string.h> -#include <util/generic/yexception.h> - - -namespace NKikimr { - -constexpr ui32 TEST_TIMEOUT = NSan::PlainOrUnderSanitizer(300000, 1200000); - - -#define ASSERT_YTHROW(expr, str) \ -do { \ - if (!(expr)) { \ - ythrow TWithBackTrace<yexception>() << str; \ - } \ -} while(false) - - -#define VERBOSE_COUT(str) \ -do { \ - if (IsVerbose) { \ - Cerr << str << Endl; \ - } \ -} while(false) - - -static bool IsVerbose = false; - -static THolder<TActorSystem> ActorSystem; - -static TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; -static THolder<NActors::TMon> Monitoring; - -static TAtomic DoneCounter = 0; + +#include <util/generic/string.h> +#include <util/generic/yexception.h> + + +namespace NKikimr { + +constexpr ui32 TEST_TIMEOUT = NSan::PlainOrUnderSanitizer(300000, 1200000); + + +#define ASSERT_YTHROW(expr, str) \ +do { \ + if (!(expr)) { \ + ythrow TWithBackTrace<yexception>() << str; \ + } \ +} while(false) + + +#define VERBOSE_COUT(str) \ +do { \ + if (IsVerbose) { \ + Cerr << str << Endl; \ + } \ +} while(false) + + +static bool IsVerbose = false; + +static THolder<TActorSystem> ActorSystem; + +static 
TIntrusivePtr<NMonitoring::TDynamicCounters> Counters; +static THolder<NActors::TMon> Monitoring; + +static TAtomic DoneCounter = 0; static TSystemEvent DoneEvent(TSystemEvent::rAuto); -static yexception LastException; -static volatile bool IsLastExceptionSet = false; - - -static void SignalDoneEvent() { - AtomicIncrement(DoneCounter); - DoneEvent.Signal(); -} - -struct TTestConfig { +static yexception LastException; +static volatile bool IsLastExceptionSet = false; + + +static void SignalDoneEvent() { + AtomicIncrement(DoneCounter); + DoneEvent.Signal(); +} + +struct TTestConfig { TActorId IcbActorId; - TControlBoard *Icb; - + TControlBoard *Icb; + TTestConfig(TActorId icbActorId, TControlBoard *icb) - : IcbActorId(icbActorId) - , Icb(icb) - {} -}; - -template <class T> -static void Run(i64 instances = 1) { + : IcbActorId(icbActorId) + , Icb(icb) + {} +}; + +template <class T> +static void Run(i64 instances = 1) { TVector<TActorId> testIds; TAppData appData(0, 0, 0, 0, TMap<TString, ui32>(), nullptr, nullptr, nullptr, nullptr); - - try { - Counters = TIntrusivePtr<NMonitoring::TDynamicCounters>(new NMonitoring::TDynamicCounters()); - - testIds.resize(instances); - - TIntrusivePtr<TTableNameserverSetup> nameserverTable(new TTableNameserverSetup()); - TPortManager pm; - nameserverTable->StaticNodeTable[1] = std::pair<TString, ui32>("127.0.0.1", pm.GetPort(12001)); - nameserverTable->StaticNodeTable[2] = std::pair<TString, ui32>("127.0.0.1", pm.GetPort(12002)); - - THolder<TActorSystemSetup> setup(new TActorSystemSetup()); - setup->NodeId = 1; - setup->ExecutorsCount = 3; - setup->Executors.Reset(new TAutoPtr<IExecutorPool>[3]); - setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20)); - setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20)); - setup->Executors[2].Reset(new TIOExecutorPool(2, 10)); - setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100))); - + + try { + Counters = TIntrusivePtr<NMonitoring::TDynamicCounters>(new 
NMonitoring::TDynamicCounters()); + + testIds.resize(instances); + + TIntrusivePtr<TTableNameserverSetup> nameserverTable(new TTableNameserverSetup()); + TPortManager pm; + nameserverTable->StaticNodeTable[1] = std::pair<TString, ui32>("127.0.0.1", pm.GetPort(12001)); + nameserverTable->StaticNodeTable[2] = std::pair<TString, ui32>("127.0.0.1", pm.GetPort(12002)); + + THolder<TActorSystemSetup> setup(new TActorSystemSetup()); + setup->NodeId = 1; + setup->ExecutorsCount = 3; + setup->Executors.Reset(new TAutoPtr<IExecutorPool>[3]); + setup->Executors[0].Reset(new TBasicExecutorPool(0, 2, 20)); + setup->Executors[1].Reset(new TBasicExecutorPool(1, 2, 20)); + setup->Executors[2].Reset(new TIOExecutorPool(2, 10)); + setup->Scheduler.Reset(new TBasicSchedulerThread(TSchedulerConfig(512, 100))); + const TActorId nameserviceId = GetNameserviceActorId(); - TActorSetupCmd nameserviceSetup(CreateNameserverTable(nameserverTable), TMailboxType::Simple, 0); + TActorSetupCmd nameserviceSetup(CreateNameserverTable(nameserverTable), TMailboxType::Simple, 0); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(nameserviceId, nameserviceSetup)); - - // ICB Actor creation + + // ICB Actor creation TActorId IcbActorId = MakeIcbId(setup->NodeId); - TActorSetupCmd testSetup(CreateImmediateControlActor(appData.Icb, Counters), TMailboxType::Revolving, 0); + TActorSetupCmd testSetup(CreateImmediateControlActor(appData.Icb, Counters), TMailboxType::Revolving, 0); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(IcbActorId, testSetup)); - - - THolder<TTestConfig> testConfig(new TTestConfig(IcbActorId, appData.Icb.Get())); - for (ui32 i = 0; i < instances; ++i) { - testIds[i] = MakeBlobStorageProxyID(1 + i); - TActorSetupCmd testSetup(new T(testConfig.Get()), TMailboxType::Revolving, 0); + + + THolder<TTestConfig> testConfig(new TTestConfig(IcbActorId, appData.Icb.Get())); + for (ui32 i = 0; i < instances; ++i) { + testIds[i] = MakeBlobStorageProxyID(1 + i); 
+ TActorSetupCmd testSetup(new T(testConfig.Get()), TMailboxType::Revolving, 0); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(testIds[i], testSetup)); - } - - AtomicSet(DoneCounter, 0); - - - /////////////////////// LOGGER /////////////////////////////////////////////// - + } + + AtomicSet(DoneCounter, 0); + + + /////////////////////// LOGGER /////////////////////////////////////////////// + NActors::TActorId loggerActorId = NActors::TActorId(1, "logger"); - TIntrusivePtr<NActors::NLog::TSettings> logSettings( - new NActors::NLog::TSettings(loggerActorId, NKikimrServices::LOGGER, NActors::NLog::PRI_ERROR, NActors::NLog::PRI_ERROR, 0)); + TIntrusivePtr<NActors::NLog::TSettings> logSettings( + new NActors::NLog::TSettings(loggerActorId, NKikimrServices::LOGGER, NActors::NLog::PRI_ERROR, NActors::NLog::PRI_ERROR, 0)); //logSettings->Append( // NActorsServices::EServiceCommon_MIN, // NActorsServices::EServiceCommon_MAX, @@ -127,161 +127,161 @@ static void Run(i64 instances = 1) { //); logSettings->Append( NKikimrServices::EServiceKikimr_MIN, - NKikimrServices::EServiceKikimr_MAX, + NKikimrServices::EServiceKikimr_MAX, NKikimrServices::EServiceKikimr_Name ); - - TString explanation; - logSettings->SetLevel(NLog::PRI_EMERG, NKikimrServices::BS_PDISK, explanation); - - NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(logSettings, NActors::CreateStderrBackend(), + + TString explanation; + logSettings->SetLevel(NLog::PRI_EMERG, NKikimrServices::BS_PDISK, explanation); + + NActors::TLoggerActor *loggerActor = new NActors::TLoggerActor(logSettings, NActors::CreateStderrBackend(), GetServiceCounters(Counters, "utils")); - NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, 2); + NActors::TActorSetupCmd loggerActorCmd(loggerActor, NActors::TMailboxType::Simple, 2); std::pair<NActors::TActorId, NActors::TActorSetupCmd> loggerActorPair(loggerActorId, loggerActorCmd); - 
setup->LocalServices.push_back(loggerActorPair); - ////////////////////////////////////////////////////////////////////////////// - - ActorSystem.Reset(new TActorSystem(setup, &appData, logSettings)); - - ActorSystem->Start(); - - VERBOSE_COUT("Sending TEvBoot to test"); - for (ui32 i = 0; i < instances; ++i) { + setup->LocalServices.push_back(loggerActorPair); + ////////////////////////////////////////////////////////////////////////////// + + ActorSystem.Reset(new TActorSystem(setup, &appData, logSettings)); + + ActorSystem->Start(); + + VERBOSE_COUT("Sending TEvBoot to test"); + for (ui32 i = 0; i < instances; ++i) { ActorSystem->Send(testIds[i], new TEvTablet::TEvBoot(MakeTabletID(0, 0, 1), 0, nullptr, TActorId(), nullptr)); - } - - TAtomicBase doneCount = 0; - bool isOk = true; - TInstant startTime = Now(); - while (doneCount < instances && isOk) { - ui32 msRemaining = TEST_TIMEOUT - (ui32)(Now() - startTime).MilliSeconds(); - isOk = DoneEvent.Wait(msRemaining); - doneCount = AtomicGet(DoneCounter); - } - UNIT_ASSERT_VALUES_EQUAL_C(doneCount, instances, "test timeout "); - } catch (yexception ex) { - LastException = ex; - IsLastExceptionSet = true; - VERBOSE_COUT(ex.what()); - } - - Monitoring.Destroy(); - if (ActorSystem.Get()) { - ActorSystem->Stop(); - ActorSystem.Destroy(); - } - DoneEvent.Reset(); - if (IsLastExceptionSet) { - IsLastExceptionSet = false; - ythrow LastException; - } -} - -class TBaseTest : public TActor<TBaseTest> { -protected: - struct TResponseData { - - void *Cookie; - NKikimrProto::EReplyStatus Status; - NMon::TEvHttpInfoRes *HttpResult; - - TResponseData() { - Clear(); - } - - void Clear() { - Cookie = (void*)((ui64)-1); - Status = NKikimrProto::OK; - HttpResult = nullptr; - } - - void Check() { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&Cookie, sizeof(Cookie)); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&Status, sizeof(Status)); - } - }; - - TResponseData LastResponse; - + } + + TAtomicBase doneCount = 0; + bool isOk = true; + TInstant 
startTime = Now(); + while (doneCount < instances && isOk) { + ui32 msRemaining = TEST_TIMEOUT - (ui32)(Now() - startTime).MilliSeconds(); + isOk = DoneEvent.Wait(msRemaining); + doneCount = AtomicGet(DoneCounter); + } + UNIT_ASSERT_VALUES_EQUAL_C(doneCount, instances, "test timeout "); + } catch (yexception ex) { + LastException = ex; + IsLastExceptionSet = true; + VERBOSE_COUT(ex.what()); + } + + Monitoring.Destroy(); + if (ActorSystem.Get()) { + ActorSystem->Stop(); + ActorSystem.Destroy(); + } + DoneEvent.Reset(); + if (IsLastExceptionSet) { + IsLastExceptionSet = false; + ythrow LastException; + } +} + +class TBaseTest : public TActor<TBaseTest> { +protected: + struct TResponseData { + + void *Cookie; + NKikimrProto::EReplyStatus Status; + NMon::TEvHttpInfoRes *HttpResult; + + TResponseData() { + Clear(); + } + + void Clear() { + Cookie = (void*)((ui64)-1); + Status = NKikimrProto::OK; + HttpResult = nullptr; + } + + void Check() { + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&Cookie, sizeof(Cookie)); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&Status, sizeof(Status)); + } + }; + + TResponseData LastResponse; + const TActorId IcbActor; - TControlBoard *Icb; - int TestStep; - - virtual void TestFSM(const TActorContext &ctx) = 0; - - void ActTestFSM(const TActorContext &ctx) { - LastResponse.Check(); - try { - TestFSM(ctx); - LastResponse.Clear(); - } - catch (yexception ex) { - LastException = ex; - IsLastExceptionSet = true; - SignalDoneEvent(); - } - } - void HandleBoot(TEvTablet::TEvBoot::TPtr &ev, const TActorContext &ctx) { - ActTestFSM(ctx); - Y_UNUSED(ev); - } - - void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { - LastResponse.HttpResult = static_cast<NMon::TEvHttpInfoRes*>(ev->Get()); - ActTestFSM(ctx); - } - -public: - TBaseTest(TTestConfig *cfg) - : TActor(&TThis::StateRegister) - , IcbActor(cfg->IcbActorId) - , Icb(cfg->Icb) - , TestStep(0) - {} - - STFUNC(StateRegister) { - switch (ev->GetTypeRewrite()) { - HFunc(NMon::TEvHttpInfoRes, 
Handle); - //HFunc(NNodeWhiteboard::TEvWhiteboard::, Handle); - HFunc(TEvTablet::TEvBoot, HandleBoot); - } - } -}; - -struct THttpRequest : NMonitoring::IHttpRequest { - HTTP_METHOD Method; - TCgiParameters CgiParameters; + TControlBoard *Icb; + int TestStep; + + virtual void TestFSM(const TActorContext &ctx) = 0; + + void ActTestFSM(const TActorContext &ctx) { + LastResponse.Check(); + try { + TestFSM(ctx); + LastResponse.Clear(); + } + catch (yexception ex) { + LastException = ex; + IsLastExceptionSet = true; + SignalDoneEvent(); + } + } + void HandleBoot(TEvTablet::TEvBoot::TPtr &ev, const TActorContext &ctx) { + ActTestFSM(ctx); + Y_UNUSED(ev); + } + + void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { + LastResponse.HttpResult = static_cast<NMon::TEvHttpInfoRes*>(ev->Get()); + ActTestFSM(ctx); + } + +public: + TBaseTest(TTestConfig *cfg) + : TActor(&TThis::StateRegister) + , IcbActor(cfg->IcbActorId) + , Icb(cfg->Icb) + , TestStep(0) + {} + + STFUNC(StateRegister) { + switch (ev->GetTypeRewrite()) { + HFunc(NMon::TEvHttpInfoRes, Handle); + //HFunc(NNodeWhiteboard::TEvWhiteboard::, Handle); + HFunc(TEvTablet::TEvBoot, HandleBoot); + } + } +}; + +struct THttpRequest : NMonitoring::IHttpRequest { + HTTP_METHOD Method; + TCgiParameters CgiParameters; THttpHeaders HttpHeaders; - - THttpRequest(HTTP_METHOD method) - : Method(method) - {} - - ~THttpRequest() {} - - const char* GetURI() const override { - return ""; - } - - const char* GetPath() const override { - return ""; - } - - const TCgiParameters& GetParams() const override { - return CgiParameters; - } - - const TCgiParameters& GetPostParams() const override { - return CgiParameters; - } - - TStringBuf GetPostContent() const override { - return TString(); - } - - HTTP_METHOD GetMethod() const override { - return Method; - } + + THttpRequest(HTTP_METHOD method) + : Method(method) + {} + + ~THttpRequest() {} + + const char* GetURI() const override { + return ""; + } + + const char* GetPath() 
const override { + return ""; + } + + const TCgiParameters& GetParams() const override { + return CgiParameters; + } + + const TCgiParameters& GetPostParams() const override { + return CgiParameters; + } + + TStringBuf GetPostContent() const override { + return TString(); + } + + HTTP_METHOD GetMethod() const override { + return Method; + } const THttpHeaders& GetHeaders() const override { return HttpHeaders; @@ -290,155 +290,155 @@ struct THttpRequest : NMonitoring::IHttpRequest { TString GetRemoteAddr() const override { return TString(); } -}; - -class TTestHttpGetResponse : public TBaseTest { - TAutoPtr<THttpRequest> HttpRequest; - NMonitoring::TMonService2HttpRequest MonService2HttpRequest; - - void TestFSM(const TActorContext &ctx) { - Y_UNUSED(ctx); - VERBOSE_COUT("Test step " << TestStep); - switch (TestStep) { - case 0: - VERBOSE_COUT("Sending TEvHttpInfo"); - ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); - break; - case 10: - ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, - "Unexpected response message type, expected HttpInfoRes"); - ASSERT_YTHROW(LastResponse.HttpResult->Answer.size() > 0, "Html page cannot have zero size"); - VERBOSE_COUT("Done"); - SignalDoneEvent(); - break; - default: - ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } -public: - TTestHttpGetResponse(TTestConfig *cfg) - : TBaseTest(cfg) - , HttpRequest(new THttpRequest(HTTP_METHOD_GET)) - , MonService2HttpRequest(nullptr, HttpRequest.Get(), nullptr, nullptr, "", nullptr) - {} -}; - -class TTestHttpPostReaction : public TBaseTest { - TAutoPtr<THttpRequest> HttpRequest; - NMonitoring::TMonService2HttpRequest MonService2HttpRequest; - - void TestFSM(const TActorContext &ctx) { - Y_UNUSED(ctx); - VERBOSE_COUT("Test step " << TestStep); - switch (TestStep) { - case 0: - VERBOSE_COUT("Testing POST request with an unexistentParameter"); - 
HttpRequest->CgiParameters.emplace("unexistentParameter", "10"); - ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); - break; - case 10: - { - ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, - "Unexpected response message type, expected is HttpInfoRes"); - bool isControlExists; - TAtomicBase value; - Icb->GetValue("unexistentParameter", value, isControlExists); - ASSERT_YTHROW(!isControlExists, "Parameter mustn't be created by POST request"); - VERBOSE_COUT("Testing POST request with an existentParameter"); - TControlWrapper control(10); - Icb->RegisterSharedControl(control, "existentParameter"); - Icb->GetValue("existentParameter", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 10, "Error in control creation and registration"); - HttpRequest->CgiParameters.clear(); - HttpRequest->CgiParameters.emplace("existentParameter", "15"); - ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); - break; - } - case 20: - { - ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, - "Unexpected response message type, expected is HttpInfoRes"); - bool isControlExists; - TAtomicBase value; - Icb->GetValue("existentParameter", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 15, "Parameter haven't changed by POST request"); - VERBOSE_COUT("Test of restoreDefaults POST request"); - HttpRequest->CgiParameters.clear(); - HttpRequest->CgiParameters.emplace("restoreDefaults", ""); - ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); - break; - } - case 30: - { - ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, - "Unexpected response message type, expected is HttpInfoRes"); - bool isControlExists; - TAtomicBase value; - 
Icb->GetValue("existentParameter", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 10, "Parameter haven't restored default value"); - VERBOSE_COUT("Test is bounds pulling wokrs"); - TControlWrapper control1(10, 5, 15); - TControlWrapper control2(10, 5, 15); - Icb->RegisterSharedControl(control1, "existentParameterWithBoundsLower"); - Icb->RegisterSharedControl(control2, "existentParameterWithBoundsUpper"); - Icb->GetValue("existentParameterWithBoundsLower", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 10, "Error in control creation and registration"); - Icb->GetValue("existentParameterWithBoundsUpper", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 10, "Error in control creation and registration"); - HttpRequest->CgiParameters.clear(); - HttpRequest->CgiParameters.emplace("existentParameterWithBoundsLower", "1"); - HttpRequest->CgiParameters.emplace("existentParameterWithBoundsUpper", "99999"); - ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); - break; - } - case 40: - { - ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, - "Unexpected response message type, expected is HttpInfoRes"); - bool isControlExists; - TAtomicBase value; - - Icb->GetValue("existentParameterWithBoundsLower", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 5, "Pulling value to bounds doesn't work"); - - Icb->GetValue("existentParameterWithBoundsUpper", value, isControlExists); - ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); - ASSERT_YTHROW(value == 15, "Pulling value to bounds doesn't work"); - - VERBOSE_COUT("Done"); - SignalDoneEvent(); - break; - } - default: - 
ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; - break; - } - TestStep += 10; - } -public: - TTestHttpPostReaction(TTestConfig *cfg) - : TBaseTest(cfg) - , HttpRequest(new THttpRequest(HTTP_METHOD_POST)) - , MonService2HttpRequest(nullptr, HttpRequest.Get(), nullptr, nullptr, "", nullptr) - {} -}; - +}; + +class TTestHttpGetResponse : public TBaseTest { + TAutoPtr<THttpRequest> HttpRequest; + NMonitoring::TMonService2HttpRequest MonService2HttpRequest; + + void TestFSM(const TActorContext &ctx) { + Y_UNUSED(ctx); + VERBOSE_COUT("Test step " << TestStep); + switch (TestStep) { + case 0: + VERBOSE_COUT("Sending TEvHttpInfo"); + ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); + break; + case 10: + ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, + "Unexpected response message type, expected HttpInfoRes"); + ASSERT_YTHROW(LastResponse.HttpResult->Answer.size() > 0, "Html page cannot have zero size"); + VERBOSE_COUT("Done"); + SignalDoneEvent(); + break; + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } +public: + TTestHttpGetResponse(TTestConfig *cfg) + : TBaseTest(cfg) + , HttpRequest(new THttpRequest(HTTP_METHOD_GET)) + , MonService2HttpRequest(nullptr, HttpRequest.Get(), nullptr, nullptr, "", nullptr) + {} +}; + +class TTestHttpPostReaction : public TBaseTest { + TAutoPtr<THttpRequest> HttpRequest; + NMonitoring::TMonService2HttpRequest MonService2HttpRequest; + + void TestFSM(const TActorContext &ctx) { + Y_UNUSED(ctx); + VERBOSE_COUT("Test step " << TestStep); + switch (TestStep) { + case 0: + VERBOSE_COUT("Testing POST request with an unexistentParameter"); + HttpRequest->CgiParameters.emplace("unexistentParameter", "10"); + ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); + break; + case 10: + { + ASSERT_YTHROW(LastResponse.HttpResult && 
LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, + "Unexpected response message type, expected is HttpInfoRes"); + bool isControlExists; + TAtomicBase value; + Icb->GetValue("unexistentParameter", value, isControlExists); + ASSERT_YTHROW(!isControlExists, "Parameter mustn't be created by POST request"); + VERBOSE_COUT("Testing POST request with an existentParameter"); + TControlWrapper control(10); + Icb->RegisterSharedControl(control, "existentParameter"); + Icb->GetValue("existentParameter", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 10, "Error in control creation and registration"); + HttpRequest->CgiParameters.clear(); + HttpRequest->CgiParameters.emplace("existentParameter", "15"); + ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); + break; + } + case 20: + { + ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, + "Unexpected response message type, expected is HttpInfoRes"); + bool isControlExists; + TAtomicBase value; + Icb->GetValue("existentParameter", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 15, "Parameter haven't changed by POST request"); + VERBOSE_COUT("Test of restoreDefaults POST request"); + HttpRequest->CgiParameters.clear(); + HttpRequest->CgiParameters.emplace("restoreDefaults", ""); + ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); + break; + } + case 30: + { + ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, + "Unexpected response message type, expected is HttpInfoRes"); + bool isControlExists; + TAtomicBase value; + Icb->GetValue("existentParameter", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 10, "Parameter haven't restored default 
value"); + VERBOSE_COUT("Test is bounds pulling wokrs"); + TControlWrapper control1(10, 5, 15); + TControlWrapper control2(10, 5, 15); + Icb->RegisterSharedControl(control1, "existentParameterWithBoundsLower"); + Icb->RegisterSharedControl(control2, "existentParameterWithBoundsUpper"); + Icb->GetValue("existentParameterWithBoundsLower", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 10, "Error in control creation and registration"); + Icb->GetValue("existentParameterWithBoundsUpper", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 10, "Error in control creation and registration"); + HttpRequest->CgiParameters.clear(); + HttpRequest->CgiParameters.emplace("existentParameterWithBoundsLower", "1"); + HttpRequest->CgiParameters.emplace("existentParameterWithBoundsUpper", "99999"); + ctx.Send(IcbActor, new NMon::TEvHttpInfo(MonService2HttpRequest)); + break; + } + case 40: + { + ASSERT_YTHROW(LastResponse.HttpResult && LastResponse.HttpResult->Type() == NActors::NMon::HttpInfoRes, + "Unexpected response message type, expected is HttpInfoRes"); + bool isControlExists; + TAtomicBase value; + + Icb->GetValue("existentParameterWithBoundsLower", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 5, "Pulling value to bounds doesn't work"); + + Icb->GetValue("existentParameterWithBoundsUpper", value, isControlExists); + ASSERT_YTHROW(isControlExists, "Error in control creation and registration"); + ASSERT_YTHROW(value == 15, "Pulling value to bounds doesn't work"); + + VERBOSE_COUT("Done"); + SignalDoneEvent(); + break; + } + default: + ythrow TWithBackTrace<yexception>() << "Unexpected TestStep " << TestStep << Endl; + break; + } + TestStep += 10; + } +public: + TTestHttpPostReaction(TTestConfig *cfg) + : TBaseTest(cfg) + , 
HttpRequest(new THttpRequest(HTTP_METHOD_POST)) + , MonService2HttpRequest(nullptr, HttpRequest.Get(), nullptr, nullptr, "", nullptr) + {} +}; + Y_UNIT_TEST_SUITE(IcbAsActorTests) { Y_UNIT_TEST(TestHttpGetResponse) { - Run<TTestHttpGetResponse>(); - } - + Run<TTestHttpGetResponse>(); + } + Y_UNIT_TEST(TestHttpPostReaction) { - Run<TTestHttpPostReaction>(); - } -}; - -} // namespace NKikimr + Run<TTestHttpPostReaction>(); + } +}; + +} // namespace NKikimr diff --git a/ydb/core/control/immediate_control_board_control.cpp b/ydb/core/control/immediate_control_board_control.cpp index f88c4ebbde..7762cf9d88 100644 --- a/ydb/core/control/immediate_control_board_control.cpp +++ b/ydb/core/control/immediate_control_board_control.cpp @@ -1,52 +1,52 @@ -#include "immediate_control_board_control.h" -#include <util/stream/str.h> - -namespace NKikimr { - -TControl::TControl(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound) - : Value(defaultValue) - , Default(defaultValue) - , LowerBound(lowerBound) - , UpperBound(upperBound) -{} - -void TControl::Set(TAtomicBase newValue) { - AtomicSet(Value, newValue); - AtomicSet(Default, newValue); -} - -TAtomicBase TControl::SetFromHtmlRequest(TAtomicBase newValue) { - TAtomicBase prevValue = AtomicGet(Value); - if (newValue == AtomicGet(Default)) { - AtomicSet(Value, newValue); - } else { - newValue = Max(newValue, LowerBound); - newValue = Min(newValue, UpperBound); - AtomicSet(Value, newValue); - } - return prevValue; -} - -TAtomicBase TControl::Get() const { - return AtomicGet(Value); -} - -TAtomicBase TControl::GetDefault() const { - return AtomicGet(Default); -} - -void TControl::RestoreDefault() { - AtomicSet(Value, Default); -} - -bool TControl::IsDefault() const { - return AtomicGet(Value) == AtomicGet(Default); -} - -TString TControl::RangeAsString() const { - TStringStream str; - str << "[" << LowerBound << ", " << UpperBound << "]"; - return str.Str(); -} - -} +#include "immediate_control_board_control.h" 
+#include <util/stream/str.h> + +namespace NKikimr { + +TControl::TControl(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound) + : Value(defaultValue) + , Default(defaultValue) + , LowerBound(lowerBound) + , UpperBound(upperBound) +{} + +void TControl::Set(TAtomicBase newValue) { + AtomicSet(Value, newValue); + AtomicSet(Default, newValue); +} + +TAtomicBase TControl::SetFromHtmlRequest(TAtomicBase newValue) { + TAtomicBase prevValue = AtomicGet(Value); + if (newValue == AtomicGet(Default)) { + AtomicSet(Value, newValue); + } else { + newValue = Max(newValue, LowerBound); + newValue = Min(newValue, UpperBound); + AtomicSet(Value, newValue); + } + return prevValue; +} + +TAtomicBase TControl::Get() const { + return AtomicGet(Value); +} + +TAtomicBase TControl::GetDefault() const { + return AtomicGet(Default); +} + +void TControl::RestoreDefault() { + AtomicSet(Value, Default); +} + +bool TControl::IsDefault() const { + return AtomicGet(Value) == AtomicGet(Default); +} + +TString TControl::RangeAsString() const { + TStringStream str; + str << "[" << LowerBound << ", " << UpperBound << "]"; + return str.Str(); +} + +} diff --git a/ydb/core/control/immediate_control_board_control.h b/ydb/core/control/immediate_control_board_control.h index 4677af3c21..7fd038b03a 100644 --- a/ydb/core/control/immediate_control_board_control.h +++ b/ydb/core/control/immediate_control_board_control.h @@ -1,33 +1,33 @@ -#pragma once -#include "defs.h" - -#include <util/generic/ptr.h> -#include <util/system/atomic.h> - -namespace NKikimr { - -class TControl : public TThrRefBase { - TAtomic Value; - TAtomic Default; - TAtomicBase LowerBound; - TAtomicBase UpperBound; - -public: - TControl(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound); - - void Set(TAtomicBase newValue); - - TAtomicBase SetFromHtmlRequest(TAtomicBase newValue); - - TAtomicBase Get() const; - - TAtomicBase GetDefault() const; - - void RestoreDefault(); - - bool IsDefault() 
const; - - TString RangeAsString() const; -}; - -} +#pragma once +#include "defs.h" + +#include <util/generic/ptr.h> +#include <util/system/atomic.h> + +namespace NKikimr { + +class TControl : public TThrRefBase { + TAtomic Value; + TAtomic Default; + TAtomicBase LowerBound; + TAtomicBase UpperBound; + +public: + TControl(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound); + + void Set(TAtomicBase newValue); + + TAtomicBase SetFromHtmlRequest(TAtomicBase newValue); + + TAtomicBase Get() const; + + TAtomicBase GetDefault() const; + + void RestoreDefault(); + + bool IsDefault() const; + + TString RangeAsString() const; +}; + +} diff --git a/ydb/core/control/immediate_control_board_impl.cpp b/ydb/core/control/immediate_control_board_impl.cpp index fa7686ec80..fa26926fae 100644 --- a/ydb/core/control/immediate_control_board_impl.cpp +++ b/ydb/core/control/immediate_control_board_impl.cpp @@ -1,33 +1,33 @@ -#include "immediate_control_board_impl.h" - -#include <util/generic/string.h> -#include <util/stream/str.h> +#include "immediate_control_board_impl.h" + +#include <util/generic/string.h> +#include <util/stream/str.h> #include <library/cpp/monlib/service/pages/templates.h> - -namespace NKikimr { - -bool TControlBoard::RegisterLocalControl(TControlWrapper control, TString name) { + +namespace NKikimr { + +bool TControlBoard::RegisterLocalControl(TControlWrapper control, TString name) { bool result = true; if (Board.Has(name)) { result = false; - } + } Board.Insert(name, control.Control); return result; -} - -void TControlBoard::RegisterSharedControl(TControlWrapper& control, TString name) { +} + +void TControlBoard::RegisterSharedControl(TControlWrapper& control, TString name) { control.Control = Board.InsertIfAbsent(name, control.Control); -} - -void TControlBoard::RestoreDefaults() { +} + +void TControlBoard::RestoreDefaults() { for (auto& bucket : Board.Buckets) { TReadGuard guard(bucket.GetLock()); for (auto &control : bucket.GetMap()) { 
control.second->RestoreDefault(); } - } -} - + } +} + void TControlBoard::RestoreDefault(TString name) { TIntrusivePtr<TControl> control; if (Board.Get(name, control)) { @@ -35,39 +35,39 @@ void TControlBoard::RestoreDefault(TString name) { } } -bool TControlBoard::SetValue(TString name, TAtomic value, TAtomic &outPrevValue) { +bool TControlBoard::SetValue(TString name, TAtomic value, TAtomic &outPrevValue) { TIntrusivePtr<TControl> control; if (Board.Get(name, control)) { outPrevValue = control->SetFromHtmlRequest(value); return control->IsDefault(); - } - return true; -} - -// Only for tests -void TControlBoard::GetValue(TString name, TAtomic &outValue, bool &outIsControlExists) const { + } + return true; +} + +// Only for tests +void TControlBoard::GetValue(TString name, TAtomic &outValue, bool &outIsControlExists) const { TIntrusivePtr<TControl> control; outIsControlExists = Board.Get(name, control); - if (outIsControlExists) { + if (outIsControlExists) { outValue = control->Get(); - } -} - -TString TControlBoard::RenderAsHtml() const { - TStringStream str; - HTML(str) { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() { str << "Parameter"; } - TABLEH() { str << "Acceptable range"; } - TABLEH() { str << "Current"; } - TABLEH() { str << "Default"; } - TABLEH() { str << "Send new value"; } - TABLEH() { str << "Changed"; } - } - } - TABLEBODY() { + } +} + +TString TControlBoard::RenderAsHtml() const { + TStringStream str; + HTML(str) { + TABLE_SORTABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Parameter"; } + TABLEH() { str << "Acceptable range"; } + TABLEH() { str << "Current"; } + TABLEH() { str << "Default"; } + TABLEH() { str << "Send new value"; } + TABLEH() { str << "Changed"; } + } + } + TABLEBODY() { for (const auto& bucket : Board.Buckets) { TReadGuard guard(bucket.GetLock()); for (const auto &item : bucket.GetMap()) { @@ -80,14 +80,14 @@ TString TControlBoard::RenderAsHtml() const { } else { str << "<p 
style='color:red;'><b>" << item.second->Get() << " </b></p>"; } - } + } TABLED() { if (item.second->IsDefault()) { str << "<p>" << item.second->GetDefault() << "</p>"; } else { str << "<p style='color:red;'><b>" << item.second->GetDefault() << " </b></p>"; } - } + } TABLED() { str << "<form class='form_horizontal' method='post'>"; str << "<input name='" << item.first << "' type='text' value='" @@ -96,16 +96,16 @@ TString TControlBoard::RenderAsHtml() const { str << "</form>"; } TABLED() { str << !item.second->IsDefault(); } - } - } - } - } - } - str << "<form class='form_horizontal' method='post'>"; - str << "<button type='submit' name='restoreDefaults' style='color:green;'><b>Restore Default</b></button>"; - str << "</form>"; - } - return str.Str(); -} - + } + } + } + } + } + str << "<form class='form_horizontal' method='post'>"; + str << "<button type='submit' name='restoreDefaults' style='color:green;'><b>Restore Default</b></button>"; + str << "</form>"; + } + return str.Str(); +} + } diff --git a/ydb/core/control/immediate_control_board_impl.h b/ydb/core/control/immediate_control_board_impl.h index 8b4d84f385..a01e09f4d6 100644 --- a/ydb/core/control/immediate_control_board_impl.h +++ b/ydb/core/control/immediate_control_board_impl.h @@ -1,31 +1,31 @@ -#pragma once +#pragma once + +#include "defs.h" +#include "immediate_control_board_wrapper.h" -#include "defs.h" -#include "immediate_control_board_wrapper.h" - #include <ydb/core/util/concurrent_rw_hash.h> - -namespace NKikimr { - -class TControlBoard : public TThrRefBase { + +namespace NKikimr { + +class TControlBoard : public TThrRefBase { private: TConcurrentRWHashMap<TString, TIntrusivePtr<TControl>, 16> Board; - -public: - bool RegisterLocalControl(TControlWrapper control, TString name); - - void RegisterSharedControl(TControlWrapper& control, TString name); - - void RestoreDefaults(); - + +public: + bool RegisterLocalControl(TControlWrapper control, TString name); + + void 
RegisterSharedControl(TControlWrapper& control, TString name); + + void RestoreDefaults(); + void RestoreDefault(TString name); - bool SetValue(TString name, TAtomic value, TAtomic &outPrevValue); - - // Only for tests - void GetValue(TString name, TAtomic &outValue, bool &outIsControlExists) const; - - TString RenderAsHtml() const; -}; - -} + bool SetValue(TString name, TAtomic value, TAtomic &outPrevValue); + + // Only for tests + void GetValue(TString name, TAtomic &outValue, bool &outIsControlExists) const; + + TString RenderAsHtml() const; +}; + +} diff --git a/ydb/core/control/immediate_control_board_ut.cpp b/ydb/core/control/immediate_control_board_ut.cpp index 6baf0b7a89..dba6280ab5 100644 --- a/ydb/core/control/immediate_control_board_ut.cpp +++ b/ydb/core/control/immediate_control_board_ut.cpp @@ -1,141 +1,141 @@ -#include "immediate_control_board_impl.h" -#include "immediate_control_board_wrapper.h" +#include "immediate_control_board_impl.h" +#include "immediate_control_board_wrapper.h" #include <library/cpp/testing/unittest/registar.h> -#include <util/random/mersenne64.h> -#include <util/random/entropy.h> -#include <util/string/printf.h> -#include <util/system/thread.h> -#include <array> - -namespace NKikimr { - -#define TEST_REPEATS 1000000 -#define TEST_THREADS_CNT 4 -#define IS_VERBOSE 1 - -#if IS_VERBOSE -# define VERBOSE_COUT(a) \ - Cout << a; \ - Cout << Endl -#endif - +#include <util/random/mersenne64.h> +#include <util/random/entropy.h> +#include <util/string/printf.h> +#include <util/system/thread.h> +#include <array> + +namespace NKikimr { + +#define TEST_REPEATS 1000000 +#define TEST_THREADS_CNT 4 +#define IS_VERBOSE 1 + +#if IS_VERBOSE +# define VERBOSE_COUT(a) \ + Cout << a; \ + Cout << Endl +#endif + Y_UNIT_TEST_SUITE(ControlImplementationTests) { Y_UNIT_TEST(TestTControl) { - NPrivate::TMersenne64 randGen(Seed()); - std::array<i64, 3> bounds; - for (ui64 i = 0; i < 3; ++i) { - bounds[i] = (i64)randGen.GenRand(); - } - 
std::sort(bounds.begin(), bounds.end()); - i64 lowerBound = bounds[0]; - i64 defaultValue = bounds[1]; - i64 upperBound = bounds[2]; - - TIntrusivePtr<TControl> control(new TControl(defaultValue, lowerBound, upperBound)); - for (ui64 i = 0; i < TEST_REPEATS; ++i) { - i64 num = (i64)randGen.GenRand(); - control->Set(num); - UNIT_ASSERT_EQUAL(control->Get(), num); - UNIT_ASSERT_EQUAL(control->GetDefault(), num); - } - control = new TControl(defaultValue, lowerBound, upperBound); - for (ui64 i = 0; i < TEST_REPEATS; ++i) { - i64 num = (i64)randGen.GenRand(); - control->SetFromHtmlRequest(num); - if (num < lowerBound) { - UNIT_ASSERT_EQUAL(control->Get(), lowerBound); - } else if (upperBound < num) { - UNIT_ASSERT_EQUAL(control->Get(), upperBound); - } else { - UNIT_ASSERT_EQUAL(control->Get(), num); - } - UNIT_ASSERT_EQUAL(control->GetDefault(), defaultValue); - control->RestoreDefault(); - UNIT_ASSERT(control->IsDefault()); - UNIT_ASSERT_EQUAL(control->Get(), defaultValue); - UNIT_ASSERT_EQUAL(control->GetDefault(), defaultValue); - } - } - + NPrivate::TMersenne64 randGen(Seed()); + std::array<i64, 3> bounds; + for (ui64 i = 0; i < 3; ++i) { + bounds[i] = (i64)randGen.GenRand(); + } + std::sort(bounds.begin(), bounds.end()); + i64 lowerBound = bounds[0]; + i64 defaultValue = bounds[1]; + i64 upperBound = bounds[2]; + + TIntrusivePtr<TControl> control(new TControl(defaultValue, lowerBound, upperBound)); + for (ui64 i = 0; i < TEST_REPEATS; ++i) { + i64 num = (i64)randGen.GenRand(); + control->Set(num); + UNIT_ASSERT_EQUAL(control->Get(), num); + UNIT_ASSERT_EQUAL(control->GetDefault(), num); + } + control = new TControl(defaultValue, lowerBound, upperBound); + for (ui64 i = 0; i < TEST_REPEATS; ++i) { + i64 num = (i64)randGen.GenRand(); + control->SetFromHtmlRequest(num); + if (num < lowerBound) { + UNIT_ASSERT_EQUAL(control->Get(), lowerBound); + } else if (upperBound < num) { + UNIT_ASSERT_EQUAL(control->Get(), upperBound); + } else { + 
UNIT_ASSERT_EQUAL(control->Get(), num); + } + UNIT_ASSERT_EQUAL(control->GetDefault(), defaultValue); + control->RestoreDefault(); + UNIT_ASSERT(control->IsDefault()); + UNIT_ASSERT_EQUAL(control->Get(), defaultValue); + UNIT_ASSERT_EQUAL(control->GetDefault(), defaultValue); + } + } + Y_UNIT_TEST(TestControlWrapperAsI64) { - NPrivate::TMersenne64 randGen(Seed()); - TControlWrapper wrapper1; - for (ui64 i = 0; i < TEST_REPEATS; ++i) { - i64 num = (i64)randGen.GenRand(); - wrapper1 = num; - TControlWrapper wrapper2(num); - UNIT_ASSERT_EQUAL(wrapper1, num); - UNIT_ASSERT_EQUAL(wrapper2, num); - } - } - + NPrivate::TMersenne64 randGen(Seed()); + TControlWrapper wrapper1; + for (ui64 i = 0; i < TEST_REPEATS; ++i) { + i64 num = (i64)randGen.GenRand(); + wrapper1 = num; + TControlWrapper wrapper2(num); + UNIT_ASSERT_EQUAL(wrapper1, num); + UNIT_ASSERT_EQUAL(wrapper2, num); + } + } + Y_UNIT_TEST(TestControlWrapperBounds) { - NPrivate::TMersenne64 randGen(Seed()); - std::array<i64, 3> bounds; - for (ui64 i = 0; i < 3; ++i) { - bounds[i] = (i64)randGen.GenRand(); - } - std::sort(bounds.begin(), bounds.end()); - i64 lowerBound = bounds[0]; - i64 defaultValue = bounds[1]; - i64 upperBound = bounds[2]; - - TControlWrapper wrapper(defaultValue, lowerBound, upperBound); - for (ui64 i = 0; i < TEST_REPEATS; ++i) { - i64 num = (i64)randGen.GenRand(); - wrapper = num; - UNIT_ASSERT_EQUAL(wrapper, num); - } - } - + NPrivate::TMersenne64 randGen(Seed()); + std::array<i64, 3> bounds; + for (ui64 i = 0; i < 3; ++i) { + bounds[i] = (i64)randGen.GenRand(); + } + std::sort(bounds.begin(), bounds.end()); + i64 lowerBound = bounds[0]; + i64 defaultValue = bounds[1]; + i64 upperBound = bounds[2]; + + TControlWrapper wrapper(defaultValue, lowerBound, upperBound); + for (ui64 i = 0; i < TEST_REPEATS; ++i) { + i64 num = (i64)randGen.GenRand(); + wrapper = num; + UNIT_ASSERT_EQUAL(wrapper, num); + } + } + Y_UNIT_TEST(TestRegisterLocalControl) { - TIntrusivePtr<TControlBoard> Icb(new 
TControlBoard); - TControlWrapper control1(1, 1, 1); - TControlWrapper control2(2, 2, 2); - UNIT_ASSERT(Icb->RegisterLocalControl(control1, "localControl")); - UNIT_ASSERT(!Icb->RegisterLocalControl(control2, "localControl")); - UNIT_ASSERT_EQUAL(1, 1); - } - + TIntrusivePtr<TControlBoard> Icb(new TControlBoard); + TControlWrapper control1(1, 1, 1); + TControlWrapper control2(2, 2, 2); + UNIT_ASSERT(Icb->RegisterLocalControl(control1, "localControl")); + UNIT_ASSERT(!Icb->RegisterLocalControl(control2, "localControl")); + UNIT_ASSERT_EQUAL(1, 1); + } + Y_UNIT_TEST(TestRegisterSharedControl) { - TIntrusivePtr<TControlBoard> Icb(new TControlBoard); - TControlWrapper control1(1, 1, 1); - TControlWrapper control1_origin(control1); - TControlWrapper control2(2, 2, 2); - TControlWrapper control2_origin(control2); - Icb->RegisterSharedControl(control1, "sharedControl"); - UNIT_ASSERT(control1.IsTheSame(control1_origin)); - Icb->RegisterSharedControl(control2, "sharedControl"); - UNIT_ASSERT(control2.IsTheSame(control1_origin)); - } - + TIntrusivePtr<TControlBoard> Icb(new TControlBoard); + TControlWrapper control1(1, 1, 1); + TControlWrapper control1_origin(control1); + TControlWrapper control2(2, 2, 2); + TControlWrapper control2_origin(control2); + Icb->RegisterSharedControl(control1, "sharedControl"); + UNIT_ASSERT(control1.IsTheSame(control1_origin)); + Icb->RegisterSharedControl(control2, "sharedControl"); + UNIT_ASSERT(control2.IsTheSame(control1_origin)); + } + Y_UNIT_TEST(TestParallelRegisterSharedControl) { - void* (*parallelJob)(void*) = [](void *controlBoard) -> void *{ - TControlBoard *Icb = reinterpret_cast<TControlBoard *>(controlBoard); - TControlWrapper control1(1, 1, 1); - Icb->RegisterSharedControl(control1, "sharedControl"); - // Useless because running this test with --sanitize=thread cannot reveal - // race condition in Icb->RegisterLocalControl(...) 
without mutex - TControlWrapper control2(2, 2, 2); - TControlWrapper control2_origin(control2); - Icb->RegisterLocalControl(control2, "localControl"); - UNIT_ASSERT_EQUAL(control2, control2_origin); - return nullptr; - }; - TIntrusivePtr<TControlBoard> Icb(new TControlBoard); + void* (*parallelJob)(void*) = [](void *controlBoard) -> void *{ + TControlBoard *Icb = reinterpret_cast<TControlBoard *>(controlBoard); + TControlWrapper control1(1, 1, 1); + Icb->RegisterSharedControl(control1, "sharedControl"); + // Useless because running this test with --sanitize=thread cannot reveal + // race condition in Icb->RegisterLocalControl(...) without mutex + TControlWrapper control2(2, 2, 2); + TControlWrapper control2_origin(control2); + Icb->RegisterLocalControl(control2, "localControl"); + UNIT_ASSERT_EQUAL(control2, control2_origin); + return nullptr; + }; + TIntrusivePtr<TControlBoard> Icb(new TControlBoard); TVector<THolder<TThread>> threads; - threads.reserve(TEST_THREADS_CNT); - for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { - threads.emplace_back(new TThread(parallelJob, (void *)Icb.Get())); - } - for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { - threads[i]->Start(); - } - for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { - threads[i]->Join(); - } - } -} - -} // namespace NKikimr - + threads.reserve(TEST_THREADS_CNT); + for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { + threads.emplace_back(new TThread(parallelJob, (void *)Icb.Get())); + } + for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { + threads[i]->Start(); + } + for (ui64 i = 0; i < TEST_THREADS_CNT; ++i) { + threads[i]->Join(); + } + } +} + +} // namespace NKikimr + diff --git a/ydb/core/control/immediate_control_board_wrapper.h b/ydb/core/control/immediate_control_board_wrapper.h index d707f52dfa..ce8a6adde5 100644 --- a/ydb/core/control/immediate_control_board_wrapper.h +++ b/ydb/core/control/immediate_control_board_wrapper.h @@ -1,36 +1,36 @@ -#pragma once -#include "defs.h" -#include "immediate_control_board_control.h" 
- -namespace NKikimr { - -class TControlWrapper { - TIntrusivePtr<TControl> Control; - friend class TControlBoard; - -public: - TControlWrapper(TAtomicBase defaultValue = 0) - : Control(new TControl(defaultValue, Min<TAtomicBase>(), Max<TAtomicBase>())) - {} - - TControlWrapper(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound) - : Control(new TControl(defaultValue, lowerBound, upperBound)) - {} - - operator i64() const { - return Control->Get(); - } - - i64 operator=(i64 value) { - Control->Set(value); - return value; - } - - bool IsTheSame(TControlWrapper another) { - return Control == another.Control; - } -}; - +#pragma once +#include "defs.h" +#include "immediate_control_board_control.h" + +namespace NKikimr { + +class TControlWrapper { + TIntrusivePtr<TControl> Control; + friend class TControlBoard; + +public: + TControlWrapper(TAtomicBase defaultValue = 0) + : Control(new TControl(defaultValue, Min<TAtomicBase>(), Max<TAtomicBase>())) + {} + + TControlWrapper(TAtomicBase defaultValue, TAtomicBase lowerBound, TAtomicBase upperBound) + : Control(new TControl(defaultValue, lowerBound, upperBound)) + {} + + operator i64() const { + return Control->Get(); + } + + i64 operator=(i64 value) { + Control->Set(value); + return value; + } + + bool IsTheSame(TControlWrapper another) { + return Control == another.Control; + } +}; + class TMemorizableControlWrapper { static constexpr i32 RequestCountWithRelevantValue = 1024; static constexpr TDuration TimeDurationWithRelevantValue = TDuration::Seconds(15); @@ -61,4 +61,4 @@ public: } }; -} +} diff --git a/ydb/core/control/ut/ya.make b/ydb/core/control/ut/ya.make index 7d8cbbe565..18a4c9dc3b 100644 --- a/ydb/core/control/ut/ya.make +++ b/ydb/core/control/ut/ya.make @@ -4,14 +4,14 @@ OWNER( ) UNITTEST_FOR(ydb/core/control) - -FORK_SUBTESTS() - + +FORK_SUBTESTS() + TIMEOUT(600) SIZE(MEDIUM) - -PEERDIR( + +PEERDIR( library/cpp/actors/core library/cpp/actors/interconnect library/cpp/testing/unittest @@ 
-19,11 +19,11 @@ PEERDIR( ydb/core/base ydb/core/mind ydb/core/mon -) - -SRCS( - immediate_control_board_ut.cpp - immediate_control_board_actor_ut.cpp -) - -END() +) + +SRCS( + immediate_control_board_ut.cpp + immediate_control_board_actor_ut.cpp +) + +END() diff --git a/ydb/core/control/ya.make b/ydb/core/control/ya.make index edefb8e019..bb1de545ae 100644 --- a/ydb/core/control/ya.make +++ b/ydb/core/control/ya.make @@ -1,31 +1,31 @@ -LIBRARY() - -OWNER( - va-kuznecov - g:kikimr -) - -PEERDIR( +LIBRARY() + +OWNER( + va-kuznecov + g:kikimr +) + +PEERDIR( library/cpp/actors/core library/cpp/monlib/dynamic_counters - util + util ydb/core/base ydb/core/mon ydb/core/node_whiteboard -) - -SRCS( - defs.h - immediate_control_board_actor.cpp - immediate_control_board_actor.h - immediate_control_board_control.cpp - immediate_control_board_control.h - immediate_control_board_impl.cpp - immediate_control_board_impl.h - immediate_control_board_wrapper.h -) - -END() +) + +SRCS( + defs.h + immediate_control_board_actor.cpp + immediate_control_board_actor.h + immediate_control_board_control.cpp + immediate_control_board_control.h + immediate_control_board_impl.cpp + immediate_control_board_impl.h + immediate_control_board_wrapper.h +) + +END() RECURSE_FOR_TESTS( ut diff --git a/ydb/core/driver_lib/base_utils/format_info.cpp b/ydb/core/driver_lib/base_utils/format_info.cpp index 12575d649a..e5d53a7b80 100644 --- a/ydb/core/driver_lib/base_utils/format_info.cpp +++ b/ydb/core/driver_lib/base_utils/format_info.cpp @@ -52,7 +52,7 @@ int MainFormatInfo(const TCommandConfig &cmdConf, int argc, char** argv) { Cout << "nonceReversalCount: " << nonceReversalCount << Endl; } } else { - Cout << "Error. Can't read PDisk format info. Reason# " << info.ErrorReason << Endl; + Cout << "Error. Can't read PDisk format info. 
Reason# " << info.ErrorReason << Endl; } return 0; } diff --git a/ydb/core/driver_lib/cli_base/cli_cmds_db.cpp b/ydb/core/driver_lib/cli_base/cli_cmds_db.cpp index 09ceec2f5d..450d4135c6 100644 --- a/ydb/core/driver_lib/cli_base/cli_cmds_db.cpp +++ b/ydb/core/driver_lib/cli_base/cli_cmds_db.cpp @@ -121,14 +121,14 @@ public: : TClientCommandConfig("execute", { "exec" }, "Execute schema protobuf") {} - bool ReturnTxId; - + bool ReturnTxId; + TList<TAutoPtr<NKikimrClient::TSchemeOperation>> Requests; virtual void Config(TConfig& config) override { TClientCommand::Config(config); - ReturnTxId = false; - config.Opts->AddLongOption('t', "txid", "Print TxId").NoArgument().SetFlag(&ReturnTxId); + ReturnTxId = false; + config.Opts->AddLongOption('t', "txid", "Print TxId").NoArgument().SetFlag(&ReturnTxId); config.SetFreeArgsNum(1); SetFreeArgTitle(0, "<SCHEMA-PROTO>", "Schema protobuf or file with schema protobuf"); } @@ -151,18 +151,18 @@ public: TAutoPtr<NMsgBusProxy::TBusSchemeOperation> request(new NMsgBusProxy::TBusSchemeOperation()); request->Record.MergeFrom(*pbRequest); result = MessageBusCall<NMsgBusProxy::TBusSchemeOperation, NMsgBusProxy::TBusResponse>(config, request, - [this](const NMsgBusProxy::TBusResponse& response) -> int { + [this](const NMsgBusProxy::TBusResponse& response) -> int { if (response.Record.GetStatus() != NMsgBusProxy::MSTATUS_OK) { Cerr << ToCString(static_cast<NMsgBusProxy::EResponseStatus>(response.Record.GetStatus())) << " " << response.Record.GetErrorReason() << Endl; return 1; } - if (ReturnTxId) { - if (response.Record.HasFlatTxId() && response.Record.GetFlatTxId().HasTxId()) { - Cout << "TxId: " << response.Record.GetFlatTxId().GetTxId() << Endl; - } else { - Cout << "TxId: not returned" << Endl; - } - } + if (ReturnTxId) { + if (response.Record.HasFlatTxId() && response.Record.GetFlatTxId().HasTxId()) { + Cout << "TxId: " << response.Record.GetFlatTxId().GetTxId() << Endl; + } else { + Cout << "TxId: not returned" << Endl; + } + } 
return 0; }); if (result != 0) { @@ -357,29 +357,29 @@ public: NKikimrSchemeOp::TBackupProgress backup = path.GetBackupProgress(); ui32 total = backup.GetTotal(); ui32 notYet = backup.GetNotCompleteYet(); - Cout << "backup in progress: " << (total - notYet) << "/" << total; - if (backup.HasTxId()) { - Cout << " txId: " << backup.GetTxId(); - } - Cout << Endl; + Cout << "backup in progress: " << (total - notYet) << "/" << total; + if (backup.HasTxId()) { + Cout << " txId: " << backup.GetTxId(); + } + Cout << Endl; } for (const auto& backupResult : path.GetLastBackupResult()) { Cout << "backup done: " << backupResult.GetCompleteTimeStamp() - << " txId: " << backupResult.GetTxId() - << " errors: " << backupResult.GetErrorCount(); - if (backupResult.ErrorsSize()) { - Cout << " errorsExplain: { "; - bool first = true; - for (const auto &shardError : backupResult.GetErrors()) { - Cout << shardError.GetExplain().Quote(); - if (!first) { - Cout << ", "; - } - first = false; - } - Cout << " }"; - } - Cout << Endl; + << " txId: " << backupResult.GetTxId() + << " errors: " << backupResult.GetErrorCount(); + if (backupResult.ErrorsSize()) { + Cout << " errorsExplain: { "; + bool first = true; + for (const auto &shardError : backupResult.GetErrors()) { + Cout << shardError.GetExplain().Quote(); + if (!first) { + Cout << ", "; + } + first = false; + } + Cout << " }"; + } + Cout << Endl; } } if (Protobuf) { diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_console.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_console.cpp index 816fa920db..0497d4ea8f 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_console.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_console.cpp @@ -122,7 +122,7 @@ public: ConfigFiles["PQClusterDiscoveryConfig"] = "pqcd.txt"; ConfigFiles["NetClassifierConfig"] = "netclassifier.txt"; ConfigFiles["KeyConfig"] = "key.txt"; - ConfigFiles["PDiskKeyConfig"] = "pdisk_key.txt"; + ConfigFiles["PDiskKeyConfig"] = "pdisk_key.txt"; ConfigFiles["ClusterYamlConfig"] = 
"cluster.yaml"; } diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_disk.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_disk.cpp index efb3e29b35..a9be370d37 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_disk.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_disk.cpp @@ -23,7 +23,7 @@ public: LockDevice = false; MainKey = 0; config.SetFreeArgsNum(1); - SetFreeArgTitle(0, "<PATH>", "Disk path"); + SetFreeArgTitle(0, "<PATH>", "Disk path"); config.Opts->AddLongOption('k', "main-key", "encryption main-key to use while reading").RequiredArgument("NUM") .Optional().StoreResult(&MainKey); // TODO: make required config.Opts->AddLongOption("master-key", "obsolete: use main-key").RequiredArgument("NUM") @@ -121,7 +121,7 @@ public: Guid = 0; IsErasureEncode = false; config.SetFreeArgsNum(1); - SetFreeArgTitle(0, "<PATH>", "Disk path"); + SetFreeArgTitle(0, "<PATH>", "Disk path"); config.Opts->AddLongOption('d', "disk-size", "disk size to set (supports K/M/G/T suffixes, 0 to autodetect, default = 0)\n" "kikimr needs disk of at least 16 GiB, disk must be large enough to contain at least 100 chunks") .OptionalArgument("BYTES").StoreResult(&DiskSize); @@ -174,36 +174,36 @@ public: } }; -class TClientCommandDiskObliterate : public TClientCommand { -public: - TClientCommandDiskObliterate() - : TClientCommand("obliterate", {}, "Obliterate local disk, so it will be self-formatted on startup") - {} - - TString Path; - - virtual void Config(TConfig& config) override { - TClientCommand::Config(config); - config.SetFreeArgsNum(1); - SetFreeArgTitle(0, "<PATH>", "Disk path"); - } - - virtual void Parse(TConfig& config) override { - TClientCommand::Parse(config); - Path = config.ParseResult->GetFreeArgs()[0]; - } - - virtual int Run(TConfig&) override { - try { - ObliterateDisk(Path); - } catch (TFileError& e) { - Cerr << "Error, what# " << e.what() << Endl; - return 1; - } - return 0; - } -}; - +class TClientCommandDiskObliterate : public TClientCommand { +public: + 
TClientCommandDiskObliterate() + : TClientCommand("obliterate", {}, "Obliterate local disk, so it will be self-formatted on startup") + {} + + TString Path; + + virtual void Config(TConfig& config) override { + TClientCommand::Config(config); + config.SetFreeArgsNum(1); + SetFreeArgTitle(0, "<PATH>", "Disk path"); + } + + virtual void Parse(TConfig& config) override { + TClientCommand::Parse(config); + Path = config.ParseResult->GetFreeArgs()[0]; + } + + virtual int Run(TConfig&) override { + try { + ObliterateDisk(Path); + } catch (TFileError& e) { + Cerr << "Error, what# " << e.what() << Endl; + return 1; + } + return 0; + } +}; + class TClientCommandDisk : public TClientCommandTree { public: TClientCommandDisk() diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_genconfig.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_genconfig.cpp index 7065fb30a1..af2801de36 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_genconfig.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_genconfig.cpp @@ -270,12 +270,12 @@ public: pdiskItem.SetPath(pdiskInfo.Path); pdiskItem.SetPDiskGuid(pdiskInfo.PDiskGuid); - TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DeviceTypeFromStr(pdiskInfo.Type); - if (deviceType == TPDiskCategory::DEVICE_TYPE_UNKNOWN) { + TPDiskCategory::EDeviceType deviceType = TPDiskCategory::DeviceTypeFromStr(pdiskInfo.Type); + if (deviceType == TPDiskCategory::DEVICE_TYPE_UNKNOWN) { ythrow yexception() << "invalid PDisk Type " << pdiskInfo.Type; } const ui64 kind = 0; - TPDiskCategory cat(deviceType, kind); + TPDiskCategory cat(deviceType, kind); pdiskItem.SetPDiskCategory(cat.GetRaw()); if (pdiskInfo.PDiskConfig) { diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp index b49674a68a..6766dd3171 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp @@ -213,8 +213,8 @@ protected: config.Opts->AddLongOption("netclassifier-file", 
"NetClassifier config file").OptionalArgument("PATH"); config.Opts->AddLongOption("auth-file", "authorization configuration").OptionalArgument("PATH"); config.Opts->AddLongOption("auth-token-file", "authorization token configuration").OptionalArgument("PATH"); - config.Opts->AddLongOption("key-file", "tanant encryption key configuration").OptionalArgument("PATH"); - config.Opts->AddLongOption("pdisk-key-file", "pdisk encryption key configuration").OptionalArgument("PATH"); + config.Opts->AddLongOption("key-file", "tanant encryption key configuration").OptionalArgument("PATH"); + config.Opts->AddLongOption("pdisk-key-file", "pdisk encryption key configuration").OptionalArgument("PATH"); config.Opts->AddLongOption("sqs-file", "SQS config file").OptionalArgument("PATH"); config.Opts->AddLongOption("bootstrap-file", "Bootstrap config file").OptionalArgument("PATH"); config.Opts->AddLongOption("dyn-nodes-file", "Dynamic nodes config file").OptionalArgument("PATH"); @@ -222,7 +222,7 @@ protected: config.Opts->AddLongOption("alloc-file", "Allocator config file").OptionalArgument("PATH"); config.Opts->AddLongOption("yql-file", "Yql Analytics config file").OptionalArgument("PATH"); config.Opts->AddLongOption("yq-file", "Yandex Query config file").OptionalArgument("PATH"); - config.Opts->AddLongOption("feature-flags-file", "File with feature flags to turn new features on/off").OptionalArgument("PATH"); + config.Opts->AddLongOption("feature-flags-file", "File with feature flags to turn new features on/off").OptionalArgument("PATH"); config.Opts->AddLongOption("rb-file", "File with resource broker customizations").OptionalArgument("PATH"); config.Opts->AddLongOption("metering-file", "File with metering config").OptionalArgument("PATH"); config.Opts->AddLongOption('r', "restarts-count-file", "State for restarts monitoring counter,\nuse empty string to disable\n") @@ -427,7 +427,7 @@ protected: OPTION("auth-file", AuthConfig); OPTION_MERGE("auth-token-file", AuthConfig); 
OPTION("key-file", KeyConfig); - OPTION("pdisk-key-file", PDiskKeyConfig); + OPTION("pdisk-key-file", PDiskKeyConfig); OPTION("sqs-file", SqsConfig); OPTION("feature-flags-file", FeatureFlags); OPTION("rb-file", ResourceBrokerConfig); diff --git a/ydb/core/driver_lib/run/config.h b/ydb/core/driver_lib/run/config.h index 3a70989d98..faf1797413 100644 --- a/ydb/core/driver_lib/run/config.h +++ b/ydb/core/driver_lib/run/config.h @@ -13,7 +13,7 @@ namespace NKikimr { union TBasicKikimrServicesMask { struct { bool EnableBasicServices:1; - bool EnableIcbService:1; + bool EnableIcbService:1; bool EnableWhiteBoard:1; bool EnableBSNodeWarden:1; bool EnableStateStorageService:1; @@ -21,7 +21,7 @@ union TBasicKikimrServicesMask { bool EnableSharedCache:1; bool EnableBlobCache:1; bool EnableLogger:1; - bool EnableSchedulerActor:1; + bool EnableSchedulerActor:1; bool EnableProfiler:1; bool EnableResourceBroker:1; bool EnableTabletResolver:1; diff --git a/ydb/core/driver_lib/run/config_parser.cpp b/ydb/core/driver_lib/run/config_parser.cpp index ce4e16924f..bb68656291 100644 --- a/ydb/core/driver_lib/run/config_parser.cpp +++ b/ydb/core/driver_lib/run/config_parser.cpp @@ -133,10 +133,10 @@ void TRunCommandConfigParser::ParseConfigFiles(const NLastGetopt::TOptsParseResu Y_VERIFY(ParsePBFromFile(res.Get("grpc-file"), Config.AppConfig.MutableGRpcConfig())); } - if (res.Has("feature-flags-file")) { + if (res.Has("feature-flags-file")) { Y_VERIFY(ParsePBFromFile(res.Get("feature-flags-file"), Config.AppConfig.MutableFeatureFlags(), true)); - } - + } + if (res.Has("sqs-file")) { Y_VERIFY(ParsePBFromFile(res.Get("sqs-file"), Config.AppConfig.MutableSqsConfig())); } @@ -192,10 +192,10 @@ void TRunCommandConfigParser::ParseConfigFiles(const NLastGetopt::TOptsParseResu Y_VERIFY(ParsePBFromFile(res.Get("key-file"), Config.AppConfig.MutableKeyConfig())); } - if (res.Has("pdisk-key-file")) { - Y_VERIFY(ParsePBFromFile(res.Get("pdisk-key-file"), Config.AppConfig.MutablePDiskKeyConfig())); - 
} - + if (res.Has("pdisk-key-file")) { + Y_VERIFY(ParsePBFromFile(res.Get("pdisk-key-file"), Config.AppConfig.MutablePDiskKeyConfig())); + } + if (res.Has("alloc-file")) { Y_VERIFY(ParsePBFromFile(res.Get("alloc-file"), Config.AppConfig.MutableAllocatorConfig())); } else { @@ -249,7 +249,7 @@ void TRunCommandConfigParser::ParseRunOpts(int argc, char **argv) { SetupLastGetOptForConfigFiles(opts); opts.AddLongOption("bootstrap-file", "Bootstrap config file").OptionalArgument("PATH"); - opts.AddLongOption("feature-flags-file", "File with feature flags to turn new features on/off").OptionalArgument("PATH"); + opts.AddLongOption("feature-flags-file", "File with feature flags to turn new features on/off").OptionalArgument("PATH"); opts.AddLongOption('r', "restarts-count-file", "State for restarts monitoring counter,\nuse empty string to disable\n") .OptionalArgument("PATH").DefaultValue(RunOpts.RestartsCountFile).StoreResult(&RunOpts.RestartsCountFile); opts.AddLongOption("compile-inflight-limit", "Limit on parallel programs compilation").OptionalArgument("NUM").StoreResult(&RunOpts.CompileInflightLimit); diff --git a/ydb/core/driver_lib/run/factories.h b/ydb/core/driver_lib/run/factories.h index eb0be2ba89..41a45b44e0 100644 --- a/ydb/core/driver_lib/run/factories.h +++ b/ydb/core/driver_lib/run/factories.h @@ -38,9 +38,9 @@ struct TModuleFactories { // Can be nullptr. In that case there would be no ability to work with Yandex Logbroker in Yandex Query. 
NPq::NConfigurationManager::IConnections::TPtr PqCmConnections; // Export implementation for Data Shards - std::shared_ptr<NDataShard::IExportFactory> DataShardExportFactory; + std::shared_ptr<NDataShard::IExportFactory> DataShardExportFactory; // Factory for Simple queue services implementation details - std::shared_ptr<NSQS::IEventsWriterFactory> SqsEventsWriterFactory; + std::shared_ptr<NSQS::IEventsWriterFactory> SqsEventsWriterFactory; IActor*(*CreateTicketParser)(const NKikimrProto::TAuthConfig&); IActor*(*FolderServiceFactory)(const NKikimrProto::NFolderService::TFolderServiceConfig&); @@ -51,8 +51,8 @@ struct TModuleFactories { TGrpcServiceFactory GrpcServiceFactory; std::shared_ptr<NPQ::IPersQueueMirrorReaderFactory> PersQueueMirrorReaderFactory; - /// Factory for pdisk's aio engines - std::shared_ptr<NPDisk::IIoContextFactory> IoContextFactory; + /// Factory for pdisk's aio engines + std::shared_ptr<NPDisk::IIoContextFactory> IoContextFactory; std::function<NActors::TMon* (NActors::TMon::TConfig)> MonitoringFactory; std::shared_ptr<NSQS::IAuthFactory> SqsAuthFactory; diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index c8f4af624e..819c1478d1 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -342,14 +342,14 @@ static TCpuManagerConfig CreateCpuManagerConfig(const NKikimrConfig::TActorSyste return cpuManager; } -static TSchedulerConfig CreateSchedulerConfig(const NKikimrConfig::TActorSystemConfig::TScheduler &config) { +static TSchedulerConfig CreateSchedulerConfig(const NKikimrConfig::TActorSystemConfig::TScheduler &config) { const ui64 resolution = config.HasResolution() ? config.GetResolution() : 1024; Y_VERIFY_DEBUG((resolution & (resolution - 1)) == 0); // resolution must be power of 2 const ui64 spinThreshold = config.HasSpinThreshold() ? 
config.GetSpinThreshold() : 0; const ui64 progressThreshold = config.HasProgressThreshold() ? config.GetProgressThreshold() : 10000; const bool useSchedulerActor = config.HasUseSchedulerActor() ? config.GetUseSchedulerActor() : false; - return TSchedulerConfig(resolution, spinThreshold, progressThreshold, useSchedulerActor); + return TSchedulerConfig(resolution, spinThreshold, progressThreshold, useSchedulerActor); } TBasicServicesInitializer::TBasicServicesInitializer(const TKikimrRunConfig& runConfig) @@ -762,27 +762,27 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s } } -// TImmediateControlBoardInitializer +// TImmediateControlBoardInitializer -TImmediateControlBoardInitializer::TImmediateControlBoardInitializer(const TKikimrRunConfig& runConfig) - : IKikimrServicesInitializer(runConfig) { -} - -void TImmediateControlBoardInitializer::InitializeServices(NActors::TActorSystemSetup* setup, - const NKikimr::TAppData* appData) { +TImmediateControlBoardInitializer::TImmediateControlBoardInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TImmediateControlBoardInitializer::InitializeServices(NActors::TActorSystemSetup* setup, + const NKikimr::TAppData* appData) { setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>( - MakeIcbId(NodeId), + MakeIcbId(NodeId), TActorSetupCmd(CreateImmediateControlActor(appData->Icb, appData->Counters), TMailboxType::ReadAsFilled, appData->UserPoolId) - )); + )); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>( TActorId(), TActorSetupCmd(NConsole::CreateImmediateControlsConfigurator(appData->Icb, Config.GetImmediateControlsConfig()), TMailboxType::ReadAsFilled, appData->UserPoolId) )); -} - - +} + + // TBSNodeWardenInitializer TBSNodeWardenInitializer::TBSNodeWardenInitializer(const TKikimrRunConfig& runConfig) @@ -813,7 +813,7 @@ void TBSNodeWardenInitializer::InitializeServices(NActors::TActorSystemSetup* se 
ObtainTenantKey(&nodeWardenConfig->TenantKey, Config.GetKeyConfig()); ObtainStaticKey(&nodeWardenConfig->StaticKey); - ObtainPDiskKey(&nodeWardenConfig->PDiskKey, Config.GetPDiskKeyConfig()); + ObtainPDiskKey(&nodeWardenConfig->PDiskKey, Config.GetPDiskKeyConfig()); setup->LocalServices.push_back(std::pair<TActorId, TActorSetupCmd>(MakeBlobStorageNodeWardenID(NodeId), TActorSetupCmd(CreateBSNodeWarden(nodeWardenConfig.Release()), @@ -1047,23 +1047,23 @@ void TLoggerInitializer::InitializeServices( TActorSetupCmd(configurator, TMailboxType::HTSwap, appData->UserPoolId)); } -// TSchedulerActorInitializer - -TSchedulerActorInitializer::TSchedulerActorInitializer(const TKikimrRunConfig& runConfig) - : IKikimrServicesInitializer(runConfig) { -} - -void TSchedulerActorInitializer::InitializeServices( - NActors::TActorSystemSetup* setup, - const NKikimr::TAppData* appData) { - auto& systemConfig = Config.GetActorSystemConfig(); - NActors::IActor *schedulerActor = CreateSchedulerActor(CreateSchedulerConfig(systemConfig.GetScheduler())); - if (schedulerActor) { +// TSchedulerActorInitializer + +TSchedulerActorInitializer::TSchedulerActorInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TSchedulerActorInitializer::InitializeServices( + NActors::TActorSystemSetup* setup, + const NKikimr::TAppData* appData) { + auto& systemConfig = Config.GetActorSystemConfig(); + NActors::IActor *schedulerActor = CreateSchedulerActor(CreateSchedulerConfig(systemConfig.GetScheduler())); + if (schedulerActor) { NActors::TActorSetupCmd schedulerActorCmd(schedulerActor, NActors::TMailboxType::ReadAsFilled, appData->SystemPoolId); setup->LocalServices.emplace_back(MakeSchedulerActorId(), schedulerActorCmd); - } -} - + } +} + // TProfilerInitializer TProfilerInitializer::TProfilerInitializer(const TKikimrRunConfig& runConfig) @@ -1773,7 +1773,7 @@ TLoadInitializer::TLoadInitializer(const TKikimrRunConfig& runConfig) {} void 
TLoadInitializer::InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) { - IActor *actor = CreateTestLoadActor(appData->Counters); + IActor *actor = CreateTestLoadActor(appData->Counters); setup->LocalServices.emplace_back(MakeBlobStorageLoadID(NodeId), TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)); // FIXME: correct service id } diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h index 6e8422955c..407ce1bb7b 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.h +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h @@ -55,13 +55,13 @@ public: void InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) override; }; -class TImmediateControlBoardInitializer : public IKikimrServicesInitializer { -public: - TImmediateControlBoardInitializer(const TKikimrRunConfig& runConfig); - +class TImmediateControlBoardInitializer : public IKikimrServicesInitializer { +public: + TImmediateControlBoardInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) override; -}; - +}; + class TBSNodeWardenInitializer : public IKikimrServicesInitializer { public: TBSNodeWardenInitializer(const TKikimrRunConfig& runConfig); @@ -111,13 +111,13 @@ public: void InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) override; }; -class TSchedulerActorInitializer : public IKikimrServicesInitializer { -public: - TSchedulerActorInitializer(const TKikimrRunConfig& runConfig); - +class TSchedulerActorInitializer : public IKikimrServicesInitializer { +public: + TSchedulerActorInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) override; -}; - +}; + class TProfilerInitializer : public IKikimrServicesInitializer { 
public: TProfilerInitializer(const TKikimrRunConfig& runConfig); diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index d56380a889..a4f74aa4e0 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -25,7 +25,7 @@ #include <ydb/core/actorlib_impl/mad_squirrel.h> #include <ydb/core/control/immediate_control_board_actor.h> - + #include <library/cpp/actors/protos/services_common.pb.h> #include <ydb/core/keyvalue/keyvalue.h> #include <ydb/core/formats/clickhouse_block.h> @@ -391,13 +391,13 @@ void TKikimrRunner::InitializeMonitoringLogin(const TKikimrRunConfig&) } } -void TKikimrRunner::InitializeControlBoard(const TKikimrRunConfig& runConfig) -{ - if (Monitoring) { - Monitoring->RegisterActorPage(ActorsMonPage, "icb", "Immediate Control Board", false, ActorSystem.Get(), MakeIcbId(runConfig.NodeId)); - } -} - +void TKikimrRunner::InitializeControlBoard(const TKikimrRunConfig& runConfig) +{ + if (Monitoring) { + Monitoring->RegisterActorPage(ActorsMonPage, "icb", "Immediate Control Board", false, ActorSystem.Get(), MakeIcbId(runConfig.NodeId)); + } +} + void TKikimrRunner::InitializeMessageBus( const TKikimrRunConfig& runConfig, std::shared_ptr<TModuleFactories> factories @@ -850,10 +850,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) FunctionRegistry.Get(), FormatFactory.Get(), &KikimrShouldContinue)); - AppData->DataShardExportFactory = ModuleFactories ? ModuleFactories->DataShardExportFactory.get() : nullptr; - AppData->SqsEventsWriterFactory = ModuleFactories ? ModuleFactories->SqsEventsWriterFactory.get() : nullptr; + AppData->DataShardExportFactory = ModuleFactories ? ModuleFactories->DataShardExportFactory.get() : nullptr; + AppData->SqsEventsWriterFactory = ModuleFactories ? ModuleFactories->SqsEventsWriterFactory.get() : nullptr; AppData->PersQueueMirrorReaderFactory = ModuleFactories ? 
ModuleFactories->PersQueueMirrorReaderFactory.get() : nullptr; - AppData->IoContextFactory = ModuleFactories ? ModuleFactories->IoContextFactory.get() : nullptr; + AppData->IoContextFactory = ModuleFactories ? ModuleFactories->IoContextFactory.get() : nullptr; AppData->SqsAuthFactory = ModuleFactories ? ModuleFactories->SqsAuthFactory.get() @@ -898,10 +898,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->KeyConfig.CopyFrom(runConfig.AppConfig.GetKeyConfig()); } - if (runConfig.AppConfig.HasPDiskKeyConfig()) { - AppData->PDiskKeyConfig.CopyFrom(runConfig.AppConfig.GetPDiskKeyConfig()); - } - + if (runConfig.AppConfig.HasPDiskKeyConfig()) { + AppData->PDiskKeyConfig.CopyFrom(runConfig.AppConfig.GetPDiskKeyConfig()); + } + if (runConfig.AppConfig.HasHiveConfig()) { AppData->HiveConfig.CopyFrom(runConfig.AppConfig.GetHiveConfig()); } @@ -1141,9 +1141,9 @@ TIntrusivePtr<TServiceInitializersList> TKikimrRunner::CreateServiceInitializers if (serviceMask.EnableBasicServices) { sil->AddServiceInitializer(new TBasicServicesInitializer(runConfig)); } - if (serviceMask.EnableIcbService) { - sil->AddServiceInitializer(new TImmediateControlBoardInitializer(runConfig)); - } + if (serviceMask.EnableIcbService) { + sil->AddServiceInitializer(new TImmediateControlBoardInitializer(runConfig)); + } if (serviceMask.EnableWhiteBoard) { sil->AddServiceInitializer(new TWhiteBoardServiceInitializer(runConfig)); } @@ -1171,9 +1171,9 @@ TIntrusivePtr<TServiceInitializersList> TKikimrRunner::CreateServiceInitializers if (serviceMask.EnableLogger) { sil->AddServiceInitializer(new TLoggerInitializer(runConfig, LogSettings, LogBackend)); } - if (serviceMask.EnableSchedulerActor) { - sil->AddServiceInitializer(new TSchedulerActorInitializer(runConfig)); - } + if (serviceMask.EnableSchedulerActor) { + sil->AddServiceInitializer(new TSchedulerActorInitializer(runConfig)); + } if (serviceMask.EnableProfiler) { sil->AddServiceInitializer(new 
TProfilerInitializer(runConfig)); } @@ -1486,8 +1486,8 @@ void TKikimrRunner::KikimrStop(bool graceful) { } if (ModuleFactories) { - if (ModuleFactories->DataShardExportFactory) { - ModuleFactories->DataShardExportFactory->Shutdown(); + if (ModuleFactories->DataShardExportFactory) { + ModuleFactories->DataShardExportFactory->Shutdown(); } } } diff --git a/ydb/core/driver_lib/run/run.h b/ydb/core/driver_lib/run/run.h index 98a333734b..dfd93a24e7 100644 --- a/ydb/core/driver_lib/run/run.h +++ b/ydb/core/driver_lib/run/run.h @@ -77,8 +77,8 @@ protected: void InitializeMonitoring(const TKikimrRunConfig& runConfig, bool includeHostName = true); - void InitializeControlBoard(const TKikimrRunConfig& runConfig); - + void InitializeControlBoard(const TKikimrRunConfig& runConfig); + void InitializeMonitoringLogin(const TKikimrRunConfig& runConfig); void InitializeMessageBus( diff --git a/ydb/core/erasure/erasure.cpp b/ydb/core/erasure/erasure.cpp index 156e61259d..a41b027932 100644 --- a/ydb/core/erasure/erasure.cpp +++ b/ydb/core/erasure/erasure.cpp @@ -1,12 +1,12 @@ #include "erasure.h" #include <util/generic/yexception.h> -#include <util/system/unaligned_mem.h> +#include <util/system/unaligned_mem.h> #include <library/cpp/containers/stack_vector/stack_vec.h> #include <library/cpp/digest/crc32c/crc32c.h> #define MAX_TOTAL_PARTS 8 -#define MAX_LINES_IN_BLOCK 8 +#define MAX_LINES_IN_BLOCK 8 #define IS_VERBOSE 0 #define IS_TRACE 0 @@ -28,9 +28,9 @@ static TString DebugFormatBits(ui64 value) { return s.Str(); } #else -# define VERBOSE_COUT(a) \ - do { \ - } while (false) +# define VERBOSE_COUT(a) \ + do { \ + } while (false) #endif #if IS_TRACE @@ -100,14 +100,14 @@ static const std::array<TErasureParameters, TErasureType::ErasureSpeciesCount> E ,{TErasureType::ErasureParityStripe, 3, 2, 3} // 7 = ErasureSpicies::Erasure3Plus2Stipe ,{TErasureType::ErasureMirror, 1, 2, 1} // 8 = ErasureSpicies::ErasureMirror3Plus2 ,{TErasureType::ErasureMirror, 1, 2, 1} // 9 = 
ErasureSpicies::ErasureMirror3dc - ,{TErasureType::ErasureParityBlock, 4, 3, 5} // 10 = ErasureSpicies::Erasure4Plus3Block - ,{TErasureType::ErasureParityStripe, 4, 3, 5} // 11 = ErasureSpicies::Erasure4Plus3Stripe - ,{TErasureType::ErasureParityBlock, 3, 3, 3} // 12 = ErasureSpicies::Erasure3Plus3Block - ,{TErasureType::ErasureParityStripe, 3, 3, 3} // 13 = ErasureSpicies::Erasure3Plus3Stripe - ,{TErasureType::ErasureParityBlock, 2, 3, 3} // 14 = ErasureSpicies::Erasure2Plus3Block - ,{TErasureType::ErasureParityStripe, 2, 3, 3} // 15 = ErasureSpicies::Erasure2Plus3Stripe - ,{TErasureType::ErasureParityBlock, 2, 2, 3} // 16 = ErasureSpicies::Erasure2Plus2Block - ,{TErasureType::ErasureParityStripe, 2, 2, 3} // 17 = ErasureSpicies::Erasure2Plus2Stripe + ,{TErasureType::ErasureParityBlock, 4, 3, 5} // 10 = ErasureSpicies::Erasure4Plus3Block + ,{TErasureType::ErasureParityStripe, 4, 3, 5} // 11 = ErasureSpicies::Erasure4Plus3Stripe + ,{TErasureType::ErasureParityBlock, 3, 3, 3} // 12 = ErasureSpicies::Erasure3Plus3Block + ,{TErasureType::ErasureParityStripe, 3, 3, 3} // 13 = ErasureSpicies::Erasure3Plus3Stripe + ,{TErasureType::ErasureParityBlock, 2, 3, 3} // 14 = ErasureSpicies::Erasure2Plus3Block + ,{TErasureType::ErasureParityStripe, 2, 3, 3} // 15 = ErasureSpicies::Erasure2Plus3Stripe + ,{TErasureType::ErasureParityBlock, 2, 2, 3} // 16 = ErasureSpicies::Erasure2Plus2Block + ,{TErasureType::ErasureParityStripe, 2, 2, 3} // 17 = ErasureSpicies::Erasure2Plus2Stripe ,{TErasureType::ErasureMirror, 1, 2, 1} // 18 = ErasureSpicies::ErasureMirror3of4 }}; @@ -120,24 +120,24 @@ void PadAndCrcAtTheEnd(char *data, ui64 dataSize, ui64 bufferSize) { memcpy(data + bufferSize - sizeof(ui32), &hash, sizeof(ui32)); } -bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf) { - switch (crcMode) { - case TErasureType::CrcModeNone: - return true; - case TErasureType::CrcModeWholePart: +bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf) { + 
switch (crcMode) { + case TErasureType::CrcModeNone: + return true; + case TErasureType::CrcModeWholePart: if (buf.size() == 0) { - return true; - } else { + return true; + } else { Y_VERIFY(buf.size() > sizeof(ui32), "Error in CheckWholeBlobCrc: blob part size# %" PRIu64 " is less then crcSize# %" PRIu64, (ui64)buf.size(), (ui64)sizeof(ui32)); ui32 crc = Crc32c(buf.data(), buf.size() - sizeof(ui32)); ui32 expectedCrc = ReadUnaligned<ui32>(buf.data() + buf.size() - sizeof(ui32)); - return crc == expectedCrc; - } - } - ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; -} - + return crc == expectedCrc; + } + } + ythrow TWithBackTrace<yexception>() << "Unknown crcMode = " << (i32)crcMode; +} + class TBlockParams { public: ui64 DataSize; @@ -286,7 +286,7 @@ public: } #if IS_VERBOSE -# define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ +# define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ do { \ for (ui32 row = 0; row < LineCount; ++row) { \ VERBOSE_COUT(Endl); \ @@ -302,37 +302,37 @@ public: } \ VERBOSE_COUT(Endl); \ } while (false) -# define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ - do { \ - for (ui32 row = 0; row < LineCount; ++row) { \ - VERBOSE_COUT(Endl); \ - for (ui32 col = 0; col < DataParts; ++col) { \ - if (IS_FULL_DATA) { \ - VERBOSE_COUT(DebugFormatBits(FULL_DATA_ELEM(row, col)) << ", "); \ - } else { \ - VERBOSE_COUT(DebugFormatBits(PART_ELEM(row, col)) << ", "); \ - } \ - } \ - VERBOSE_COUT(DebugFormatBits(COL_M(row)) << ", "); \ - VERBOSE_COUT(DebugFormatBits(COL_M1(row)) << ", "); \ - VERBOSE_COUT(DebugFormatBits(COL_M2(row))); \ - } \ - VERBOSE_COUT(Endl); \ - } while (false) +# define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ + do { \ + for (ui32 row = 0; row < LineCount; ++row) { \ + VERBOSE_COUT(Endl); \ + for (ui32 col = 0; col < DataParts; ++col) { \ + if 
(IS_FULL_DATA) { \ + VERBOSE_COUT(DebugFormatBits(FULL_DATA_ELEM(row, col)) << ", "); \ + } else { \ + VERBOSE_COUT(DebugFormatBits(PART_ELEM(row, col)) << ", "); \ + } \ + } \ + VERBOSE_COUT(DebugFormatBits(COL_M(row)) << ", "); \ + VERBOSE_COUT(DebugFormatBits(COL_M1(row)) << ", "); \ + VERBOSE_COUT(DebugFormatBits(COL_M2(row))); \ + } \ + VERBOSE_COUT(Endl); \ + } while (false) #else -# define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ - do { \ - } while (false) -# define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ - do { \ - } while (false) +# define VERBOSE_COUT_BLOCK(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1) \ + do { \ + } while (false) +# define VERBOSE_COUT_BLOCK_M2(IS_FULL_DATA, FULL_DATA_ELEM, PART_ELEM, COL_M, COL_M1, COL_M2) \ + do { \ + } while (false) #endif - + template <bool isStripe, bool isFromDataParts> void EoSplitWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet &outPartSet, ui64 writePosition, ui64 firstBlock, ui64 lastBlock) { - const ui32 lastPartIdx = DataParts + 1; - const ui32 m = Prime; + const ui32 lastPartIdx = DataParts + 1; + const ui32 m = Prime; for (ui64 blockIdx = firstBlock; blockIdx != lastBlock; ++blockIdx) { #define IN_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) @@ -349,128 +349,128 @@ public: } else { VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); } - ui64 adj = 0; - const ui32 mint = (m - 2 < LineCount ? 1 : m - 2 - LineCount); - VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); - for (ui32 t = mint; t < DataParts; ++t) { + ui64 adj = 0; + const ui32 mint = (m - 2 < LineCount ? 
1 : m - 2 - LineCount); + VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); + for (ui32 t = mint; t < DataParts; ++t) { adj ^= IN_EL(m - 1 - t, t); - VERBOSE_COUT("s: " << adj << " el[" << (m - 1 - t) << ", " << t << "]: " << + VERBOSE_COUT("s: " << adj << " el[" << (m - 1 - t) << ", " << t << "]: " << DebugFormatBits(IN_EL(m - 1 - t, t)) << Endl); - } + } for (ui32 l = 0; l < LineCount; ++l) { ui64 sourceData = IN_EL(l, 0); - OUT_M1(l) = adj ^ sourceData; - OUT_M(l) = sourceData; + OUT_M1(l) = adj ^ sourceData; + OUT_M(l) = sourceData; if (!isFromDataParts) { OUT_EL(l, 0) = sourceData; } - } - for (ui32 t = 1; t < DataParts; ++t) { - for (ui32 l = 0; l < LineCount; ++l) { + } + for (ui32 t = 1; t < DataParts; ++t) { + for (ui32 l = 0; l < LineCount; ++l) { ui64 sourceData = IN_EL(l, t); - OUT_M(l) ^= sourceData; + OUT_M(l) ^= sourceData; if (!isFromDataParts) { OUT_EL(l, t) = sourceData; } - VERBOSE_COUT("OUT_M(" << l << ") = " << DebugFormatBits(OUT_M(l)) << Endl); + VERBOSE_COUT("OUT_M(" << l << ") = " << DebugFormatBits(OUT_M(l)) << Endl); } } - for (ui32 t = 1; t < DataParts; ++t) { - for (ui32 l = 0; l < LineCount - t; ++l) { - ui32 row = l + t; + for (ui32 t = 1; t < DataParts; ++t) { + for (ui32 l = 0; l < LineCount - t; ++l) { + ui32 row = l + t; OUT_M1(row) ^= IN_EL(l, t); VERBOSE_COUT(DebugFormatBits(IN_EL(row, t)) << Endl); - } - for (ui32 l = LineCount - t + 1; l < LineCount; ++l) { - ui32 row = l + t - m; + } + for (ui32 l = LineCount - t + 1; l < LineCount; ++l) { + ui32 row = l + t - m; OUT_M1(row) ^= IN_EL(l, t); VERBOSE_COUT(DebugFormatBits(IN_EL(row, t)) << Endl); - } - } - VERBOSE_COUT_BLOCK(true, OUT_EL, OUT_EL, OUT_M, OUT_M1); + } + } + VERBOSE_COUT_BLOCK(true, OUT_EL, OUT_EL, OUT_M, OUT_M1); #undef IN_EL -#undef OUT_M1 -#undef OUT_M -#undef OUT_EL -#undef IN_EL_BLOCK -#undef IN_EL_STRIPE - writePosition += ColumnSize; - } - } - +#undef OUT_M1 +#undef OUT_M +#undef OUT_EL +#undef IN_EL_BLOCK +#undef IN_EL_STRIPE + 
writePosition += ColumnSize; + } + } + template <bool isStripe, bool isFromDataParts> void StarSplitWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet &outPartSet, ui64 writePosition, ui32 blocks) { - const ui32 m = Prime; -#define IN_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) -#define IN_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] + const ui32 m = Prime; +#define IN_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) +#define IN_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] #define IN_EL_SB(row, column) (isStripe ? IN_EL_STRIPE(row, column) : IN_EL_BLOCK(row, column)) #define OUT_EL(row, column) *((ui64*)(outPartSet.Parts[column].GetDataAt(writePosition + (row) * sizeof(ui64)))) #define IN_EL(row, column) (isFromDataParts ? OUT_EL(row, column) : IN_EL_SB(row, column)) #define OUT_M(row) *((ui64*)(outPartSet.Parts[DataParts].GetDataAt(writePosition + (row) * sizeof(ui64)))) #define OUT_M1(row) *((ui64*)(outPartSet.Parts[DataParts + 1].GetDataAt(writePosition + (row) * sizeof(ui64)))) #define OUT_M2(row) *((ui64*)(outPartSet.Parts[DataParts + 2].GetDataAt(writePosition + (row) * sizeof(ui64)))) - for (ui64 blockIdx = 0; blockIdx < blocks; ++blockIdx) { - if (isStripe) { - VERBOSE_COUT_BLOCK(true, IN_EL_STRIPE, IN_EL_STRIPE, OUT_M, OUT_M1); - } else { - VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); - } - ui64 s1 = 0; + for (ui64 blockIdx = 0; blockIdx < blocks; ++blockIdx) { + if (isStripe) { + VERBOSE_COUT_BLOCK(true, IN_EL_STRIPE, IN_EL_STRIPE, OUT_M, OUT_M1); + } else { + VERBOSE_COUT_BLOCK(true, IN_EL_BLOCK, IN_EL_BLOCK, OUT_M, OUT_M1); + } + ui64 s1 = 0; const ui32 mint = (m - 2 < LineCount ? 
1 : m - 2 - LineCount); VERBOSE_COUT("mint = " << mint << " m - 1 - t = " << (m - 1 - mint) << Endl); for (ui32 t = mint; t < DataParts; ++t) { - s1 ^= IN_EL(m - 1 - t, t); - VERBOSE_COUT("s1: " << s1 << " el[" << (m - 1 - t) << ", " << t << "]: " << - DebugFormatBits(isStripe ? IN_EL_STRIPE(m - 1 - t, t): IN_EL_BLOCK(m - 1 - t, t)) << Endl); - } - ui64 s2 = 0; - for (ui32 t = 1; t < DataParts; ++t) { - s2 ^= IN_EL(t - 1, t); - VERBOSE_COUT("s2: " << s2 << " el[" << (t - 1) << ", " << t << "]: " << - DebugFormatBits(IN_EL(t - 1, t)) << Endl); - } + s1 ^= IN_EL(m - 1 - t, t); + VERBOSE_COUT("s1: " << s1 << " el[" << (m - 1 - t) << ", " << t << "]: " << + DebugFormatBits(isStripe ? IN_EL_STRIPE(m - 1 - t, t): IN_EL_BLOCK(m - 1 - t, t)) << Endl); + } + ui64 s2 = 0; + for (ui32 t = 1; t < DataParts; ++t) { + s2 ^= IN_EL(t - 1, t); + VERBOSE_COUT("s2: " << s2 << " el[" << (t - 1) << ", " << t << "]: " << + DebugFormatBits(IN_EL(t - 1, t)) << Endl); + } for (ui32 l = 0; l < LineCount; ++l) { - ui64 dataIN_EL = IN_EL(l, 0); - OUT_M(l) = dataIN_EL; - OUT_M1(l) = s1 ^ dataIN_EL; - OUT_M2(l) = s2 ^ dataIN_EL; + ui64 dataIN_EL = IN_EL(l, 0); + OUT_M(l) = dataIN_EL; + OUT_M1(l) = s1 ^ dataIN_EL; + OUT_M2(l) = s2 ^ dataIN_EL; if (!isFromDataParts) { OUT_EL(l, 0) = dataIN_EL; } - } - for (ui32 t = 1; t < DataParts; ++t) { - for (ui32 l = 0; l < LineCount; ++l) { - ui64 dataIN_EL = IN_EL(l, t); - ui32 row1 = (l + t) % m; - OUT_M(l) ^= dataIN_EL; - if (row1 < LineCount) { - OUT_M1(row1) ^= dataIN_EL; - VERBOSE_COUT(IN_EL(row1, t) << Endl); + } + for (ui32 t = 1; t < DataParts; ++t) { + for (ui32 l = 0; l < LineCount; ++l) { + ui64 dataIN_EL = IN_EL(l, t); + ui32 row1 = (l + t) % m; + OUT_M(l) ^= dataIN_EL; + if (row1 < LineCount) { + OUT_M1(row1) ^= dataIN_EL; + VERBOSE_COUT(IN_EL(row1, t) << Endl); + } + ui32 row2 = (m + l - t) % m; + if (row2 < LineCount) { + OUT_M2(row2) ^= dataIN_EL; + VERBOSE_COUT(IN_EL(row2, t) << Endl); } - ui32 row2 = (m + l - t) % m; - if (row2 < 
LineCount) { - OUT_M2(row2) ^= dataIN_EL; - VERBOSE_COUT(IN_EL(row2, t) << Endl); - } if (!isFromDataParts) { OUT_EL(l, t) = dataIN_EL; } } } -#if IS_VERBOSE - for (ui32 l = 0; l < LineCount; ++l) { - VERBOSE_COUT("OUT_M1(" << l << ") = " << DebugFormatBits(OUT_M1(l)) << Endl); - } - VERBOSE_COUT_BLOCK_M2(true, OUT_EL, OUT_EL, OUT_M, OUT_M1, OUT_M2); -#endif - writePosition += ColumnSize; - } -#undef OUT_M2 +#if IS_VERBOSE + for (ui32 l = 0; l < LineCount; ++l) { + VERBOSE_COUT("OUT_M1(" << l << ") = " << DebugFormatBits(OUT_M1(l)) << Endl); + } + VERBOSE_COUT_BLOCK_M2(true, OUT_EL, OUT_EL, OUT_M, OUT_M1, OUT_M2); +#endif + writePosition += ColumnSize; + } +#undef OUT_M2 #undef OUT_M1 #undef OUT_M #undef OUT_EL -#undef IN_EL +#undef IN_EL #undef IN_EL_BLOCK #undef IN_EL_STRIPE } @@ -527,23 +527,23 @@ public: } template <bool isStripe, bool isFromDataParts> - void StarSplit(TDataPartSet &outPartSet) { - // Use all whole columns of all the parts + void StarSplit(TDataPartSet &outPartSet) { + // Use all whole columns of all the parts StarSplitWhole<isStripe, isFromDataParts>(Data, BufferDataPart, outPartSet, 0ull, WholeBlocks); - - // Use the remaining parts to fill in the last block - // Write the tail of the data - if (TailSize) { + + // Use the remaining parts to fill in the last block + // Write the tail of the data + if (TailSize) { char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; if (!isFromDataParts) { PrepareLastBlockData<isStripe>(lastBlockSource, bufferDataPart); } - + StarSplitWhole<isStripe, isFromDataParts>(lastBlockSource, bufferDataPart, outPartSet, WholeBlocks * ColumnSize, 1); - } - } - + } + } + template <bool isStripe, bool isFromDataParts, bool isIncremental = false> void EoSplit(TDataPartSet &outPartSet) { ui64 readPosition = isIncremental? 
ColumnSize * outPartSet.CurBlockIdx: 0; @@ -621,24 +621,24 @@ public: template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> void EoDiagonalRestorePartWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet &partSet, ui64 readPosition, ui32 beginBlockIdx, ui32 endBlockIdx, ui32 missingDataPartIdx) { - ui32 lastColumn = reversed ? DataParts + 2 : DataParts + 1; - const ui32 m = Prime; + ui32 lastColumn = reversed ? DataParts + 2 : DataParts + 1; + const ui32 m = Prime; // Use all whole columns of all the parts for (ui64 blockIdx = beginBlockIdx; blockIdx < endBlockIdx; ++blockIdx) { -#define RIGHT_ROW(row) (reversed ? LineCount - 1 - (row) : (row)) -#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + RIGHT_ROW(row)] -#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + RIGHT_ROW(row)) * DataParts + (column)) +#define RIGHT_ROW(row) (reversed ? LineCount - 1 - (row) : (row)) +#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + RIGHT_ROW(row)] +#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + RIGHT_ROW(row)) * DataParts + (column)) #define IN_EL(row, column) *((ui64*)(partSet.Parts[column].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) #define IN_M12(row) *((ui64*)(partSet.Parts[lastColumn].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) - VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); + VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); ui64 s = 0; ui32 colLimit = DataParts; ui32 rowLimit = LineCount; { ui32 idx = (m + missingDataPartIdx - 1) % m; if (idx < rowLimit) { - s = IN_M12(idx); + s = IN_M12(idx); VERBOSE_COUT("s(" << idx << ", m1): " << DebugFormatBits(s) << Endl); } } @@ -693,7 +693,7 @@ public: ui32 idx = (m + k + missingDataPartIdx) % m; if (idx < 
LineCount) { VERBOSE_COUT("idx = " << idx); - res ^= IN_M12(idx); // This is missing in the article! + res ^= IN_M12(idx); // This is missing in the article! } if (restoreFullData) { VERBOSE_COUT("out [" << k << ", " << missingDataPartIdx << "] = " << DebugFormatBits(res) << Endl); @@ -711,21 +711,21 @@ public: tmp ^= IN_EL(k, l); } IN_M(k) = tmp; - } + } } } if (isStripe) { - VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_STRIPE, IN_EL, IN_M, IN_M12); + VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_STRIPE, IN_EL, IN_M, IN_M12); } else { - VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_BLOCK, IN_EL, IN_M, IN_M12); + VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL_BLOCK, IN_EL, IN_M, IN_M12); } -#undef IN_M12 +#undef IN_M12 #undef IN_M #undef IN_EL #undef OUT_EL_BLOCK #undef OUT_EL_STRIPE -#undef RIGHT_ROW - +#undef RIGHT_ROW + readPosition += ColumnSize; } } @@ -758,7 +758,7 @@ public: PrepareLastBlockPointers<isStripe>(lastBlock, bufferDataPart); EoDiagonalRestorePartWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>(lastBlock, bufferDataPart, - partSet, WholeBlocks * ColumnSize, 0, 1, missingDataPartIdx); + partSet, WholeBlocks * ColumnSize, 0, 1, missingDataPartIdx); if (restoreFullData) { PlaceLastBlock<isStripe>(bufferDataPart, lastBlock); @@ -771,254 +771,254 @@ public: template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarMainRestorePartsWholeSymmetric(char *data, TBufferDataPart &bufferDataPart, TDataPartSet& partSet, - ui64 readPosition, ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB, - ui32 missingDataPartIdxC) { - VERBOSE_COUT("Start of StarMainRestorePartsWholeSymmetric for blocks " << missingDataPartIdxA - << " " << missingDataPartIdxB << " " <<missingDataPartIdxC << Endl); - // Notation used in this function is taken from article - // Cheng Huang, Lihao Xu (2005, 4th USENIX Conf.) - STAR: An Efficient Coding Scheme... 
- ui64 readPositionStart = readPosition; - const ui32 m = Prime; - const ui32 r = missingDataPartIdxA; - const ui32 s = missingDataPartIdxB; - const ui32 t = missingDataPartIdxC; - const ui32 dr = (m + s - r) % m; - // Use all whole columns of all the parts -#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] -#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) -#define OUT_EL(row, column) (isStripe ? OUT_EL_STRIPE(row, column) : OUT_EL_BLOCK(row, column)) + ui64 readPosition, ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB, + ui32 missingDataPartIdxC) { + VERBOSE_COUT("Start of StarMainRestorePartsWholeSymmetric for blocks " << missingDataPartIdxA + << " " << missingDataPartIdxB << " " <<missingDataPartIdxC << Endl); + // Notation used in this function is taken from article + // Cheng Huang, Lihao Xu (2005, 4th USENIX Conf.) - STAR: An Efficient Coding Scheme... + ui64 readPositionStart = readPosition; + const ui32 m = Prime; + const ui32 r = missingDataPartIdxA; + const ui32 s = missingDataPartIdxB; + const ui32 t = missingDataPartIdxC; + const ui32 dr = (m + s - r) % m; + // Use all whole columns of all the parts +#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] +#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) +#define OUT_EL(row, column) (isStripe ? 
OUT_EL_STRIPE(row, column) : OUT_EL_BLOCK(row, column)) #define IN_EL(row, column) *((ui64*)(partSet.Parts[column].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M1(row) *((ui64*)(partSet.Parts[DataParts + 1].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M2(row) *((ui64*)(partSet.Parts[DataParts + 2].GetDataAt(readPosition + (row) * sizeof(ui64)))) - for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { - VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); - // 1) Adjusters recovery adj0 is for S0 - ui64 adj0 = 0; - ui64 adj1 = 0; - ui64 adj2 = 0; - for (ui32 i = 0; i < LineCount; ++i) { - adj0 ^= IN_M(i); - adj1 ^= IN_M1(i); - adj2 ^= IN_M2(i); - } - adj1 = adj0 ^ adj1; - adj2 = adj0 ^ adj2; - // 2) Syndrome calculation - ui64 s0[MAX_LINES_IN_BLOCK]; - ui64 s1[MAX_LINES_IN_BLOCK]; - ui64 s2[MAX_LINES_IN_BLOCK]; - ui32 row; - for (ui32 i = 0; i < LineCount; ++i) { - s0[i] = IN_M(i); - s1[i] = IN_M1(i) ^ adj1; - s2[i] = IN_M2(i) ^ adj2; + for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { + VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); + // 1) Adjusters recovery adj0 is for S0 + ui64 adj0 = 0; + ui64 adj1 = 0; + ui64 adj2 = 0; + for (ui32 i = 0; i < LineCount; ++i) { + adj0 ^= IN_M(i); + adj1 ^= IN_M1(i); + adj2 ^= IN_M2(i); + } + adj1 = adj0 ^ adj1; + adj2 = adj0 ^ adj2; + // 2) Syndrome calculation + ui64 s0[MAX_LINES_IN_BLOCK]; + ui64 s1[MAX_LINES_IN_BLOCK]; + ui64 s2[MAX_LINES_IN_BLOCK]; + ui32 row; + for (ui32 i = 0; i < LineCount; ++i) { + s0[i] = IN_M(i); + s1[i] = IN_M1(i) ^ adj1; + s2[i] = IN_M2(i) ^ adj2; VERBOSE_COUT("IN_M[" << i << "] = " << DebugFormatBits(IN_M(i)) << ", "); VERBOSE_COUT("IN_M1[" << i << "] ^ adj1 = " << DebugFormatBits(IN_M1(i) ^ adj1) << ", "); VERBOSE_COUT("IN_M2[" << i << "] ^ adj2 = " << DebugFormatBits(IN_M2(i) ^ adj2) << Endl); - } - s0[m - 1] = 0; - s1[m - 
1] = adj1; - s2[m - 1] = adj2; - for (ui32 j = 0; j < DataParts; ++j) { - if (j == r || j == s || j == t) { - continue; - } - for (ui32 i = 0; i < LineCount; ++i) { - ui64 data_tmp = IN_EL(i, j); - if (restoreFullData) { - OUT_EL(i, j) = data_tmp; - } - s0[i] ^= data_tmp; - row = (i + j) % m; - if (row < m) { - s1[row] ^= IN_EL(i, j); - } - row = (m + i - j) % m; - if (row < m) { - s2[row] ^= IN_EL(i, j); - VERBOSE_COUT("s2[" << i << "] ^= IN_EL(" << row << "," << j << ");" << Endl;); - - } - } - } - for (ui32 i = 0; i < m; ++i) { - VERBOSE_COUT("s0[" << i << "] = " << DebugFormatBits(s0[i]) << ", "); - VERBOSE_COUT("s1[" << i << "] = " << DebugFormatBits(s1[i]) << ", "); - VERBOSE_COUT("s2[" << i << "] = " << DebugFormatBits(s2[i]) << Endl); - } - // 3) Compute all rows in s - ui32 row1 = (m - 1 + r) % m; - ui32 row2 = (m + m - 1 - 2*dr - r) % m; - ui32 row01 = (m + row1 - r) % m; - ui32 row02 = (row2 + r) % m; - ui64 res = 0; - for (ui32 i = 0; i < LineCount; ++i) { - res = s0[row01] ^ s1[row1] ^ s0[row02] ^ s2[row2] ^ res; - if (restoreFullData) { - OUT_EL(row02, s) = res; - } - IN_EL(row02, s) = res; - VERBOSE_COUT("IN_EL(" << row02 << ", " << s << ") = " << DebugFormatBits(res) << Endl); - row1 = (m + row1 - 2*dr) % m; - row2 = (m + row2 - 2*dr) % m; - row01 = (m + row1 - r) % m; - row02 = (row2 + r) % m; - } - VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); - readPosition += ColumnSize; - } - VERBOSE_COUT("End of StarMainRestorePartsWholeSymmetric" << Endl); + } + s0[m - 1] = 0; + s1[m - 1] = adj1; + s2[m - 1] = adj2; + for (ui32 j = 0; j < DataParts; ++j) { + if (j == r || j == s || j == t) { + continue; + } + for (ui32 i = 0; i < LineCount; ++i) { + ui64 data_tmp = IN_EL(i, j); + if (restoreFullData) { + OUT_EL(i, j) = data_tmp; + } + s0[i] ^= data_tmp; + row = (i + j) % m; + if (row < m) { + s1[row] ^= IN_EL(i, j); + } + row = (m + i - j) % m; + if (row < m) { + s2[row] ^= IN_EL(i, j); + VERBOSE_COUT("s2[" << i << "] ^= IN_EL(" << row << "," 
<< j << ");" << Endl;); + + } + } + } + for (ui32 i = 0; i < m; ++i) { + VERBOSE_COUT("s0[" << i << "] = " << DebugFormatBits(s0[i]) << ", "); + VERBOSE_COUT("s1[" << i << "] = " << DebugFormatBits(s1[i]) << ", "); + VERBOSE_COUT("s2[" << i << "] = " << DebugFormatBits(s2[i]) << Endl); + } + // 3) Compute all rows in s + ui32 row1 = (m - 1 + r) % m; + ui32 row2 = (m + m - 1 - 2*dr - r) % m; + ui32 row01 = (m + row1 - r) % m; + ui32 row02 = (row2 + r) % m; + ui64 res = 0; + for (ui32 i = 0; i < LineCount; ++i) { + res = s0[row01] ^ s1[row1] ^ s0[row02] ^ s2[row2] ^ res; + if (restoreFullData) { + OUT_EL(row02, s) = res; + } + IN_EL(row02, s) = res; + VERBOSE_COUT("IN_EL(" << row02 << ", " << s << ") = " << DebugFormatBits(res) << Endl); + row1 = (m + row1 - 2*dr) % m; + row2 = (m + row2 - 2*dr) % m; + row01 = (m + row1 - r) % m; + row02 = (row2 + r) % m; + } + VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); + readPosition += ColumnSize; + } + VERBOSE_COUT("End of StarMainRestorePartsWholeSymmetric" << Endl); EoMainRestorePartsWhole<isStripe, restoreParts, restoreFullData, false, restoreParityParts>(data, bufferDataPart, - partSet, readPositionStart, endBlockIdx, Min(r, t), Max(r,t)); -#undef IN_M2 -#undef IN_M1 -#undef IN_M -#undef IN_EL -#undef OUT_EL -#undef OUT_EL_BLOCK -#undef OUT_EL_STRIPE - } - + partSet, readPositionStart, endBlockIdx, Min(r, t), Max(r,t)); +#undef IN_M2 +#undef IN_M1 +#undef IN_M +#undef IN_EL +#undef OUT_EL +#undef OUT_EL_BLOCK +#undef OUT_EL_STRIPE + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarRestoreHorizontalPartWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet& partSet, - ui64 readPosition, ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { - VERBOSE_COUT("Start of StarRestoreHorizontalPartWhole for blocks " - << missingDataPartIdxA << " " << missingDataPartIdxB << Endl); - // Notation ised in this function is taken from article 
- // Cheng Huang, Lihao Xu (2005, 4th USENIX Conf.) - STAR: An Efficient Coding Scheme... - ui64 readPositionStart = readPosition; - const ui32 m = Prime; - const ui32 r = missingDataPartIdxA; - const ui32 s = missingDataPartIdxB; - const ui32 dr = (m + s - r) % m; + ui64 readPosition, ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { + VERBOSE_COUT("Start of StarRestoreHorizontalPartWhole for blocks " + << missingDataPartIdxA << " " << missingDataPartIdxB << Endl); + // Notation ised in this function is taken from article + // Cheng Huang, Lihao Xu (2005, 4th USENIX Conf.) - STAR: An Efficient Coding Scheme... + ui64 readPositionStart = readPosition; + const ui32 m = Prime; + const ui32 r = missingDataPartIdxA; + const ui32 s = missingDataPartIdxB; + const ui32 dr = (m + s - r) % m; // Use all whole columns of all the parts #define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + (row)] #define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + (row)) * DataParts + (column)) -#define OUT_EL(row, column) (isStripe ? OUT_EL_STRIPE(row, column) : OUT_EL_BLOCK(row, column)) +#define OUT_EL(row, column) (isStripe ? 
OUT_EL_STRIPE(row, column) : OUT_EL_BLOCK(row, column)) #define IN_EL(row, column) *((ui64*)(partSet.Parts[column].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M1(row) *((ui64*)(partSet.Parts[DataParts + 1].GetDataAt(readPosition + (row) * sizeof(ui64)))) #define IN_M2(row) *((ui64*)(partSet.Parts[DataParts + 2].GetDataAt(readPosition + (row) * sizeof(ui64)))) - for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { - VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); - // 1) Adjusters recovery - ui64 adj12 = 0; - for (ui32 i = 0; i < LineCount; ++i) { - adj12 ^= IN_M1(i) ^ IN_M2(i); - } - VERBOSE_COUT("adj12# " << DebugFormatBits(adj12) << Endl); - // 2) Syndrome calculation - ui64 s1[MAX_LINES_IN_BLOCK]; - ui64 s2[MAX_LINES_IN_BLOCK]; - //ui32 row_adj; - for (ui32 i = 0; i < LineCount; ++i) { - IN_M(i) = 0; - s1[i] = IN_M1(i); - s2[i] = IN_M2(i); - } - s1[m - 1] = 0; - s2[m - 1] = 0; - ui32 row; - for (ui32 j = 0; j < DataParts; ++j) { - if (j == r || j == s) { - continue; - } - for (ui32 i = 0; i < LineCount; ++i) { - ui64 data_tmp = IN_EL(i, j); - IN_M(i) ^= data_tmp; // Store horizontal syndrome directly in M-column - if (restoreFullData) { - OUT_EL(i, j) = data_tmp; - } - row = (i + j) % m; - s1[row] ^= data_tmp; - row = (m + i - j) % m; - s2[row] ^= data_tmp; - } - } - for (ui32 i = 0; i < m; ++i) { - VERBOSE_COUT("s1[" << i << "] = " << DebugFormatBits(s1[i]) << ", "); - VERBOSE_COUT("s2[" << i << "] = " << DebugFormatBits(s2[i]) << Endl); - } - // 3) Compute all row pairs - ui32 row1 = (m - 1 + r) % m; - ui32 row2 = (m + m - 1 - r - dr) % m; - ui32 row3 = (row2 + r) % m; - ui64 res = 0; - for (ui32 i = 0; i < LineCount; ++i) { - res = s1[row1] ^ s2[row2] ^ adj12 ^ res; - IN_M(row3) ^= res; - VERBOSE_COUT("IN_M(" << row3 << ") = " << DebugFormatBits(IN_M(row3)) << Endl); - //row1 = (m + row1 - dr) % m; - 
VERBOSE_COUT("row1,2,3# " << row1 << " " << row2 << " " << row3 << Endl); - row1 = (m + row1 - dr) % m; - row2 = (m + row2 - dr) % m; - row3 = (m + row3 - dr) % m; - } - VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); - readPosition += ColumnSize; - } - VERBOSE_COUT("End of StarRestoreHorizontalPartWhole" << Endl); + for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { + VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); + // 1) Adjusters recovery + ui64 adj12 = 0; + for (ui32 i = 0; i < LineCount; ++i) { + adj12 ^= IN_M1(i) ^ IN_M2(i); + } + VERBOSE_COUT("adj12# " << DebugFormatBits(adj12) << Endl); + // 2) Syndrome calculation + ui64 s1[MAX_LINES_IN_BLOCK]; + ui64 s2[MAX_LINES_IN_BLOCK]; + //ui32 row_adj; + for (ui32 i = 0; i < LineCount; ++i) { + IN_M(i) = 0; + s1[i] = IN_M1(i); + s2[i] = IN_M2(i); + } + s1[m - 1] = 0; + s2[m - 1] = 0; + ui32 row; + for (ui32 j = 0; j < DataParts; ++j) { + if (j == r || j == s) { + continue; + } + for (ui32 i = 0; i < LineCount; ++i) { + ui64 data_tmp = IN_EL(i, j); + IN_M(i) ^= data_tmp; // Store horizontal syndrome directly in M-column + if (restoreFullData) { + OUT_EL(i, j) = data_tmp; + } + row = (i + j) % m; + s1[row] ^= data_tmp; + row = (m + i - j) % m; + s2[row] ^= data_tmp; + } + } + for (ui32 i = 0; i < m; ++i) { + VERBOSE_COUT("s1[" << i << "] = " << DebugFormatBits(s1[i]) << ", "); + VERBOSE_COUT("s2[" << i << "] = " << DebugFormatBits(s2[i]) << Endl); + } + // 3) Compute all row pairs + ui32 row1 = (m - 1 + r) % m; + ui32 row2 = (m + m - 1 - r - dr) % m; + ui32 row3 = (row2 + r) % m; + ui64 res = 0; + for (ui32 i = 0; i < LineCount; ++i) { + res = s1[row1] ^ s2[row2] ^ adj12 ^ res; + IN_M(row3) ^= res; + VERBOSE_COUT("IN_M(" << row3 << ") = " << DebugFormatBits(IN_M(row3)) << Endl); + //row1 = (m + row1 - dr) % m; + VERBOSE_COUT("row1,2,3# " << row1 << " " << row2 << " " << row3 << Endl); + row1 = (m + row1 - dr) % m; + row2 = (m + row2 - dr) % m; + row3 = (m + row3 - dr) % m; + } 
+ VERBOSE_COUT_BLOCK_M2(true, IN_EL, IN_EL, IN_M, IN_M1, IN_M2); + readPosition += ColumnSize; + } + VERBOSE_COUT("End of StarRestoreHorizontalPartWhole" << Endl); EoMainRestorePartsWhole<isStripe, restoreParts, restoreFullData, false, restoreParityParts>(data, bufferDataPart, - partSet, readPositionStart, endBlockIdx, r, s); -#undef IN_M2 -#undef IN_M1 -#undef IN_M -#undef IN_EL -#undef OUT_EL -#undef OUT_EL_BLOCK -#undef OUT_EL_STRIPE - } - - + partSet, readPositionStart, endBlockIdx, r, s); +#undef IN_M2 +#undef IN_M1 +#undef IN_M +#undef IN_EL +#undef OUT_EL +#undef OUT_EL_BLOCK +#undef OUT_EL_STRIPE + } + + template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> void EoMainRestorePartsWhole(char *data, TBufferDataPart &bufferDataPart, TDataPartSet& partSet, ui64 readPosition, - ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { - VERBOSE_COUT("Start of EoMainRestorePartsWhole" << Endl); - ui32 lastColumn = reversed ? DataParts + 2 : DataParts + 1; - const ui32 m = Prime; - // Use all whole columns of all the parts -#define RIGHT_ROW(row) (reversed ? LineCount - 1 - (row) : (row)) -#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + RIGHT_ROW(row)] -#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + RIGHT_ROW(row)) * DataParts + (column)) + ui32 endBlockIdx, ui32 missingDataPartIdxA, ui32 missingDataPartIdxB) { + VERBOSE_COUT("Start of EoMainRestorePartsWhole" << Endl); + ui32 lastColumn = reversed ? DataParts + 2 : DataParts + 1; + const ui32 m = Prime; + // Use all whole columns of all the parts +#define RIGHT_ROW(row) (reversed ? LineCount - 1 - (row) : (row)) +#define OUT_EL_BLOCK(row, column) bufferDataPart[column][blockIdx * LineCount + RIGHT_ROW(row)] +#define OUT_EL_STRIPE(row, column) *((ui64*)data + (blockIdx * LineCount + RIGHT_ROW(row)) * DataParts + (column)) #define OUT_EL(row, column) (isStripe ? 
OUT_EL_STRIPE((row), column) : OUT_EL_BLOCK((row), (column))) #define IN_EL(row, column) *((ui64*)(partSet.Parts[column].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) #define IN_M(row) *((ui64*)(partSet.Parts[DataParts].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) #define IN_M12(row) *((ui64*)(partSet.Parts[lastColumn].GetDataAt(readPosition + RIGHT_ROW(row) * sizeof(ui64)))) - for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { - VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); + for (ui64 blockIdx = 0; blockIdx < endBlockIdx; ++blockIdx) { + VERBOSE_COUT_BLOCK(true, IN_EL, IN_EL, IN_M, IN_M12); // compute diagonal partiy s ui64 s = 0; - ui64 s0[MAX_LINES_IN_BLOCK]; + ui64 s0[MAX_LINES_IN_BLOCK]; for (ui32 l = 0; l < LineCount; ++l) { - ui64 tmp = IN_M(l); - s0[l] = tmp; - s ^= tmp; - s ^= IN_M12(l); + ui64 tmp = IN_M(l); + s0[l] = tmp; + s ^= tmp; + s ^= IN_M12(l); VERBOSE_COUT("Diag [l,m] s:" << DebugFormatBits(s) << Endl); } // compute horizontal syndromes s0 - for (ui32 t = 0; t < DataParts; ++t) { - if (t == missingDataPartIdxA || t == missingDataPartIdxB) { - continue; + for (ui32 t = 0; t < DataParts; ++t) { + if (t == missingDataPartIdxA || t == missingDataPartIdxB) { + continue; } - for (ui32 l = 0; l < LineCount; ++l) { - ui64 val = IN_EL(l, t); - s0[l] ^= val; + for (ui32 l = 0; l < LineCount; ++l) { + ui64 val = IN_EL(l, t); + s0[l] ^= val; if (restoreFullData) { - OUT_EL(l, t) = val; + OUT_EL(l, t) = val; } } } // compute diagonal syndromes s1 - ui64 s1[MAX_LINES_IN_BLOCK]; + ui64 s1[MAX_LINES_IN_BLOCK]; for (ui32 u = 0; u < m; ++u) { s1[u] = s; VERBOSE_COUT("S1 = s = " << DebugFormatBits(s1[u]) << Endl); if (u < LineCount) { - s1[u] ^= IN_M12(u); + s1[u] ^= IN_M12(u); VERBOSE_COUT("S1 ^= a[" << u << ", m+1] = " << DebugFormatBits(s1[u]) << Endl); } for (ui32 l = 0; l < missingDataPartIdxA; ++l) { @@ -1049,7 +1049,7 @@ public: } s = (m - (missingDataPartIdxB - missingDataPartIdxA) - 1) % m; - ui64 aVal = 0; + 
ui64 aVal = 0; do { if (s < LineCount) { ui64 bVal = s1[(missingDataPartIdxB + s) % m]; @@ -1058,11 +1058,11 @@ public: ui32 bRow = (m + s + (missingDataPartIdxB - missingDataPartIdxA)) % m; if (bRow < LineCount) { VERBOSE_COUT("read [" << bRow << ", " << missingDataPartIdxA << "] = "); - bVal ^= aVal; + bVal ^= aVal; if (restoreParts) { VERBOSE_COUT("i " << DebugFormatBits(IN_EL(bRow, missingDataPartIdxA)) << Endl); } else { - VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(bRow,missingDataPartIdxA)) << Endl); + VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(bRow,missingDataPartIdxA)) << Endl); } } if (restoreParts) { @@ -1071,19 +1071,19 @@ public: << Endl); } if (restoreFullData) { - OUT_EL(s, missingDataPartIdxB) = bVal; + OUT_EL(s, missingDataPartIdxB) = bVal; VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxB << "] = " << DebugFormatBits(bVal) << Endl); } - aVal = s0[s]; + aVal = s0[s]; VERBOSE_COUT("aVal = s0[" << s << "] = " << DebugFormatBits(aVal) << Endl); VERBOSE_COUT("read [" << s << ", " << missingDataPartIdxB << "] = "); - aVal ^= bVal; + aVal ^= bVal; if (restoreParts) { VERBOSE_COUT("i " << DebugFormatBits(IN_EL(s,missingDataPartIdxB)) << Endl); } else { - VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(s,missingDataPartIdxB)) << Endl); + VERBOSE_COUT("o " << DebugFormatBits(OUT_EL_STRIPE(s,missingDataPartIdxB)) << Endl); } if (restoreParts) { @@ -1092,7 +1092,7 @@ public: << Endl); } if (restoreFullData) { - OUT_EL(s, missingDataPartIdxA) = aVal; + OUT_EL(s, missingDataPartIdxA) = aVal; VERBOSE_COUT("write [" << s << ", " << missingDataPartIdxA << "] = " << DebugFormatBits(bVal) << Endl); } @@ -1100,8 +1100,8 @@ public: s = (m + s - (missingDataPartIdxB - missingDataPartIdxA)) % m; } while (s != m - 1); - VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL, IN_EL, IN_M, IN_M12); -#undef IN_M12 + VERBOSE_COUT_BLOCK(restoreFullData, OUT_EL, IN_EL, IN_M, IN_M12); +#undef IN_M12 #undef IN_M #undef IN_EL #undef OUT_EL_BLOCK @@ -1111,26 
+1111,26 @@ public: } template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> - void StarRestoreHorizontalPart(TDataPartSet& partSet, ui32 missingDataPartIdxA, - ui32 missingDataPartIdxB) { - // Read data and parity - VERBOSE_COUT("StarRestoreHorizontalPart for " << missingDataPartIdxA << " " << missingDataPartIdxB << Endl); + void StarRestoreHorizontalPart(TDataPartSet& partSet, ui32 missingDataPartIdxA, + ui32 missingDataPartIdxB) { + // Read data and parity + VERBOSE_COUT("StarRestoreHorizontalPart for " << missingDataPartIdxA << " " << missingDataPartIdxB << Endl); StarRestoreHorizontalPartWhole<isStripe, restoreParts, restoreFullData, restoreParityParts>(Data, BufferDataPart, - partSet, 0ull, WholeBlocks, missingDataPartIdxA, missingDataPartIdxB); - - if (TailSize) { + partSet, 0ull, WholeBlocks, missingDataPartIdxA, missingDataPartIdxB); + + if (TailSize) { char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); - + PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); + StarRestoreHorizontalPartWhole<isStripe, restoreParts, restoreFullData, restoreParityParts>(lastBlockSource, - bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, - missingDataPartIdxB); - - if (restoreFullData) { - PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); - } - } + bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, + missingDataPartIdxB); + + if (restoreFullData) { + PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); + } + } if (restoreParts) { if (missingDataPartIdxA < partSet.Parts.size()) { PadAndCrcPart(partSet, missingDataPartIdxA); @@ -1139,30 +1139,30 @@ public: PadAndCrcPart(partSet, missingDataPartIdxB); } } - } - - + } + + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> - void 
StarMainRestorePartsSymmetric(TDataPartSet& partSet, ui32 missingDataPartIdxA, - ui32 missingDataPartIdxB, ui32 missingDataPartIdxC) { - // Read data and parity - VERBOSE_COUT("StarMainRestorePartsSymmetric" << Endl); + void StarMainRestorePartsSymmetric(TDataPartSet& partSet, ui32 missingDataPartIdxA, + ui32 missingDataPartIdxB, ui32 missingDataPartIdxC) { + // Read data and parity + VERBOSE_COUT("StarMainRestorePartsSymmetric" << Endl); StarMainRestorePartsWholeSymmetric<isStripe, restoreParts, restoreFullData, restoreParityParts>(Data, BufferDataPart, - partSet, 0ull, WholeBlocks, missingDataPartIdxA, missingDataPartIdxB, missingDataPartIdxC); - - if (TailSize) { + partSet, 0ull, WholeBlocks, missingDataPartIdxA, missingDataPartIdxB, missingDataPartIdxC); + + if (TailSize) { char lastBlockSource[MAX_TOTAL_PARTS * (MAX_TOTAL_PARTS - 2) * sizeof(ui64)] = {}; TBufferDataPart bufferDataPart; - PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); - + PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); + StarMainRestorePartsWholeSymmetric<isStripe, restoreParts, restoreFullData, restoreParityParts>(lastBlockSource, - bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, - missingDataPartIdxB, missingDataPartIdxC); - - if (restoreFullData) { - PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); - } - } + bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, + missingDataPartIdxB, missingDataPartIdxC); + + if (restoreFullData) { + PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); + } + } if (restoreParts) { if (missingDataPartIdxA < partSet.Parts.size()) { PadAndCrcPart(partSet, missingDataPartIdxA); @@ -1174,8 +1174,8 @@ public: PadAndCrcPart(partSet, missingDataPartIdxC); } } - } - + } + template <bool isStripe, bool restoreParts, bool restoreFullData, bool reversed, bool restoreParityParts> void EoMainRestoreParts(TDataPartSet& partSet, ui32 missingDataPartIdxA, ui32 
missingDataPartIdxB) { // Read data and parity @@ -1202,7 +1202,7 @@ public: PrepareLastBlockPointers<isStripe>(lastBlockSource, bufferDataPart); EoMainRestorePartsWhole<isStripe, restoreParts, restoreFullData, reversed, restoreParityParts>(lastBlockSource, - bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, missingDataPartIdxB); + bufferDataPart, partSet, WholeBlocks * ColumnSize, 1, missingDataPartIdxA, missingDataPartIdxB); if (restoreFullData) { PlaceLastBlock<isStripe>(bufferDataPart, lastBlockSource); @@ -1381,24 +1381,24 @@ template <bool isStripe> void StarBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, const TString &buffer, TDataPartSet &outPartSet) { TBlockParams p(crcMode, type, buffer.size()); - - // Prepare input data pointers + + // Prepare input data pointers p.PrepareInputDataPointers<isStripe>(const_cast<char*>(buffer.data())); - + outPartSet.FullDataSize = buffer.size(); - outPartSet.PartsMask = ~((~(ui32)0) << p.TotalParts); - outPartSet.Parts.resize(p.TotalParts); - for (ui32 i = 0; i < p.TotalParts; ++i) { + outPartSet.PartsMask = ~((~(ui32)0) << p.TotalParts); + outPartSet.Parts.resize(p.TotalParts); + for (ui32 i = 0; i < p.TotalParts; ++i) { TRACE("Line# " << __LINE__ << Endl); Refurbish(outPartSet.Parts[i], p.PartContainerSize); - } + } outPartSet.MemoryConsumed = p.TotalParts * outPartSet.Parts[0].MemoryConsumed(); - + p.StarSplit<isStripe, false>(outPartSet); PadAndCrcParts(crcMode, p, outPartSet); -} - -template <bool isStripe> +} + +template <bool isStripe> void EoBlockSplit(TErasureType::ECrcMode crcMode, const TErasureType &type, const TString &buffer, TDataPartSet &outPartSet) { TBlockParams p(crcMode, type, buffer.size()); @@ -1453,7 +1453,7 @@ template <bool isStripe, bool restoreParts, bool restoreFullData, bool restorePa void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet& partSet) { TString &outBuffer = partSet.FullDataFragment.OwnedString; 
ui32 totalParts = type.TotalPartCount(); - Y_VERIFY(partSet.Parts.size() >= totalParts); + Y_VERIFY(partSet.Parts.size() >= totalParts); ui32 missingDataPartIdxA = totalParts; ui32 missingDataPartIdxB = totalParts; @@ -1512,7 +1512,7 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD // Restore the fast way if all data parts are present if (missingDataPartCount == 0 || - (!restoreParts && missingDataPartIdxA >= p.TotalParts - 2)) { + (!restoreParts && missingDataPartIdxA >= p.TotalParts - 2)) { VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); if (isStripe) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); @@ -1580,7 +1580,7 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD // TODO: 1-pass // just glue the data // use 'eo split' to restore the missing part - if (missingDataPartIdxA == p.TotalParts - 1 && missingDataPartIdxB == p.TotalParts) { + if (missingDataPartIdxA == p.TotalParts - 1 && missingDataPartIdxB == p.TotalParts) { TRACE("case# c" << Endl); VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); TString temp; @@ -1651,139 +1651,139 @@ void EoBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TD template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet& partSet) { TString &outBuffer = partSet.FullDataFragment.OwnedString; - - ui32 totalParts = type.TotalPartCount(); - Y_VERIFY(partSet.Parts.size() == totalParts); - - ui32 missingDataPartIdxA = totalParts; - ui32 missingDataPartIdxB = totalParts; - ui32 missingDataPartIdxC = totalParts; - ui32 missingDataPartCount = 0; + + ui32 totalParts = type.TotalPartCount(); + Y_VERIFY(partSet.Parts.size() == totalParts); + + ui32 missingDataPartIdxA = totalParts; + ui32 missingDataPartIdxB = totalParts; + ui32 missingDataPartIdxC = totalParts; + ui32 missingDataPartCount = 0; ui64 
expectedPartSize = type.PartSize(crcMode, partSet.FullDataSize); // ??? - ui32 i = 0; - for (; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdxA = i; - ++missingDataPartCount; - break; - } else { - Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size(): %" PRIu64 - " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, + ui32 i = 0; + for (; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdxA = i; + ++missingDataPartCount; + break; + } else { + Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size(): %" PRIu64 + " expectedPartSize: %" PRIu64 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui64)partSet.Parts[i].size(), expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } - ++i; - for (; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdxB = i; - ++missingDataPartCount; - break; - } else { - Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size()# %" PRIu32 - " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, + (ui64)partSet.FullDataSize); + } + } + ++i; + for (; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdxB = i; + ++missingDataPartCount; + break; + } else { + Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size()# %" PRIu32 + " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui32)partSet.Parts[i].size(), (ui32)expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } - ++i; - for (; i < totalParts; ++i) { - if (!(partSet.PartsMask & (1 << i))) { - missingDataPartIdxC = i; - ++missingDataPartCount; - break; - } else { - Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size()# 
%" PRIu32 - " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, + (ui64)partSet.FullDataSize); + } + } + ++i; + for (; i < totalParts; ++i) { + if (!(partSet.PartsMask & (1 << i))) { + missingDataPartIdxC = i; + ++missingDataPartCount; + break; + } else { + Y_VERIFY(partSet.Parts[i].size() == expectedPartSize, "partSet.Parts[%" PRIu32 "].size()# %" PRIu32 + " != expectedPartSize# %" PRIu32 " erasure: %s partSet.FullDataSize: %" PRIu64, (ui32)i, (ui32)partSet.Parts[i].size(), (ui32)expectedPartSize, type.ErasureName[type.GetErasure()].data(), - (ui64)partSet.FullDataSize); - } - } - Y_VERIFY(missingDataPartCount <= 3); - - if (restoreParts) { - if (missingDataPartIdxA != totalParts) { + (ui64)partSet.FullDataSize); + } + } + Y_VERIFY(missingDataPartCount <= 3); + + if (restoreParts) { + if (missingDataPartIdxA != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxA], expectedPartSize); - } - if (missingDataPartIdxB != totalParts) { + } + if (missingDataPartIdxB != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxB], expectedPartSize); - } - if (missingDataPartIdxC != totalParts) { + } + if (missingDataPartIdxC != totalParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxC], expectedPartSize); - } - } - if (missingDataPartCount == 3) { - VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << - " and " << missingDataPartIdxC << Endl); - } else if (missingDataPartCount == 2) { - VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << Endl); - } else if (missingDataPartCount == 1) { - VERBOSE_COUT("missing part " << missingDataPartIdxA << Endl); - } - - ui64 dataSize = partSet.FullDataSize; + } + } + if (missingDataPartCount == 3) { + VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << + " and " << missingDataPartIdxC 
<< Endl); + } else if (missingDataPartCount == 2) { + VERBOSE_COUT("missing parts " << missingDataPartIdxA << " and " << missingDataPartIdxB << Endl); + } else if (missingDataPartCount == 1) { + VERBOSE_COUT("missing part " << missingDataPartIdxA << Endl); + } + + ui64 dataSize = partSet.FullDataSize; TBlockParams p(crcMode, type, dataSize); - if (restoreFullData) { + if (restoreFullData) { Refurbish(outBuffer, dataSize); p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); - } else if (missingDataPartCount == 0) { - return; - } - - // Restore the fast way if all data parts are present - if (missingDataPartCount == 0 || - (!restoreParts && missingDataPartIdxA >= p.DataParts)) { - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); - if (isStripe) { + } else if (missingDataPartCount == 0) { + return; + } + + // Restore the fast way if all data parts are present + if (missingDataPartCount == 0 || + (!restoreParts && missingDataPartIdxA >= p.DataParts)) { + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + if (isStripe) { p.PrepareInputDataPointers<isStripe>(outBuffer.Detach()); p.XorRestorePart<isStripe, false, true, false>(partSet, p.DataParts); - } else { + } else { p.GlueBlockParts(outBuffer.Detach(), partSet); - } - return; - } - - - // All possible failures of 2 disks which EVENODD capable to handle - if (missingDataPartCount <= 2 && missingDataPartIdxA != p.TotalParts - 1 - && missingDataPartIdxB != p.TotalParts - 1) { - if (p.DataParts == 4) { - TErasureType typeEO(TErasureType::EErasureSpecies::Erasure4Plus2Block); + } + return; + } + + + // All possible failures of 2 disks which EVENODD capable to handle + if (missingDataPartCount <= 2 && missingDataPartIdxA != p.TotalParts - 1 + && missingDataPartIdxB != p.TotalParts - 1) { + if (p.DataParts == 4) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure4Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } else if 
(p.DataParts == 3) { - TErasureType typeEO(TErasureType::EErasureSpecies::Erasure3Plus2Block); + } else if (p.DataParts == 3) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure3Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } else if (p.DataParts == 2) { - TErasureType typeEO(TErasureType::EErasureSpecies::Erasure2Plus2Block); + } else if (p.DataParts == 2) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure2Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } - return; - } - if (missingDataPartIdxA == p.TotalParts - 1 - || missingDataPartIdxB == p.TotalParts - 1 - || missingDataPartIdxC == p.TotalParts - 1) { - // Possible combinations handled in this branch + } + return; + } + if (missingDataPartIdxA == p.TotalParts - 1 + || missingDataPartIdxB == p.TotalParts - 1 + || missingDataPartIdxC == p.TotalParts - 1) { + // Possible combinations handled in this branch // '+' stands for part, which is present for sure, // '-' stands for part, which is missing for sure, // series of 0, 1 and 2 means that there are n missing parts in this region - // 0 0 0 0 0 0 - or 1 1 1 1 1 1 - or 2 2 2 2 2 2 - - if (p.DataParts == 4) { - TErasureType typeEO(TErasureType::EErasureSpecies::Erasure4Plus2Block); + // 0 0 0 0 0 0 - or 1 1 1 1 1 1 - or 2 2 2 2 2 2 - + if (p.DataParts == 4) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure4Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } else if (p.DataParts == 3) { - TErasureType typeEO(TErasureType::EErasureSpecies::Erasure3Plus2Block); + } else if (p.DataParts == 3) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure3Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } else if (p.DataParts == 2) { - TErasureType 
typeEO(TErasureType::EErasureSpecies::Erasure2Plus2Block); + } else if (p.DataParts == 2) { + TErasureType typeEO(TErasureType::EErasureSpecies::Erasure2Plus2Block); EoBlockRestore<isStripe, restoreParts, restoreFullData, restoreParityParts>(crcMode, typeEO, partSet); - } - if (restoreParts) { + } + if (restoreParts) { if (restoreParityParts) { p.StarSplit<isStripe, true>(partSet); - } + } if (missingDataPartIdxA < (restoreParityParts ? p.TotalParts : p.DataParts)) { p.PadAndCrcPart(partSet, missingDataPartIdxA); } @@ -1793,21 +1793,21 @@ void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, if (missingDataPartIdxC < (restoreParityParts ? p.TotalParts : p.DataParts)) { p.PadAndCrcPart(partSet, missingDataPartIdxC); } - } - return; - } - // There are remain only cases with missingDataPartCount == 3 - if ( missingDataPartIdxC == p.DataParts + 1) { - if (missingDataPartIdxB < p.DataParts) { - // 2 2 2 2 + - + - // "It can be decoded with slightly modification of the EVENODD decoding" (c) + } + return; + } + // There are remain only cases with missingDataPartCount == 3 + if ( missingDataPartIdxC == p.DataParts + 1) { + if (missingDataPartIdxB < p.DataParts) { + // 2 2 2 2 + - + + // "It can be decoded with slightly modification of the EVENODD decoding" (c) p.EoMainRestoreParts<isStripe, restoreParts, restoreFullData, true, restoreParityParts>(partSet, missingDataPartIdxA, - missingDataPartIdxB); - } else { - // 1 1 1 1 - - + + missingDataPartIdxB); + } else { + // 1 1 1 1 - - + p.EoDiagonalRestorePart<isStripe, restoreParts, restoreFullData, true, restoreParityParts>(partSet, missingDataPartIdxA); - } - if (restoreParts) { + } + if (restoreParts) { if (restoreParityParts) { p.StarSplit<isStripe, !restoreFullData>(partSet); } @@ -1820,17 +1820,17 @@ void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, if (missingDataPartIdxC < (restoreParityParts ? 
p.TotalParts : p.DataParts)) { p.PadAndCrcPart(partSet, missingDataPartIdxC); } - } - return; - } - if (missingDataPartIdxC == p.DataParts) { - // 2 2 2 2 - + + - if (! restoreParts) { + } + return; + } + if (missingDataPartIdxC == p.DataParts) { + // 2 2 2 2 - + + + if (! restoreParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxC], expectedPartSize); - } + } p.StarRestoreHorizontalPart<isStripe, restoreParts, restoreFullData, restoreParityParts>(partSet, - missingDataPartIdxA, missingDataPartIdxB); + missingDataPartIdxA, missingDataPartIdxB); if (restoreParts) { if (missingDataPartIdxA < (restoreParityParts ? p.TotalParts : p.DataParts)) { p.PadAndCrcPart(partSet, missingDataPartIdxA); @@ -1842,29 +1842,29 @@ void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, p.PadAndCrcPart(partSet, missingDataPartIdxC); } } - return; - } - - VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); - Y_VERIFY(missingDataPartIdxA < p.DataParts && missingDataPartIdxB < p.DataParts - && missingDataPartIdxC < p.DataParts); - // Two possible cases: - // - Symmetric - // - Asymmetric - // But for m = 5 it is always possible to change asymmetric to symmetric by shifting - ui32 m = ErasureSpeciesParameters[TErasureType::EErasureSpecies::Erasure4Plus3Block].Prime; - while ((m + missingDataPartIdxB - missingDataPartIdxA) % m != (m + missingDataPartIdxC - missingDataPartIdxB) % m ) { - ui32 tmp = missingDataPartIdxA; - missingDataPartIdxA = missingDataPartIdxB; - missingDataPartIdxB = missingDataPartIdxC; - missingDataPartIdxC = tmp; - } - if (! 
restoreParts) { + return; + } + + VERBOSE_COUT(__LINE__ << " of " << __FILE__ << Endl); + Y_VERIFY(missingDataPartIdxA < p.DataParts && missingDataPartIdxB < p.DataParts + && missingDataPartIdxC < p.DataParts); + // Two possible cases: + // - Symmetric + // - Asymmetric + // But for m = 5 it is always possible to change asymmetric to symmetric by shifting + ui32 m = ErasureSpeciesParameters[TErasureType::EErasureSpecies::Erasure4Plus3Block].Prime; + while ((m + missingDataPartIdxB - missingDataPartIdxA) % m != (m + missingDataPartIdxC - missingDataPartIdxB) % m ) { + ui32 tmp = missingDataPartIdxA; + missingDataPartIdxA = missingDataPartIdxB; + missingDataPartIdxB = missingDataPartIdxC; + missingDataPartIdxC = tmp; + } + if (! restoreParts) { TRACE("Line# " << __LINE__ << Endl); Refurbish(partSet.Parts[missingDataPartIdxB], expectedPartSize); - } + } p.StarMainRestorePartsSymmetric<isStripe, restoreParts, restoreFullData, restoreParityParts>(partSet, - missingDataPartIdxA, missingDataPartIdxB, missingDataPartIdxC); + missingDataPartIdxA, missingDataPartIdxB, missingDataPartIdxC); if (restoreParts) { if (missingDataPartIdxA < (restoreParityParts ? 
p.TotalParts : p.DataParts)) { p.PadAndCrcPart(partSet, missingDataPartIdxA); @@ -1876,8 +1876,8 @@ void StarBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, p.PadAndCrcPart(partSet, missingDataPartIdxC); } } -} - +} + template <bool isStripe, bool restoreParts, bool restoreFullData, bool restoreParityParts> void XorBlockRestore(TErasureType::ECrcMode crcMode, const TErasureType &type, TDataPartSet &partSet) { TString &outBuffer = partSet.FullDataFragment.OwnedString; @@ -1944,15 +1944,15 @@ const std::array<TString, TErasureType::ErasureSpeciesCount> TErasureType::Erasu "stripe-4-2", "stripe-3-2", "mirror-3-2", - "mirror-3-dc", - "block-4-3", - "stripe-4-3", - "block-3-3", - "stripe-3-3", - "block-2-3", - "stripe-2-3", - "block-2-2", - "stripe-2-2", + "mirror-3-dc", + "block-4-3", + "stripe-4-3", + "block-3-3", + "stripe-3-3", + "block-2-3", + "stripe-2-3", + "block-2-2", + "stripe-2-2", "mirror-3of4", }}; @@ -2025,13 +2025,13 @@ ui32 TErasureType::MinimalBlockSize() const { if (erasure.ParityParts == 1) { return erasure.DataParts * sizeof(ui64); } - if (erasure.ParityParts == 2) { - return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); - } - if (erasure.ParityParts == 3) { - return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); - } - ythrow TWithBackTrace<yexception>() << "Unsupported partiy part count = " << erasure.ParityParts << + if (erasure.ParityParts == 2) { + return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); + } + if (erasure.ParityParts == 3) { + return (erasure.Prime - 1) * erasure.DataParts * sizeof(ui64); + } + ythrow TWithBackTrace<yexception>() << "Unsupported partiy part count = " << erasure.ParityParts << " for ErasureFamily = " << (i32)erasure.ErasureFamily; } ythrow TWithBackTrace<yexception>() << "Unknown ErasureFamily = " << (i32)erasure.ErasureFamily; @@ -2607,9 +2607,9 @@ void TErasureType::IncrementalSplitData(ECrcMode crcMode, const TString& buffer, case 2: EoBlockSplit<true>(crcMode, 
*this, buffer, outPartSet); break; - case 3: + case 3: StarBlockSplit<true>(crcMode, *this, buffer, outPartSet); - break; + break; default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; @@ -2624,9 +2624,9 @@ void TErasureType::IncrementalSplitData(ECrcMode crcMode, const TString& buffer, case 2: EoBlockSplit<false>(crcMode, *this, buffer, outPartSet); break; - case 3: + case 3: StarBlockSplit<false>(crcMode, *this, buffer, outPartSet); - break; + break; default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; @@ -2987,7 +2987,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res << " while expected " << (erasure.DataParts + erasure.ParityParts); } Y_VERIFY_DEBUG(restoreFullData || restoreParts); - Y_VERIFY_DEBUG(erasure.Prime <= MAX_LINES_IN_BLOCK); + Y_VERIFY_DEBUG(erasure.Prime <= MAX_LINES_IN_BLOCK); switch (erasure.ErasureFamily) { case TErasureType::ErasureMirror: if (restoreParts) { @@ -3056,9 +3056,9 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<true, false, true, false>(crcMode, *this, partSet); } break; - case 3: - if (restoreParts) { - if (restoreFullData) { + case 3: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { StarBlockRestore<true, true, true, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3066,7 +3066,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<true, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { StarBlockRestore<true, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3074,12 +3074,12 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<true, true, false, 
false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { StarBlockRestore<true, false, true, false>(crcMode, *this, partSet); - } - break; + } + break; default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; @@ -3136,9 +3136,9 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res EoBlockRestore<false, false, true, false>(crcMode, *this, partSet); } break; - case 3: - if (restoreParts) { - if (restoreFullData) { + case 3: + if (restoreParts) { + if (restoreFullData) { if (restoreParityParts) { // isStripe, restoreParts, restoreFullData, restoreParityParts StarBlockRestore<false, true, true, true>(crcMode, *this, partSet); @@ -3147,7 +3147,7 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<false, true, true, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } else { + } else { if (restoreParityParts) { StarBlockRestore<false, true, false, true>(crcMode, *this, partSet); VerifyPartSizes(partSet, Max<size_t>()); @@ -3155,12 +3155,12 @@ void TErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool res StarBlockRestore<false, true, false, false>(crcMode, *this, partSet); VerifyPartSizes(partSet, erasure.DataParts); } - } + } partSet.MemoryConsumed = partSet.Parts[0].MemoryConsumed() * partSet.Parts.size(); - } else if (restoreFullData) { + } else if (restoreFullData) { StarBlockRestore<false, false, true, false>(crcMode, *this, partSet); - } - break; + } + break; default: ythrow TWithBackTrace<yexception>() << "Unsupported number of parity parts: " << erasure.ParityParts; diff --git a/ydb/core/erasure/erasure.h b/ydb/core/erasure/erasure.h index cfac9b00df..35bd68c2d2 100644 --- a/ydb/core/erasure/erasure.h +++ 
b/ydb/core/erasure/erasure.h @@ -241,16 +241,16 @@ struct TErasureType { ErasureMirror3Plus2 = 8, ErasureMirror3dc = 9, - Erasure4Plus3Block = 10, - Erasure4Plus3Stripe = 11, - Erasure3Plus3Block = 12, - Erasure3Plus3Stripe = 13, - Erasure2Plus3Block = 14, - Erasure2Plus3Stripe = 15, - - Erasure2Plus2Block = 16, - Erasure2Plus2Stripe = 17, - + Erasure4Plus3Block = 10, + Erasure4Plus3Stripe = 11, + Erasure3Plus3Block = 12, + Erasure3Plus3Stripe = 13, + Erasure2Plus3Block = 14, + Erasure2Plus3Stripe = 15, + + Erasure2Plus2Block = 16, + Erasure2Plus2Stripe = 17, + ErasureMirror3of4 = 18, ErasureSpeciesCount = 19 @@ -349,7 +349,7 @@ protected: ui32 ColumnSize() const; }; -bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf); - +bool CheckCrcAtTheEnd(TErasureType::ECrcMode crcMode, const TString& buf); + } diff --git a/ydb/core/erasure/erasure_perf_test.cpp b/ydb/core/erasure/erasure_perf_test.cpp index 7a86074eb8..2aaee89334 100644 --- a/ydb/core/erasure/erasure_perf_test.cpp +++ b/ydb/core/erasure/erasure_perf_test.cpp @@ -1,297 +1,297 @@ -#include "erasure.h" -#include "erasure_rope.h" - +#include "erasure.h" +#include "erasure_rope.h" + #include <library/cpp/testing/unittest/registar.h> -#include <util/generic/ymath.h> -#include <util/random/entropy.h> -#include <util/random/mersenne64.h> -#include <util/stream/null.h> -#include <util/string/printf.h> -#include <util/system/hp_timer.h> -#include <iostream> -#include <numeric> +#include <util/generic/ymath.h> +#include <util/random/entropy.h> +#include <util/random/mersenne64.h> +#include <util/stream/null.h> +#include <util/string/printf.h> +#include <util/system/hp_timer.h> +#include <iostream> +#include <numeric> #include <library/cpp/digest/crc32c/crc32c.h> - -#define SHORT_TEST -//#define LONG_TEST - - -#ifdef SHORT_TEST -# define N_REPEATS 3 -# define BUFFER_SIZE (1024 * 1024) -# define ATTEMPTS 100 -#elif defined( LONG_TEST ) -# define N_REPEATS 5 -# define BUFFER_SIZE (1ull * 1024 * 1024 
* 1024) -# define ATTEMPTS 100000 -#endif - - -namespace NKikimr { - -TString GenerateData(ui32 dataSize) { - TString testString; - testString.resize(dataSize); - NPrivate::TMersenne64 randGen(Seed()); - ui64 *writePos64 = (ui64 *)testString.data(); - ui32 ui64Parts = testString.size() / sizeof(ui64); - for (ui32 i = 0; i < ui64Parts; ++i) { - *writePos64++ = randGen.GenRand(); - } - - char *writePosChar = (char *)writePos64; + +#define SHORT_TEST +//#define LONG_TEST + + +#ifdef SHORT_TEST +# define N_REPEATS 3 +# define BUFFER_SIZE (1024 * 1024) +# define ATTEMPTS 100 +#elif defined( LONG_TEST ) +# define N_REPEATS 5 +# define BUFFER_SIZE (1ull * 1024 * 1024 * 1024) +# define ATTEMPTS 100000 +#endif + + +namespace NKikimr { + +TString GenerateData(ui32 dataSize) { + TString testString; + testString.resize(dataSize); + NPrivate::TMersenne64 randGen(Seed()); + ui64 *writePos64 = (ui64 *)testString.data(); + ui32 ui64Parts = testString.size() / sizeof(ui64); + for (ui32 i = 0; i < ui64Parts; ++i) { + *writePos64++ = randGen.GenRand(); + } + + char *writePosChar = (char *)writePos64; ui32 charParts = testString.size() % sizeof(ui64); - for (ui32 i = 0; i < charParts; ++i) { - writePosChar[i] = (char)randGen.GenRand(); - } - return testString; -} - + for (ui32 i = 0; i < charParts; ++i) { + writePosChar[i] = (char)randGen.GenRand(); + } + return testString; +} + std::pair<double, double> CalcAvgSd(TVector<double> ×) { - double avg = 0; - double sd = 0; - auto min = std::min_element(times.begin(), times.end()); - *min = times.back(); - times.pop_back(); - auto max = std::max_element(times.begin(), times.end()); - *max = times.back(); - times.pop_back(); - // Calc average and standard deviation - for (const double &time : times) { - avg += time; - } - avg /= times.size(); - for (const double &time : times) { - sd += (avg - time) * (avg - time); - } - sd = sqrt(sd / times.size()); - return std::make_pair(avg, sd); -} - -template <bool measureSplit, bool measureRestore> + 
double avg = 0; + double sd = 0; + auto min = std::min_element(times.begin(), times.end()); + *min = times.back(); + times.pop_back(); + auto max = std::max_element(times.begin(), times.end()); + *max = times.back(); + times.pop_back(); + // Calc average and standard deviation + for (const double &time : times) { + avg += time; + } + avg /= times.size(); + for (const double &time : times) { + sd += (avg - time) * (avg - time); + } + sd = sqrt(sd / times.size()); + return std::make_pair(avg, sd); +} + +template <bool measureSplit, bool measureRestore> std::pair<double, double> MeasureTime(TErasureType &type, TVector<ui32> &missedParts, ui32 dataSize, - bool isRestoreParts, bool isRestoreFullData) { - - const size_t attempts = dataSize < 10000 ? ATTEMPTS : 10; - - THPTimer timer; + bool isRestoreParts, bool isRestoreFullData) { + + const size_t attempts = dataSize < 10000 ? ATTEMPTS : 10; + + THPTimer timer; TVector<double> times; - ui32 partMask = ~(ui32)0; - for (const ui32 &part : missedParts) { - partMask &= ~(ui32)(1ul << part); - } - for (ui32 i = 0; i < N_REPEATS; ++i) { - std::vector<TString> originalData; - std::vector<TString> testData; - originalData.resize(attempts); - testData.resize(attempts); - for (size_t i = 0; i < attempts; ++i) { - originalData[i] = GenerateData(dataSize); - testData[i] = originalData[i]; - } - - double time = 0; - // Split the data into parts - std::vector<TDataPartSet> partSets; - partSets.resize(attempts); - - for (auto& partSet : partSets) { - const ui32 partSize = type.PartSize(TErasureType::CrcModeNone, dataSize); - partSet.Parts.resize(type.TotalPartCount()); - for (ui32 i = 0; i < type.TotalPartCount(); ++i) { - partSet.Parts[i].ReferenceTo(partSet.Parts[i].OwnedString.resize(partSize)); - } - } - if (measureSplit) { - timer.Reset(); - } - for (size_t i = 0; i < attempts; ++i) { - type.SplitData(TErasureType::CrcModeNone, testData[i], partSets[i]); - } - - if (measureSplit) { - time += timer.PassedReset() / attempts; - } - 
if (isRestoreFullData) { - std::vector<TString> restoredData; - restoredData.resize(attempts); - for (auto& restored : restoredData) { - restored.resize(dataSize); - for (ui64 i = 0; i < dataSize; ++i) { - restored[i] = 0; - } - } - // Remove the 'missing' parts - for (auto& partSet : partSets) { - partSet.PartsMask &= partMask; - } - if (measureRestore) { - timer.Reset(); - } - for (size_t i = 0; i < attempts; ++i) { - type.RestoreData(TErasureType::CrcModeNone, partSets[i], restoredData[i], - isRestoreParts, isRestoreFullData, isRestoreParts); - } - if (measureRestore) { - time += timer.PassedReset() / attempts; - } - for (size_t i = 0; i < attempts; ++i) { - UNIT_ASSERT_EQUAL(originalData[i], restoredData[i]); - } - } - times.push_back(time); - } - return CalcAvgSd(times); -} - + ui32 partMask = ~(ui32)0; + for (const ui32 &part : missedParts) { + partMask &= ~(ui32)(1ul << part); + } + for (ui32 i = 0; i < N_REPEATS; ++i) { + std::vector<TString> originalData; + std::vector<TString> testData; + originalData.resize(attempts); + testData.resize(attempts); + for (size_t i = 0; i < attempts; ++i) { + originalData[i] = GenerateData(dataSize); + testData[i] = originalData[i]; + } + + double time = 0; + // Split the data into parts + std::vector<TDataPartSet> partSets; + partSets.resize(attempts); + + for (auto& partSet : partSets) { + const ui32 partSize = type.PartSize(TErasureType::CrcModeNone, dataSize); + partSet.Parts.resize(type.TotalPartCount()); + for (ui32 i = 0; i < type.TotalPartCount(); ++i) { + partSet.Parts[i].ReferenceTo(partSet.Parts[i].OwnedString.resize(partSize)); + } + } + if (measureSplit) { + timer.Reset(); + } + for (size_t i = 0; i < attempts; ++i) { + type.SplitData(TErasureType::CrcModeNone, testData[i], partSets[i]); + } + + if (measureSplit) { + time += timer.PassedReset() / attempts; + } + if (isRestoreFullData) { + std::vector<TString> restoredData; + restoredData.resize(attempts); + for (auto& restored : restoredData) { + 
restored.resize(dataSize); + for (ui64 i = 0; i < dataSize; ++i) { + restored[i] = 0; + } + } + // Remove the 'missing' parts + for (auto& partSet : partSets) { + partSet.PartsMask &= partMask; + } + if (measureRestore) { + timer.Reset(); + } + for (size_t i = 0; i < attempts; ++i) { + type.RestoreData(TErasureType::CrcModeNone, partSets[i], restoredData[i], + isRestoreParts, isRestoreFullData, isRestoreParts); + } + if (measureRestore) { + time += timer.PassedReset() / attempts; + } + for (size_t i = 0; i < attempts; ++i) { + UNIT_ASSERT_EQUAL(originalData[i], restoredData[i]); + } + } + times.push_back(time); + } + return CalcAvgSd(times); +} + TVector<TVector<ui32>> ChooseCombinationCase(TErasureType &type) { - if (type.GetErasure() == TErasureType::EErasureSpecies::Erasure4Plus2Stripe || - type.GetErasure() == TErasureType::EErasureSpecies::Erasure4Plus2Block ) { - return { {0, 1} - ,{0, 4} - ,{0, 5} - ,{4, 5} }; - } - return {}; -} - -const char *Bool2str(bool val) { - return val ? "true " : "false"; -} - -void MeasureRestoreTime(TErasureType &type) { - Cout << "EErasureType = " << type.ToString() << " Measuring restore time, time in milliseconds" << Endl; + if (type.GetErasure() == TErasureType::EErasureSpecies::Erasure4Plus2Stripe || + type.GetErasure() == TErasureType::EErasureSpecies::Erasure4Plus2Block ) { + return { {0, 1} + ,{0, 4} + ,{0, 5} + ,{4, 5} }; + } + return {}; +} + +const char *Bool2str(bool val) { + return val ? 
"true " : "false"; +} + +void MeasureRestoreTime(TErasureType &type) { + Cout << "EErasureType = " << type.ToString() << " Measuring restore time, time in milliseconds" << Endl; TVector<TVector<ui32>> testCombin = ChooseCombinationCase(type); - TVector<ui64> dataSizes {100, 4*1024, 4111, 8*1024, 8207, 4062305, 4*1024*1024}; - for (const ui64 &size : dataSizes) { - Cout << " size=" << size << Endl; + TVector<ui64> dataSizes {100, 4*1024, 4111, 8*1024, 8207, 4062305, 4*1024*1024}; + for (const ui64 &size : dataSizes) { + Cout << " size=" << size << Endl; for (TVector<ui32> &combination : testCombin) { - for (ui32 variant = 1; variant < 2; ++variant) { - bool isRestoreParts = false; - bool isRestoreFullData = false; - switch (variant) { - case 0: - isRestoreParts = true; - isRestoreFullData = true; - break; - case 1: - isRestoreFullData = true; - break; - case 2: - isRestoreParts = true; - break; - } - TStringStream mode; - mode << " combination = "; - for (const ui32 &part : combination) { - mode << part << ", "; - } - mode << "mode = " << Bool2str(isRestoreParts) << "," << Bool2str(isRestoreFullData) << " "; - std::pair<double, double> time = MeasureTime<false, true>(type, combination, size, - isRestoreParts, isRestoreFullData); - Cout << mode.Str() << Sprintf(" %9.6lf +- %9.6lf", - 1000 * time.first, 1000 * time.second) << Endl; - } - } - } -} - -void MeasureSplitTime(TErasureType &type) { - Cout << "EErasureType = " << type.ToString() << Endl; + for (ui32 variant = 1; variant < 2; ++variant) { + bool isRestoreParts = false; + bool isRestoreFullData = false; + switch (variant) { + case 0: + isRestoreParts = true; + isRestoreFullData = true; + break; + case 1: + isRestoreFullData = true; + break; + case 2: + isRestoreParts = true; + break; + } + TStringStream mode; + mode << " combination = "; + for (const ui32 &part : combination) { + mode << part << ", "; + } + mode << "mode = " << Bool2str(isRestoreParts) << "," << Bool2str(isRestoreFullData) << " "; + 
std::pair<double, double> time = MeasureTime<false, true>(type, combination, size, + isRestoreParts, isRestoreFullData); + Cout << mode.Str() << Sprintf(" %9.6lf +- %9.6lf", + 1000 * time.first, 1000 * time.second) << Endl; + } + } + } +} + +void MeasureSplitTime(TErasureType &type) { + Cout << "EErasureType = " << type.ToString() << Endl; TVector<ui32> combination {0, 1}; - //TVector<ui64> dataSizes {100, 4012, 4*1024, 4111, 8*1024, 8207, 4062305, 4*1024*1024}; - TVector<ui64> dataSizes {100, 4012, 4*1024, 4111}; - for (const ui64 &size : dataSizes) { - TStringStream mode; - std::pair<double, double> time = MeasureTime<true, false>(type, combination, size, false, false); - Cout << size << "\n " - << mode.Str() << Sprintf("time = %.3lf +- %.3lf (us)", 1000000*time.first, 1000000*time.second) - << Sprintf(", speed=%.3fGB/s", size / time.first / 1000000000.0) << Endl; - } -} - + //TVector<ui64> dataSizes {100, 4012, 4*1024, 4111, 8*1024, 8207, 4062305, 4*1024*1024}; + TVector<ui64> dataSizes {100, 4012, 4*1024, 4111}; + for (const ui64 &size : dataSizes) { + TStringStream mode; + std::pair<double, double> time = MeasureTime<true, false>(type, combination, size, false, false); + Cout << size << "\n " + << mode.Str() << Sprintf("time = %.3lf +- %.3lf (us)", 1000000*time.first, 1000000*time.second) + << Sprintf(", speed=%.3fGB/s", size / time.first / 1000000000.0) << Endl; + } +} + Y_UNIT_TEST_SUITE(TErasurePerfTest) { Y_UNIT_TEST(Split) { - TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); - MeasureSplitTime(type); - } - + TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); + MeasureSplitTime(type); + } + Y_UNIT_TEST(Restore) { - TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); - MeasureRestoreTime(type); - } -} - -inline TRope RopeFromStringReference(TString string) { - if (string.Empty()) { - return TRope(); - } - return TRope(std::move(string)); -} - -template <bool convertToRope> -void 
RopeMeasureSplitTime(TErasureType &type, ui64 dataSize, const TString& buffer) { - NPrivate::TMersenne64 randGen(Seed()); - THPTimer timer; - - const size_t attempts = dataSize < 10000 ? ATTEMPTS : 10; - double time = 0; - for (ui64 i = 0; i < attempts; ++i) { - ui64 begin = randGen.GenRand() % (buffer.size() - dataSize); - TString rope = TString(buffer.data() + begin, buffer.data() + begin + dataSize); - - TRope ropes[6]; - - timer.Reset(); - - TDataPartSet partSet; - type.SplitData(TErasureType::CrcModeNone, rope, partSet); - if (convertToRope) { - for (int i = 0; i < 6; ++i) { - ropes[i] = RopeFromStringReference(std::move(partSet.Parts[i].OwnedString)); - } - } - - time += timer.PassedReset(); - } - - -#ifdef LONG_TEST - double bs = (dataSize * attempts) / time; - Cerr << bs << " byte / s"; - if (convertToRope) { - Cerr << " (convert to rope mode)\n"; - } else { - Cerr << " (string erasure mode)\n"; - } -#endif -} - -TString GenerateTestBuffer(ui64 size) { - NPrivate::TMersenne64 randGen(Seed()); - TString buffer = TString::Uninitialized(size); - ui64 a = randGen.GenRand(); - ui64 b = randGen.GenRand(); - for (ui64 i = 0; i < size / sizeof(ui64); ++i) { - *((ui64*) buffer.Detach() + i) = a * i + b; - } - return buffer; -} - -Y_UNIT_TEST_SUITE(TErasureSmallBlobSizePerfTest) { - - TString buffer = GenerateTestBuffer(BUFFER_SIZE); - - Y_UNIT_TEST(StringErasureMode) { - TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); - RopeMeasureSplitTime<false>(type, 128, buffer); - } - - Y_UNIT_TEST(ConvertToRopeMode) { - TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); - RopeMeasureSplitTime<true>(type, 128, buffer); - } -} - - -} + TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); + MeasureRestoreTime(type); + } +} + +inline TRope RopeFromStringReference(TString string) { + if (string.Empty()) { + return TRope(); + } + return TRope(std::move(string)); +} + +template <bool convertToRope> +void 
RopeMeasureSplitTime(TErasureType &type, ui64 dataSize, const TString& buffer) { + NPrivate::TMersenne64 randGen(Seed()); + THPTimer timer; + + const size_t attempts = dataSize < 10000 ? ATTEMPTS : 10; + double time = 0; + for (ui64 i = 0; i < attempts; ++i) { + ui64 begin = randGen.GenRand() % (buffer.size() - dataSize); + TString rope = TString(buffer.data() + begin, buffer.data() + begin + dataSize); + + TRope ropes[6]; + + timer.Reset(); + + TDataPartSet partSet; + type.SplitData(TErasureType::CrcModeNone, rope, partSet); + if (convertToRope) { + for (int i = 0; i < 6; ++i) { + ropes[i] = RopeFromStringReference(std::move(partSet.Parts[i].OwnedString)); + } + } + + time += timer.PassedReset(); + } + + +#ifdef LONG_TEST + double bs = (dataSize * attempts) / time; + Cerr << bs << " byte / s"; + if (convertToRope) { + Cerr << " (convert to rope mode)\n"; + } else { + Cerr << " (string erasure mode)\n"; + } +#endif +} + +TString GenerateTestBuffer(ui64 size) { + NPrivate::TMersenne64 randGen(Seed()); + TString buffer = TString::Uninitialized(size); + ui64 a = randGen.GenRand(); + ui64 b = randGen.GenRand(); + for (ui64 i = 0; i < size / sizeof(ui64); ++i) { + *((ui64*) buffer.Detach() + i) = a * i + b; + } + return buffer; +} + +Y_UNIT_TEST_SUITE(TErasureSmallBlobSizePerfTest) { + + TString buffer = GenerateTestBuffer(BUFFER_SIZE); + + Y_UNIT_TEST(StringErasureMode) { + TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); + RopeMeasureSplitTime<false>(type, 128, buffer); + } + + Y_UNIT_TEST(ConvertToRopeMode) { + TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); + RopeMeasureSplitTime<true>(type, 128, buffer); + } +} + + +} diff --git a/ydb/core/erasure/erasure_rope.cpp b/ydb/core/erasure/erasure_rope.cpp index 5739b649f5..b1de833d93 100644 --- a/ydb/core/erasure/erasure_rope.cpp +++ b/ydb/core/erasure/erasure_rope.cpp @@ -42,7 +42,7 @@ static TString DebugFormatBits(ui64 value) { #endif namespace NKikimr { -namespace 
NErasureRope { +namespace NErasureRope { static void Refurbish(TRope &str, ui64 size) { if (str.GetSize() != size) { @@ -2635,7 +2635,7 @@ static void VerifyPartSizes(TDataPartSet& partSet, size_t definedPartEndIdx) { for (size_t idx = 0; idx < partSet.Parts.size(); ++idx) { Y_VERIFY(partSet.Parts[idx].size() == partSize); if (partSize && idx < definedPartEndIdx) { - CHECK_ROPE_IS_DEFINED(partSet.Parts[idx].FastViewer.GetCurrent(partSet.Parts[idx].Offset), + CHECK_ROPE_IS_DEFINED(partSet.Parts[idx].FastViewer.GetCurrent(partSet.Parts[idx].Offset), partSet.Parts[idx].Size); } } @@ -2801,8 +2801,8 @@ void TRopeErasureType::RestoreData(ECrcMode crcMode, TDataPartSet& partSet, bool } } // NKikimr -} // NErasureRope +} // NErasureRope -Y_DECLARE_OUT_SPEC(, NKikimr::NErasureRope::TRopeErasureType::EErasureSpecies, stream, value) { - stream << NKikimr::NErasureRope::TRopeErasureType::ErasureSpeciesToStr(value); +Y_DECLARE_OUT_SPEC(, NKikimr::NErasureRope::TRopeErasureType::EErasureSpecies, stream, value) { + stream << NKikimr::NErasureRope::TRopeErasureType::ErasureSpeciesToStr(value); } diff --git a/ydb/core/erasure/erasure_rope.h b/ydb/core/erasure/erasure_rope.h index 793bdf1e17..a4835ce834 100644 --- a/ydb/core/erasure/erasure_rope.h +++ b/ydb/core/erasure/erasure_rope.h @@ -15,7 +15,7 @@ #include <library/cpp/digest/crc32c/crc32c.h> namespace NKikimr { -namespace NErasureRope { +namespace NErasureRope { class TRopeHelpers { public: @@ -521,6 +521,6 @@ protected: bool CheckCrcAtTheEnd(TRopeErasureType::ECrcMode crcMode, const TRope& buf); -} // NKikimr -} // NErasureRope +} // NKikimr +} // NErasureRope diff --git a/ydb/core/erasure/erasure_rope_ut.cpp b/ydb/core/erasure/erasure_rope_ut.cpp index 82442dd306..62a6242ea9 100644 --- a/ydb/core/erasure/erasure_rope_ut.cpp +++ b/ydb/core/erasure/erasure_rope_ut.cpp @@ -3,7 +3,7 @@ namespace NKikimr { -namespace NErasureRope { +namespace NErasureRope { TRope GenerateRandomRope(size_t dataSize) { NPrivate::TMersenne64 
randGen(Seed()); @@ -952,4 +952,4 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { } } // namespace NKikimr -} // NErasureRope +} // NErasureRope diff --git a/ydb/core/erasure/erasure_ut.cpp b/ydb/core/erasure/erasure_ut.cpp index 6dcbdec587..2473bd1650 100644 --- a/ydb/core/erasure/erasure_ut.cpp +++ b/ydb/core/erasure/erasure_ut.cpp @@ -4,124 +4,124 @@ namespace NKikimr { -void TestMissingPartWithRandomData(TErasureType &groupType, ui32 *missingPartIdx, ui32 missingParts, - ui32 dataSize, bool isRestoreParts, bool isRestoreFullData, TString &info) { - - NPrivate::TMersenne64 randGen(Seed()); - - ui32 partMask = ~(ui32)0; - for (ui32 i = 0; i < missingParts; ++i) { - partMask &= ~(ui32)(1ul << missingPartIdx[i]); - } - - TString mode = Sprintf(" restoreParts=%s restoreFullData=%s ", - (isRestoreParts ? "true" : "false"), - (isRestoreFullData ? "true" : "false")); - VERBOSE_COUT(" dataSize# " << dataSize << Endl); - TString testString; - testString.resize(dataSize); +void TestMissingPartWithRandomData(TErasureType &groupType, ui32 *missingPartIdx, ui32 missingParts, + ui32 dataSize, bool isRestoreParts, bool isRestoreFullData, TString &info) { + + NPrivate::TMersenne64 randGen(Seed()); + + ui32 partMask = ~(ui32)0; + for (ui32 i = 0; i < missingParts; ++i) { + partMask &= ~(ui32)(1ul << missingPartIdx[i]); + } + + TString mode = Sprintf(" restoreParts=%s restoreFullData=%s ", + (isRestoreParts ? "true" : "false"), + (isRestoreFullData ? 
"true" : "false")); + VERBOSE_COUT(" dataSize# " << dataSize << Endl); + TString testString; + testString.resize(dataSize); char *writePosChar = (char *)testString.data(); ui32 charParts = testString.size() % sizeof(ui64); - for (ui32 i = 0; i < charParts; ++i) { - writePosChar[i] = (char)randGen.GenRand(); - } - ui64 *writePos64 = (ui64 *)writePosChar; + for (ui32 i = 0; i < charParts; ++i) { + writePosChar[i] = (char)randGen.GenRand(); + } + ui64 *writePos64 = (ui64 *)writePosChar; ui32 ui64Parts = testString.size() / sizeof(ui64); - for (ui32 i = 0; i < ui64Parts; ++i) { - writePos64[i] = randGen.GenRand(); - } - - // Split the data into parts - TDataPartSet partSet; + for (ui32 i = 0; i < ui64Parts; ++i) { + writePos64[i] = randGen.GenRand(); + } + + // Split the data into parts + TDataPartSet partSet; groupType.SplitData(TErasureType::CrcModeNone, testString, partSet); ui64 partSize = groupType.PartSize(TErasureType::CrcModeNone, dataSize); - for (ui32 part = 0; part < groupType.TotalPartCount(); ++part) { - UNIT_ASSERT_EQUAL(partSize, partSet.Parts[part].size()); - } - - // Save the original parts for the future checks - TDataPartSet originalPartSet = partSet; - - // Remove the 'missing' parts - partSet.PartsMask &= partMask; - for (ui32 i = 0; i < missingParts; ++i) { + for (ui32 part = 0; part < groupType.TotalPartCount(); ++part) { + UNIT_ASSERT_EQUAL(partSize, partSet.Parts[part].size()); + } + + // Save the original parts for the future checks + TDataPartSet originalPartSet = partSet; + + // Remove the 'missing' parts + partSet.PartsMask &= partMask; + for (ui32 i = 0; i < missingParts; ++i) { partSet.Parts[missingPartIdx[i]].clear(); - } - // Restore the data - TString restoredString; + } + // Restore the data + TString restoredString; groupType.RestoreData(TErasureType::CrcModeNone, partSet, restoredString, isRestoreParts, isRestoreFullData, isRestoreParts); - - // Make sure the restored data matches the original - TString errorInfo = 
Sprintf("dataSize=%d partMask=0x%x", dataSize, partMask); - if (isRestoreFullData) { - UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); - UNIT_ASSERT_EQUAL(testString, restoredString); - } - - if (isRestoreParts) { - for (ui32 idx = 0; idx < missingParts; ++idx) { - if (missingPartIdx[idx] < partSet.Parts.size()) { - UNIT_ASSERT_EQUAL_C(partSet.Parts[missingPartIdx[idx]].size(), - originalPartSet.Parts[missingPartIdx[idx]].size(), info + errorInfo); - ui32 size = (ui32)originalPartSet.Parts[missingPartIdx[idx]].size(); + + // Make sure the restored data matches the original + TString errorInfo = Sprintf("dataSize=%d partMask=0x%x", dataSize, partMask); + if (isRestoreFullData) { + UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); + UNIT_ASSERT_EQUAL(testString, restoredString); + } + + if (isRestoreParts) { + for (ui32 idx = 0; idx < missingParts; ++idx) { + if (missingPartIdx[idx] < partSet.Parts.size()) { + UNIT_ASSERT_EQUAL_C(partSet.Parts[missingPartIdx[idx]].size(), + originalPartSet.Parts[missingPartIdx[idx]].size(), info + errorInfo); + ui32 size = (ui32)originalPartSet.Parts[missingPartIdx[idx]].size(); char *restored = (char*)partSet.Parts[missingPartIdx[idx]].GetDataAt(0); char *original = (char*)originalPartSet.Parts[missingPartIdx[idx]].GetDataAt(0); - for (ui32 i = 0; i < size; ++i) { - UNIT_ASSERT_EQUAL_C(restored[i], original[i], - (info + errorInfo + mode + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i))); - } - } - } - } -} - -template <ui32 maxMissingParts> -void TestAllLossesDifferentSizes(TErasureType &groupType, ui32 maxParts) { - for (ui32 missingParts = 0; missingParts <= maxMissingParts; ++missingParts) { - ui32 missingPartIdx[maxMissingParts]; - GenFirstCombination(&missingPartIdx[0], missingParts); - ui32 maxMissingVariants = Fact(maxParts)/Fact(missingParts)/Fact(maxParts-missingParts); - //printf("k=%u, n=%u, variants=%u\n", missingParts, maxParts, maxMissingVariants); - for (ui32 
missingVariant = 0; missingVariant < maxMissingVariants; ++missingVariant) { - VERBOSE_COUT(PrintArr(missingPartIdx, missingParts)); - ui32 partMask = ~(ui32)0; - for (ui32 i = 0; i < missingParts; ++i) { - partMask &= ~(ui32)(1ul << missingPartIdx[i]); - } - for (ui32 dataSize = 1; dataSize < 600; ++dataSize) { - VERBOSE_COUT("dataSize# " << dataSize << Endl); - for (ui32 type = 0; type < 3; ++type) { - bool isRestoreParts = false; - bool isRestoreFullData = false; - switch (type) { - case 0: - isRestoreParts = true; - isRestoreFullData = true; - break; - case 1: - isRestoreFullData = true; - break; - case 2: - isRestoreParts = true; - break; - } - TStringStream info; - info << "Type# " << groupType.ToString() << " "; - info << "maxMissingParts# " << maxMissingParts << " "; - info << "missingVariant# " << missingVariant << " "; - info << "dataSize# " << dataSize << " "; - info << "case# " << BoolToStr(isRestoreParts) << "," << BoolToStr(isRestoreFullData) << " "; - VERBOSE_COUT(info.Str() << Endl); - TestMissingPartWithRandomData(groupType, missingPartIdx, missingParts, dataSize, - isRestoreParts, isRestoreFullData, info.Str()); - } - } // dataSize - GenNextCombination(&missingPartIdx[0], missingParts, maxParts); - } - } // missingVariant -} - + for (ui32 i = 0; i < size; ++i) { + UNIT_ASSERT_EQUAL_C(restored[i], original[i], + (info + errorInfo + mode + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i))); + } + } + } + } +} + +template <ui32 maxMissingParts> +void TestAllLossesDifferentSizes(TErasureType &groupType, ui32 maxParts) { + for (ui32 missingParts = 0; missingParts <= maxMissingParts; ++missingParts) { + ui32 missingPartIdx[maxMissingParts]; + GenFirstCombination(&missingPartIdx[0], missingParts); + ui32 maxMissingVariants = Fact(maxParts)/Fact(missingParts)/Fact(maxParts-missingParts); + //printf("k=%u, n=%u, variants=%u\n", missingParts, maxParts, maxMissingVariants); + for (ui32 missingVariant = 0; missingVariant < maxMissingVariants; 
++missingVariant) { + VERBOSE_COUT(PrintArr(missingPartIdx, missingParts)); + ui32 partMask = ~(ui32)0; + for (ui32 i = 0; i < missingParts; ++i) { + partMask &= ~(ui32)(1ul << missingPartIdx[i]); + } + for (ui32 dataSize = 1; dataSize < 600; ++dataSize) { + VERBOSE_COUT("dataSize# " << dataSize << Endl); + for (ui32 type = 0; type < 3; ++type) { + bool isRestoreParts = false; + bool isRestoreFullData = false; + switch (type) { + case 0: + isRestoreParts = true; + isRestoreFullData = true; + break; + case 1: + isRestoreFullData = true; + break; + case 2: + isRestoreParts = true; + break; + } + TStringStream info; + info << "Type# " << groupType.ToString() << " "; + info << "maxMissingParts# " << maxMissingParts << " "; + info << "missingVariant# " << missingVariant << " "; + info << "dataSize# " << dataSize << " "; + info << "case# " << BoolToStr(isRestoreParts) << "," << BoolToStr(isRestoreFullData) << " "; + VERBOSE_COUT(info.Str() << Endl); + TestMissingPartWithRandomData(groupType, missingPartIdx, missingParts, dataSize, + isRestoreParts, isRestoreFullData, info.Str()); + } + } // dataSize + GenNextCombination(&missingPartIdx[0], missingParts, maxParts); + } + } // missingVariant +} + void PrintBuffer(const TString &buffer) { Cerr << " ["; for (ui32 idx = 0; idx < buffer.size(); ++idx) { @@ -229,82 +229,82 @@ TVector<TDiff> GenerateRandomDiff(NPrivate::TMersenne64 &randGen, ui32 dataSize, } Y_UNIT_TEST_SUITE(TErasureTypeTest) { -// Test if new version is capable to restore data splited by current version (which is right by definition) +// Test if new version is capable to restore data splited by current version (which is right by definition) Y_UNIT_TEST(isSplittedDataEqualsToOldVerion) { TVector<TVector<ui8>> dataPool { - {49,184,130,19,181,231,130}, - - {249,122,57,146,140,30,69,51,88,81,92,29,220,192,18,14,195,162,244,139,59,141,161,14, - 202,194,28,123,179,195,60,101,56,157,176,150,23,105,123,62,101,19,56,168,222,81,172, - 
251,199,223,85,60,99,184,45,90,84,68,1,131,199,36,64,103,150,221,18,236,86,15,142}, - - {46,173,157,247,36,205,150,116,82,10,212,7,45,29,93,90,49,233,170,207,198,219,215, - 187,220,220,48,228,83,53,50,37,153,214,149,28,231,171,92,176,230,139,168,126, - 138,227,106,92,38,23,87,62,20,192,151,15,170,34,248,199,220,250,108,47,54,217,36, - 56,146,224,21,148,133,155,49,199,101,250,173,93,104,205,67,222,132,104,187,231,53, - 206,247,46,22,73,11,70,87,124,4,242,9,165,99,82,83,40,165,55,53,187,238,96,248,16, - 103,197,132,216,107,191,229,140,90,129,81,63,232,85,19,232,59,96,193,5,133,139,251, - 148,144,0,147,22,247,36,221,244,117,144,98,173,40} }; + {49,184,130,19,181,231,130}, + + {249,122,57,146,140,30,69,51,88,81,92,29,220,192,18,14,195,162,244,139,59,141,161,14, + 202,194,28,123,179,195,60,101,56,157,176,150,23,105,123,62,101,19,56,168,222,81,172, + 251,199,223,85,60,99,184,45,90,84,68,1,131,199,36,64,103,150,221,18,236,86,15,142}, + + {46,173,157,247,36,205,150,116,82,10,212,7,45,29,93,90,49,233,170,207,198,219,215, + 187,220,220,48,228,83,53,50,37,153,214,149,28,231,171,92,176,230,139,168,126, + 138,227,106,92,38,23,87,62,20,192,151,15,170,34,248,199,220,250,108,47,54,217,36, + 56,146,224,21,148,133,155,49,199,101,250,173,93,104,205,67,222,132,104,187,231,53, + 206,247,46,22,73,11,70,87,124,4,242,9,165,99,82,83,40,165,55,53,187,238,96,248,16, + 103,197,132,216,107,191,229,140,90,129,81,63,232,85,19,232,59,96,193,5,133,139,251, + 148,144,0,147,22,247,36,221,244,117,144,98,173,40} }; TVector<TVector<TVector<ui8>>> partsPool { - { - {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {49,184,130,19,181,231,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {49,184,130,19,181,231,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - 
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,184,130,19,181,231,130,0, }, - },{ - {249,122,57,146,140,30,69,51,88,81,92,29,220,192,18,14,195,162,244,139,59,141,161,14,202, - 194,28,123,179,195,60,101,}, - {56,157,176,150,23,105,123,62,101,19,56,168,222,81,172,251,199,223,85,60,99,184,45,90,84, - 68,1,131,199,36,64,103,}, - {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - - {150,221,18,236,86,15,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {87,58,155,232,205,120,176,13,61,66,100,181,2,145,190,245,4,125,161,183,88,53,140,84, - 158,134,29,248,116,231,124,2,}, - {173,62,56,17,75,58,5,84,52,136,237,8,12,141,41,87,242,245,205,160,34,248,77,146,207, - 132,90,40,65,80,223,88,}, - },{ - {46,173,157,247,36,205,150,116,82,10,212,7,45,29,93,90,49,233,170,207,198,219,215,187,220,220, - 48,228,83,53,50,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {153,214,149,28,231,171,92,176,230,139,168,126,138,227,106,92,38,23,87,62,20,192,151,15,170, - 34,248,199,220,250,108,47,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {54,217,36,56,146,224,21,148,133,155,49,199,101,250,173,93,104,205,67,222,132,104,187,231,53, - 206,247,46,22,73,11,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, - {87,124,4,242,9,165,99,82,83,40,165,55,53,187,238,96,248,16,103,197,132,216,107,191,229,140, - 90,129,81,63,232,85,19,232,59,96,193,5,133,139,251,148,144,0,147,22,247,36,221,244,117,144, - 98,173,40,0,0,0,0,0,0,0,0,0,}, - {214,222,40,33,88,35,188,2,98,50,232,137,247,191,116,59,135,35,217,234,210,171,144,236,166, - 188,101,140,200,185,189,25,19,232,59,96,193,5,133,139,251,148,144,0,147,22,247,36,221, - 244,117,144,98,173,40,0,0,0,0,0,0,0,0,0,}, - {114,180,19,50,219,117,207,37,191,151,5,180,246,160,208,23,112,124,56,167,179,241,145,219, - 185,235,76,193,70,131,82,141,38,96,229,144,241,187,223,36,251,148,144,0,147,22,247,36, - 251,148,144,0,147,22,247,36,232,124,171,96,82,19,114,175,}, - } - }; - 
TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); - for (ui32 variant = 0; variant < dataPool.size(); ++variant) { + { + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {49,184,130,19,181,231,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {49,184,130,19,181,231,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,49,184,130,19,181,231,130,0, }, + },{ + {249,122,57,146,140,30,69,51,88,81,92,29,220,192,18,14,195,162,244,139,59,141,161,14,202, + 194,28,123,179,195,60,101,}, + {56,157,176,150,23,105,123,62,101,19,56,168,222,81,172,251,199,223,85,60,99,184,45,90,84, + 68,1,131,199,36,64,103,}, + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + + {150,221,18,236,86,15,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {87,58,155,232,205,120,176,13,61,66,100,181,2,145,190,245,4,125,161,183,88,53,140,84, + 158,134,29,248,116,231,124,2,}, + {173,62,56,17,75,58,5,84,52,136,237,8,12,141,41,87,242,245,205,160,34,248,77,146,207, + 132,90,40,65,80,223,88,}, + },{ + {46,173,157,247,36,205,150,116,82,10,212,7,45,29,93,90,49,233,170,207,198,219,215,187,220,220, + 48,228,83,53,50,37,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {153,214,149,28,231,171,92,176,230,139,168,126,138,227,106,92,38,23,87,62,20,192,151,15,170, + 34,248,199,220,250,108,47,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {54,217,36,56,146,224,21,148,133,155,49,199,101,250,173,93,104,205,67,222,132,104,187,231,53, + 206,247,46,22,73,11,70,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,}, + {87,124,4,242,9,165,99,82,83,40,165,55,53,187,238,96,248,16,103,197,132,216,107,191,229,140, + 90,129,81,63,232,85,19,232,59,96,193,5,133,139,251,148,144,0,147,22,247,36,221,244,117,144, + 
98,173,40,0,0,0,0,0,0,0,0,0,}, + {214,222,40,33,88,35,188,2,98,50,232,137,247,191,116,59,135,35,217,234,210,171,144,236,166, + 188,101,140,200,185,189,25,19,232,59,96,193,5,133,139,251,148,144,0,147,22,247,36,221, + 244,117,144,98,173,40,0,0,0,0,0,0,0,0,0,}, + {114,180,19,50,219,117,207,37,191,151,5,180,246,160,208,23,112,124,56,167,179,241,145,219, + 185,235,76,193,70,131,82,141,38,96,229,144,241,187,223,36,251,148,144,0,147,22,247,36, + 251,148,144,0,147,22,247,36,232,124,171,96,82,19,114,175,}, + } + }; + TErasureType type(TErasureType::EErasureSpecies::Erasure4Plus2Block); + for (ui32 variant = 0; variant < dataPool.size(); ++variant) { TVector<ui8> &data = dataPool[variant]; TVector<TVector<ui8>> &expectedParts = partsPool[variant]; - TString testString; - testString.resize(data.size()); + TString testString; + testString.resize(data.size()); for (ui32 i = 0; i < testString.size(); ++i) { - testString[i] = (char)data[i]; - } - TDataPartSet partSet; + testString[i] = (char)data[i]; + } + TDataPartSet partSet; type.SplitData(TErasureType::CrcModeNone, testString, partSet); - for (ui32 i = 0; i < 6; ++i) { + for (ui32 i = 0; i < 6; ++i) { UNIT_ASSERT_EQUAL_C(partSet.Parts[i].size(), expectedParts[i].size(), Sprintf("%lu == %lu", partSet.Parts[i].size(), expectedParts[i].size())); for (ui32 j = 0; j < partSet.Parts[i].size(); ++j) { UNIT_ASSERT_EQUAL( (ui8)partSet.Parts[i].OwnedString[j], expectedParts[i][j]); - } - } - } - } - - + } + } + } + } + + Y_UNIT_TEST(TestEo) { ui32 species = (ui32)TErasureType::Erasure4Plus2Block; { @@ -400,14 +400,14 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { throw ex; } - VERBOSE_COUT("testing " << errorInfo << mode << " (full data)" << Endl); + VERBOSE_COUT("testing " << errorInfo << mode << " (full data)" << Endl); if (isRestoreFullData) { UNIT_ASSERT_EQUAL_C(testString.size(), restoredString.size(), errorInfo); for (ui32 i = 0; i < testString.size(); ++i) { UNIT_ASSERT_EQUAL_C(((char*)testString.data())[i], 
((char*)restoredString.data())[i], (errorInfo + mode + " (full data)")); if (((char*)testString.data())[i] != ((char*)restoredString.data())[i]) { - VERBOSE_COUT("mismatch " << errorInfo << mode << " (full data)" << Endl); + VERBOSE_COUT("mismatch " << errorInfo << mode << " (full data)" << Endl); break; } } @@ -424,8 +424,8 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { UNIT_ASSERT_EQUAL_C(restored[i], original[i], (errorInfo + mode + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i))); if (restored[i] != original[i]) { - VERBOSE_COUT(" wrong part " << errorInfo << mode << - Sprintf(" (part %d byte %d)", missingPartIdx[idx], i) << Endl); + VERBOSE_COUT(" wrong part " << errorInfo << mode << + Sprintf(" (part %d byte %d)", missingPartIdx[idx], i) << Endl); break; } } @@ -494,132 +494,132 @@ Y_UNIT_TEST_SUITE(TErasureTypeTest) { RunTestDiff(groupType, dataSize, testString, diffs); } - // Mirror tests + // Mirror tests Y_UNIT_TEST(TestMirror3LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::ErasureMirror3); - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 1 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - - // Block tests + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::ErasureMirror3); + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 1 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + + // Block tests Y_UNIT_TEST(TestBlock31LossOfAllPossible1) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus1Block); - constexpr ui32 maxMissingParts = 1; - constexpr ui32 maxParts = 3 + 1; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus1Block); + constexpr ui32 maxMissingParts = 1; + constexpr ui32 maxParts = 3 + 1; + 
TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock42LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus2Block); - // Specify - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 4 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus2Block); + // Specify + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 4 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock32LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus2Block); - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 3 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus2Block); + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 3 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock43LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus3Block); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 4 + 3; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus3Block); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 4 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock33LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus3Block); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 3 + 3; - 
TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus3Block); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 3 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock23LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus3Block); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 2 + 3; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus3Block); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 2 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestBlock22LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus2Block); - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 2 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - - - // Stripe tests + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus2Block); + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 2 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + + + // Stripe tests Y_UNIT_TEST(TestStripe31LossOfAllPossible1) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus1Stripe); - constexpr ui32 maxMissingParts = 1; - constexpr ui32 maxParts = 3 + 1; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus1Stripe); + constexpr ui32 maxMissingParts = 1; + constexpr ui32 maxParts = 3 + 1; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + 
Y_UNIT_TEST(TestStripe42LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus2Stripe); - // Specify - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 4 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus2Stripe); + // Specify + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 4 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestStripe32LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus2Stripe); - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 3 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus2Stripe); + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 3 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestStripe43LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus3Stripe); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 4 + 3; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure4Plus3Stripe); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 4 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestStripe33LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure3Plus3Stripe); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 3 + 3; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType 
groupType(TErasureType::EErasureSpecies::Erasure3Plus3Stripe); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 3 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestStripe23LossOfAllPossible3) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus3Stripe); - constexpr ui32 maxMissingParts = 3; - constexpr ui32 maxParts = 2 + 3; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus3Stripe); + constexpr ui32 maxMissingParts = 3; + constexpr ui32 maxParts = 2 + 3; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + Y_UNIT_TEST(TestStripe22LossOfAllPossible2) { - // Set up the erasure - TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus2Stripe); - constexpr ui32 maxMissingParts = 2; - constexpr ui32 maxParts = 2 + 2; - TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); - } - + // Set up the erasure + TErasureType groupType(TErasureType::EErasureSpecies::Erasure2Plus2Stripe); + constexpr ui32 maxMissingParts = 2; + constexpr ui32 maxParts = 2 + 2; + TestAllLossesDifferentSizes<maxMissingParts>(groupType, maxParts); + } + void TestErasure(TErasureType::ECrcMode crcMode, ui32 species) { TErasureType groupType((TErasureType::EErasureSpecies)species); TString erasureName = TErasureType::ErasureName[species]; diff --git a/ydb/core/erasure/ut_util.h b/ydb/core/erasure/ut_util.h index 859204b92c..ef2f723655 100644 --- a/ydb/core/erasure/ut_util.h +++ b/ydb/core/erasure/ut_util.h @@ -3,14 +3,14 @@ #include <library/cpp/testing/unittest/registar.h> #include <util/random/entropy.h> #include <util/random/mersenne64.h> -#include <util/stream/null.h> +#include <util/stream/null.h> #include <util/string/printf.h> -IOutputStream& Ctest = Cnull; +IOutputStream& Ctest = Cnull; -#define VERBOSE_COUT(a) Ctest << a +#define 
VERBOSE_COUT(a) Ctest << a -inline TString PrintArr(ui32 *arr, ui32 n) { +inline TString PrintArr(ui32 *arr, ui32 n) { TStringStream out; if (n == 0) { out << "-"; @@ -22,7 +22,7 @@ inline TString PrintArr(ui32 *arr, ui32 n) { return out.Str(); } -inline const char *BoolToStr(bool val) { +inline const char *BoolToStr(bool val) { return val ? "true " : "false"; } diff --git a/ydb/core/erasure/ya.make b/ydb/core/erasure/ya.make index e4a78c0142..63483c1b96 100644 --- a/ydb/core/erasure/ya.make +++ b/ydb/core/erasure/ya.make @@ -4,7 +4,7 @@ OWNER( cthulhu ddoarn fomichev - va-kuznecov + va-kuznecov g:kikimr ) diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index a642234e74..7995c58aaf 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -1515,10 +1515,10 @@ public: double avail = (double)pDiskInfo.GetAvailableSize() / pDiskInfo.GetTotalSize(); if (avail < 0.06) { context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Available size is less than 6%", "pdisk-space"); - } else if (avail < 0.09) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Available size is less than 9%", "pdisk-space"); - } else if (avail < 0.12) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Available size is less than 12%", "pdisk-space"); + } else if (avail < 0.09) { + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Available size is less than 9%", "pdisk-space"); + } else if (avail < 0.12) { + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Available size is less than 12%", "pdisk-space"); } } } else { diff --git a/ydb/core/keyvalue/keyvalue_collector.h b/ydb/core/keyvalue/keyvalue_collector.h index a256c51a96..9e4d848ab7 100644 --- a/ydb/core/keyvalue/keyvalue_collector.h +++ b/ydb/core/keyvalue/keyvalue_collector.h @@ -1,6 +1,6 @@ #pragma once #include "defs.h" -#include "keyvalue_collect_operation.h" +#include "keyvalue_collect_operation.h" namespace 
NKikimr { namespace NKeyValue { diff --git a/ydb/core/keyvalue/keyvalue_flat_impl.h b/ydb/core/keyvalue/keyvalue_flat_impl.h index c7c851cf52..292f2feff9 100644 --- a/ydb/core/keyvalue/keyvalue_flat_impl.h +++ b/ydb/core/keyvalue/keyvalue_flat_impl.h @@ -2,11 +2,11 @@ #include "defs.h" #include "keyvalue.h" -#include "keyvalue_collector.h" +#include "keyvalue_collector.h" #include "keyvalue_scheme_flat.h" #include "keyvalue_simple_db.h" #include "keyvalue_simple_db_flat.h" -#include "keyvalue_state.h" +#include "keyvalue_state.h" #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tablet_flat/flat_database.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> diff --git a/ydb/core/kqp/host/kqp_run_data.cpp b/ydb/core/kqp/host/kqp_run_data.cpp index 5d49b10615..b2bc80d71b 100644 --- a/ydb/core/kqp/host/kqp_run_data.cpp +++ b/ydb/core/kqp/host/kqp_run_data.cpp @@ -106,8 +106,8 @@ protected: if (execResult.HasLocks()) { YQL_ENSURE(!commit); - if (!MergeLocks(execResult.GetLocks().GetType(), execResult.GetLocks().GetValue(), TxState->Tx(), ctx)) { - return false; + if (!MergeLocks(execResult.GetLocks().GetType(), execResult.GetLocks().GetValue(), TxState->Tx(), ctx)) { + return false; } } diff --git a/ydb/core/kqp/host/kqp_run_prepared.cpp b/ydb/core/kqp/host/kqp_run_prepared.cpp index 2a913d99f9..e71a3bacee 100644 --- a/ydb/core/kqp/host/kqp_run_prepared.cpp +++ b/ydb/core/kqp/host/kqp_run_prepared.cpp @@ -75,8 +75,8 @@ public: mkqlResult->Swap(&result.Result); if (AcquireLocks) { - if (!UnpackMergeLocks(*mkqlResult, TxState->Tx(), ctx)) { - return TStatus::Error; + if (!UnpackMergeLocks(*mkqlResult, TxState->Tx(), ctx)) { + return TStatus::Error; } } diff --git a/ydb/core/kqp/kqp_compile_request.cpp b/ydb/core/kqp/kqp_compile_request.cpp index 19952b5335..2c18af159b 100644 --- a/ydb/core/kqp/kqp_compile_request.cpp +++ b/ydb/core/kqp/kqp_compile_request.cpp @@ -262,7 +262,7 @@ private: << ", at state:" << state, ctx); } - void 
InternalError(const TString& message, const TActorContext &ctx) { + void InternalError(const TString& message, const TActorContext &ctx) { LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_REQUEST, "Internal error" << ", self: " << ctx.SelfID << ", message: " << message); diff --git a/ydb/core/kqp/kqp_impl.h b/ydb/core/kqp/kqp_impl.h index 1ae3fc4038..7d6f172899 100644 --- a/ydb/core/kqp/kqp_impl.h +++ b/ydb/core/kqp/kqp_impl.h @@ -78,10 +78,10 @@ IActor* CreateKqpWorkerActor(const TActorId& owner, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, TIntrusivePtr<TModuleResolverState> moduleResolverState, TIntrusivePtr<TKqpCounters> counters); -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, - const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, - TIntrusivePtr<TModuleResolverState> moduleResolverState, TIntrusivePtr<TKqpCounters> counters); - +IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, + const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, + TIntrusivePtr<TModuleResolverState> moduleResolverState, TIntrusivePtr<TKqpCounters> counters); + TIntrusivePtr<IKqpGateway> CreateKikimrIcGateway(const TString& cluster, const TString& database, std::shared_ptr<IKqpGateway::IKqpTableMetadataLoader>&& metadataLoader, NActors::TActorSystem* actorSystem, ui32 nodeId, TKqpRequestCounters::TPtr counters, const TActorId& MkqlCompileService); diff --git a/ydb/core/kqp/kqp_session_actor.cpp b/ydb/core/kqp/kqp_session_actor.cpp index b669dbf86e..3627b2b42d 100644 --- a/ydb/core/kqp/kqp_session_actor.cpp +++ b/ydb/core/kqp/kqp_session_actor.cpp @@ -1,518 +1,518 @@ -#include "kqp_impl.h" -#include "provider/yql_kikimr_provider.h" -#include "common/kqp_timeouts.h" -#include "common/kqp_ru_calc.h" - -#include <ydb/core/actorlib_impl/long_timer.h> +#include "kqp_impl.h" +#include 
"provider/yql_kikimr_provider.h" +#include "common/kqp_timeouts.h" +#include "common/kqp_ru_calc.h" + +#include <ydb/core/actorlib_impl/long_timer.h> #include <ydb/core/base/appdata.h> #include <ydb/core/base/cputime.h> #include <ydb/core/protos/kqp.pb.h> #include <ydb/library/yql/utils/actor_log/log.h> - -#include <library/cpp/actors/core/actor_bootstrapped.h> -#include <library/cpp/actors/core/event_pb.h> -#include <library/cpp/actors/core/hfunc.h> -#include <library/cpp/actors/core/log.h> - -#include <util/string/printf.h> - -namespace NKikimr { -namespace NKqp { - -using namespace NYql; - -namespace { - -#define LOG_C(msg) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) -#define LOG_E(msg) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) -#define LOG_W(msg) LOG_WARN_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) -#define LOG_N(msg) LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) -#define LOG_I(msg) LOG_INFO_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) -#define LOG_D(msg) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) - - -struct TKqpQueryState { - TActorId Sender; - ui64 ProxyRequestId = 0; - NKikimrKqp::TQueryRequest Request; - TString TraceId; - - TInstant StartTime; - NYql::TKikimrQueryDeadlines QueryDeadlines; - - TString UserToken; -}; - -EKikimrStatsMode GetStatsMode(const NKikimrKqp::TQueryRequest& queryRequest, EKikimrStatsMode minMode) { - switch (queryRequest.GetStatsMode()) { - case NYql::NDqProto::DQ_STATS_MODE_BASIC: - return EKikimrStatsMode::Basic; - case NYql::NDqProto::DQ_STATS_MODE_PROFILE: - return EKikimrStatsMode::Profile; - default: - return std::max(EKikimrStatsMode::None, minMode); - } -} - -class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> { -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::KQP_SESSION_ACTOR; - } - - 
TKqpSessionActor(const TActorId& owner, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, - const TKqpWorkerSettings& workerSettings, TIntrusivePtr<TModuleResolverState> moduleResolverState, - TIntrusivePtr<TKqpCounters> counters) - : Owner(owner) - , SessionId(sessionId) - , Settings(workerSettings) - , WorkerActor(CreateKqpWorkerActor(owner, sessionId, kqpSettings, workerSettings, - moduleResolverState, counters)) - { - TKikimrConfiguration::TPtr config = MakeIntrusive<TKikimrConfiguration>(); - config->Init(kqpSettings->DefaultSettings.GetDefaultSettings(), Settings.Cluster, kqpSettings->Settings, false); - IdleDuration = TDuration::Seconds(*config->_KqpSessionIdleTimeoutSec.Get()); - } - - void Bootstrap() { - WorkerId = RegisterWithSameMailbox(WorkerActor.release()); - Become(&TKqpSessionActor::MainState); - StartIdleTimer(); - } - - NYql::TKikimrQueryDeadlines GetQueryDeadlines(const NKikimrKqp::TQueryRequest& queryRequest) { - NYql::TKikimrQueryDeadlines res; - - auto now = TAppData::TimeProvider->Now(); - if (queryRequest.GetCancelAfterMs()) { - res.CancelAt = now + TDuration::MilliSeconds(queryRequest.GetCancelAfterMs()); - } - - auto timeoutMs = GetQueryTimeout(queryRequest.GetType(), queryRequest.GetTimeoutMs(), Settings.Service); - res.TimeoutAt = now + timeoutMs; - return res; - } - - void MakeNewQueryState() { - ++QueryId; - Y_ENSURE(!QueryState); - QueryState = std::make_unique<TKqpQueryState>(); - } - - void Handle(TEvKqp::TEvQueryRequest::TPtr &ev) { - ui64 proxyRequestId = ev->Cookie; - auto& event = ev->Get()->Record; - auto requestInfo = TKqpRequestInfo(event.GetTraceId(), event.GetRequest().GetSessionId()); - Y_ENSURE(requestInfo.GetSessionId() == SessionId, - "Invalid session, expected: " << SessionId << ", got: " << requestInfo.GetSessionId()); - - MakeNewQueryState(); - QueryState->Request.Swap(event.MutableRequest()); - auto& queryRequest = QueryState->Request; - - if (queryRequest.GetDatabase() != Settings.Database) { - 
TString message = TStringBuilder() << "Wrong database, expected:" << Settings.Database - << ", got: " << queryRequest.GetDatabase(); - ReplyProcessError(requestInfo, Ydb::StatusIds::BAD_REQUEST, message); - QueryState.reset(); - return; - } - - Y_ENSURE(queryRequest.HasAction()); - auto action = queryRequest.GetAction(); - Y_ENSURE(queryRequest.HasType()); - auto type = queryRequest.GetType(); - - LOG_D(requestInfo << "Received request," - << " proxyRequestId: " << proxyRequestId - << " query: " << (queryRequest.HasQuery() ? queryRequest.GetQuery().Quote() : "") - << " prepared: " << queryRequest.HasPreparedQuery() - << " tx_control: " << queryRequest.HasTxControl() - << " action: " << action - << " type: " << type - ); - - switch (action) { - case NKikimrKqp::QUERY_ACTION_EXECUTE: - case NKikimrKqp::QUERY_ACTION_PREPARE: - case NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED: - - switch (type) { - case NKikimrKqp::QUERY_TYPE_SQL_DML: - case NKikimrKqp::QUERY_TYPE_PREPARED_DML: - break; - - // not supported yet - case NKikimrKqp::QUERY_TYPE_AST_DML: - case NKikimrKqp::QUERY_TYPE_SQL_SCAN: - case NKikimrKqp::QUERY_TYPE_AST_SCAN: - // should not be compiled. 
TODO: forward to request executer - case NKikimrKqp::QUERY_TYPE_SQL_DDL: - case NKikimrKqp::QUERY_TYPE_SQL_SCRIPT: - case NKikimrKqp::QUERY_TYPE_SQL_SCRIPT_STREAMING: - default: - Y_ENSURE(false, "type: " << type << " is not supported"); - return; - } - break; - - // not supported yet - case NKikimrKqp::QUERY_ACTION_EXPLAIN: - case NKikimrKqp::QUERY_ACTION_VALIDATE: - case NKikimrKqp::QUERY_ACTION_BEGIN_TX: - case NKikimrKqp::QUERY_ACTION_COMMIT_TX: - case NKikimrKqp::QUERY_ACTION_ROLLBACK_TX: - case NKikimrKqp::QUERY_ACTION_PARSE: - default: - Y_ENSURE(false, "action: " << action << " is not supported"); - return; - } - - QueryState->Sender = ev->Sender; - QueryState->ProxyRequestId = proxyRequestId; - QueryState->TraceId = requestInfo.GetTraceId(); - QueryState->StartTime = TInstant::Now(); - QueryState->UserToken = event.GetUserToken(); - QueryState->QueryDeadlines = GetQueryDeadlines(queryRequest); - - StopIdleTimer(); - - CompileQuery(); - } - - void CompileQuery() { - Y_ENSURE(QueryState); - auto& queryRequest = QueryState->Request; - - TMaybe<TKqpQueryId> query; - TMaybe<TString> uid; - - bool keepInCache = false; - switch (queryRequest.GetAction()) { - case NKikimrKqp::QUERY_ACTION_EXECUTE: - query = TKqpQueryId(Settings.Cluster, Settings.Database, queryRequest.GetQuery()); - keepInCache = queryRequest.GetQueryCachePolicy().keep_in_cache(); - break; - - case NKikimrKqp::QUERY_ACTION_PREPARE: - query = TKqpQueryId(Settings.Cluster, Settings.Database, queryRequest.GetQuery()); - keepInCache = true; - break; - - case NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED: - uid = queryRequest.GetPreparedQuery(); - keepInCache = queryRequest.GetQueryCachePolicy().keep_in_cache(); - break; - - default: - Y_ENSURE(false); - } - - auto compileDeadline = QueryState->QueryDeadlines.TimeoutAt; - if (QueryState->QueryDeadlines.CancelAt) { - compileDeadline = Min(compileDeadline, QueryState->QueryDeadlines.CancelAt); - } - - auto compileRequestActor = 
CreateKqpCompileRequestActor(SelfId(), QueryState->UserToken, uid, - std::move(query), keepInCache, compileDeadline, Settings.DbCounters); - TlsActivationContext->ExecutorThread.RegisterActor(compileRequestActor); - } - - void Handle(TEvKqp::TEvCompileResponse::TPtr &ev) { - auto compileResult = ev->Get()->CompileResult; - - Y_ENSURE(compileResult); - Y_ENSURE(QueryState); - - if (compileResult->Status != Ydb::StatusIds::SUCCESS) { - if (ReplyQueryCompileError(compileResult)) { - StartIdleTimer(); - } else { - FinalCleanup(); - } - - return; - } - - const ui32 compiledVersion = compileResult->PreparedQuery->GetVersion(); - Y_ENSURE(compiledVersion == NKikimrKqp::TPreparedQuery::VERSION_PHYSICAL_V1, "invalid compiled version"); - - auto& queryRequest = QueryState->Request; - if (queryRequest.GetAction() == NKikimrKqp::QUERY_ACTION_PREPARE) { - if (ReplyPrepareResult(compileResult, ev->Get()->Stats)) { - StartIdleTimer(); - } else { - FinalCleanup(); - } - return; - } - - PerformQuery(compileResult); - } - - void PerformQuery(TKqpCompileResult::TConstPtr compileResult) { - Y_ENSURE(QueryState); - auto requestInfo = TKqpRequestInfo(QueryState->TraceId, SessionId); - - auto& queryRequest = QueryState->Request; - bool nonInteractive = false; - - if (queryRequest.HasTxControl()) { - // TODO Create transaction handle context - TString out; - NProtoBuf::TextFormat::PrintToString(queryRequest.GetTxControl(), &out); - LOG_D("queryRequest TxControl: " << out); - } - - auto action = queryRequest.GetAction(); - auto queryType = queryRequest.GetType(); - - if (action == NKikimrKqp::QUERY_ACTION_EXECUTE) { - Y_ENSURE(queryType == NKikimrKqp::QUERY_TYPE_SQL_DML); - queryType = NKikimrKqp::QUERY_TYPE_PREPARED_DML; - action = NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED; - } - - if (action != NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED) { - const TString& message = "Unknown query action"; - ReplyProcessError(requestInfo, Ydb::StatusIds::BAD_REQUEST, message); - QueryState.reset(); - 
return; - } - - LOG_D("nonInteractive: " << nonInteractive - << ", serializable_rw: " << queryRequest.GetTxControl().begin_tx().has_serializable_read_write()); - Y_ENSURE(queryType == NKikimrKqp::QUERY_TYPE_PREPARED_DML); - - TPreparedQueryConstPtr preparedQuery = compileResult->PreparedQuery; - Y_ENSURE(preparedQuery); - QueryState->Request.SetQuery(preparedQuery->GetText()); - - ExecutePreparedQuery(preparedQuery); - } - - bool ReplyPrepareResult(const TKqpCompileResult::TConstPtr& compileResult, - const NKqpProto::TKqpStatsCompile& compileStats) { - auto responseEv = std::make_unique<TEvKqp::TEvQueryResponse>(); - FillCompileStatus(compileResult, responseEv->Record); - auto ru = NRuCalc::CpuTimeToUnit(TDuration::MicroSeconds(compileStats.GetCpuTimeUs())); - responseEv->Record.GetRef().SetConsumedRu(ru); - return Reply(std::move(responseEv)); - } - - void ExecutePreparedQuery(TPreparedQueryConstPtr& query) { - Y_UNUSED(query); - auto& queryRequest = QueryState->Request; - if (false) { - NKikimrMiniKQL::TParams parameters = std::move(*queryRequest.MutableParameters()); - Y_UNUSED(parameters); - } else { - EKikimrStatsMode statsMode = GetStatsMode(queryRequest, EKikimrStatsMode::Basic); - Y_UNUSED(statsMode); - //ReplyProcessError(requestInfo, status, message); - } - Y_VERIFY(false, "Success!!!"); - } - - bool ReplyQueryCompileError(const TKqpCompileResult::TConstPtr& compileResult) { - auto responseEv = std::make_unique<TEvKqp::TEvQueryResponse>(); - FillCompileStatus(compileResult, responseEv->Record); - responseEv->Record.GetRef().SetConsumedRu(1); - return Reply(std::move(responseEv)); - } - - bool Reply(std::unique_ptr<TEvKqp::TEvQueryResponse> responseEv) { - Y_ENSURE(QueryState); - - auto requestInfo = TKqpRequestInfo(QueryState->TraceId, SessionId); - - auto& record = responseEv->Record.GetRef(); - auto& response = *record.MutableResponse(); - const auto& status = record.GetYdbStatus(); - - response.SetSessionId(SessionId); - - Send(QueryState->Sender, 
responseEv.release(), 0, QueryState->ProxyRequestId); - LOG_D(requestInfo << "Sent query response back to proxy, proxyRequestId: " << QueryState->ProxyRequestId - << ", proxyId: " << QueryState->Sender.ToString()); - - QueryState.reset(); - - if (status == Ydb::StatusIds::INTERNAL_ERROR) { - LOG_D(requestInfo << "Worker destroyed due to internal error"); - //Counters->ReportWorkerClosedError(Settings.DbCounters); - return false; - } - if (status == Ydb::StatusIds::BAD_SESSION) { - LOG_D(requestInfo << "Worker destroyed due to session error"); - //Counters->ReportWorkerClosedError(Settings.DbCounters); - return false; - } - - return true; - } - - void FillCompileStatus(const TKqpCompileResult::TConstPtr& compileResult, - TEvKqp::TProtoArenaHolder<NKikimrKqp::TEvQueryResponse>& record) - { - auto& ev = record.GetRef(); - - ev.SetYdbStatus(compileResult->Status); - - auto& response = *ev.MutableResponse(); - AddQueryIssues(response, compileResult->Issues); - - if (compileResult->Status == Ydb::StatusIds::SUCCESS) { - response.SetPreparedQuery(compileResult->Uid); - - auto& preparedQuery = compileResult->PreparedQuery; - response.MutableQueryParameters()->CopyFrom(preparedQuery->GetParameters()); - - if (preparedQuery->KqlsSize() > 0) { - response.SetQueryAst(preparedQuery->GetKqls(0).GetAst()); - response.SetQueryPlan(preparedQuery->GetKqls(0).GetPlan()); - } - } - } - - void Handle(TEvKqp::TEvPingSessionRequest::TPtr &ev) { - ui64 proxyRequestId = ev->Cookie; - auto& evRecord = ev->Get()->Record; - auto requestInfo = TKqpRequestInfo(evRecord.GetTraceId(), evRecord.GetRequest().GetSessionId()); - Y_ENSURE(requestInfo.GetSessionId() == SessionId, - "Invalid session, expected: " << SessionId << ", got: " << requestInfo.GetSessionId()); - - auto result = std::make_unique<TEvKqp::TEvPingSessionResponse>(); - auto& record = result->Record; - record.SetStatus(Ydb::StatusIds::SUCCESS); - auto sessionStatus = QueryState - ? 
Ydb::Table::KeepAliveResult::SESSION_STATUS_BUSY - : Ydb::Table::KeepAliveResult::SESSION_STATUS_READY; - record.MutableResponse()->SetSessionStatus(sessionStatus); - - Send(ev->Sender, result.release(), 0, proxyRequestId); - } - - void StartIdleTimer() { - StopIdleTimer(); - - ++IdleTimerId; - IdleTimerActorId = CreateLongTimer(TlsActivationContext->AsActorContext(), IdleDuration, - new IEventHandle(SelfId(), SelfId(), new TEvKqp::TEvIdleTimeout(IdleTimerId))); - LOG_D("Created long timer for idle timeout, timer id: " << IdleTimerId - << ", duration: " << IdleDuration << ", actor: " << IdleTimerActorId); - } - - void StopIdleTimer() { - if (IdleTimerActorId) { - LOG_D("Destroying long timer actor for idle timout: " << IdleTimerActorId); - Send(IdleTimerActorId, new TEvents::TEvPoisonPill()); - } - IdleTimerActorId = TActorId(); - } - - void Handle(TEvKqp::TEvIdleTimeout::TPtr &ev) { - auto timerId = ev->Get()->TimerId; - LOG_D("Received TEvIdleTimeout in ready state, timer id: " - << timerId << ", sender: " << ev->Sender); - - if (timerId == IdleTimerId) { - LOG_N(TKqpRequestInfo("", SessionId) << "Worker idle timeout, worker destroyed"); - //Counters->ReportWorkerClosedIdle(Settings.DbCounters); - FinalCleanup(); - } - } - - void FinalCleanup() { - Cleanup(true); - } - - void Cleanup(bool isFinal = false) { - // 1. Cleanup transactions -- QueryState->TxId - - // 2. Reply to kqp_proxy - if (isFinal) { - auto closeEv = std::make_unique<TEvKqp::TEvCloseSessionResponse>(); - closeEv->Record.SetStatus(Ydb::StatusIds::SUCCESS); - closeEv->Record.MutableResponse()->SetSessionId(SessionId); - closeEv->Record.MutableResponse()->SetClosed(true); - Send(Owner, closeEv.release()); - PassAway(); - } else { - StartIdleTimer(); - QueryState.reset(); - } - - // 3. 
check tx locks - } - - bool ReplyProcessError(const TKqpRequestInfo& requestInfo, Ydb::StatusIds::StatusCode ydbStatus, - const TString& message) - { - LOG_W(requestInfo << message); - - auto ev = std::make_unique<TEvKqp::TEvQueryResponse>(); - ev->Record.GetRef().SetYdbStatus(ydbStatus); - - auto& response = *ev->Record.GetRef().MutableResponse(); - - AddQueryIssues(response, {TIssue{message}}); - - return Reply(std::move(ev)); - } - - STATEFN(MainState) { - try { - switch (ev->GetTypeRewrite()) { - hFunc(TEvKqp::TEvQueryRequest, Handle); - hFunc(TEvKqp::TEvCompileResponse, Handle); - hFunc(TEvKqp::TEvIdleTimeout, Handle); - hFunc(TEvKqp::TEvPingSessionRequest, Handle); - - //hFunc(TEvKqp::TEvCloseSessionRequest, Handle); - //hFunc(TEvKqp::TEvInitiateSessionShutdown, Handle); - default: - UnexpectedEvent("MainState", ev); - } - } catch (const yexception& ex) { - InternalError(ex.what()); - } - } - -private: - void UnexpectedEvent(const TString& state, TAutoPtr<NActors::IEventHandle>& ev) { - InternalError(TStringBuilder() << "TKqpSessionActor in state " << state << " recieve unexpected event " << - TypeName(*ev.Get()->GetBase()) << Sprintf("(0x%08" PRIx32 ")", ev->GetTypeRewrite())); - } - - void InternalError(const TString& message) { - LOG_E("Internal error, SelfId: " << SelfId() << ", message: " << message); - PassAway(); - } - -private: - TActorId Owner; - TString SessionId; - TKqpWorkerSettings Settings; - std::unique_ptr<IActor> WorkerActor; - std::unique_ptr<TKqpQueryState> QueryState; - ui32 QueryId = 0; - - TActorId IdleTimerActorId; - ui32 IdleTimerId = 0; - TDuration IdleDuration; - - TActorId WorkerId; -}; - -} - -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, - const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, - TIntrusivePtr<TModuleResolverState> moduleResolverState, TIntrusivePtr<TKqpCounters> counters) -{ - return new TKqpSessionActor(owner, sessionId, kqpSettings, workerSettings, 
moduleResolverState, counters); -} - -} -} + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/event_pb.h> +#include <library/cpp/actors/core/hfunc.h> +#include <library/cpp/actors/core/log.h> + +#include <util/string/printf.h> + +namespace NKikimr { +namespace NKqp { + +using namespace NYql; + +namespace { + +#define LOG_C(msg) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) +#define LOG_E(msg) LOG_ERROR_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) +#define LOG_W(msg) LOG_WARN_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) +#define LOG_N(msg) LOG_NOTICE_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) +#define LOG_I(msg) LOG_INFO_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) +#define LOG_D(msg) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::KQP_SESSION, msg) + + +struct TKqpQueryState { + TActorId Sender; + ui64 ProxyRequestId = 0; + NKikimrKqp::TQueryRequest Request; + TString TraceId; + + TInstant StartTime; + NYql::TKikimrQueryDeadlines QueryDeadlines; + + TString UserToken; +}; + +EKikimrStatsMode GetStatsMode(const NKikimrKqp::TQueryRequest& queryRequest, EKikimrStatsMode minMode) { + switch (queryRequest.GetStatsMode()) { + case NYql::NDqProto::DQ_STATS_MODE_BASIC: + return EKikimrStatsMode::Basic; + case NYql::NDqProto::DQ_STATS_MODE_PROFILE: + return EKikimrStatsMode::Profile; + default: + return std::max(EKikimrStatsMode::None, minMode); + } +} + +class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> { +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::KQP_SESSION_ACTOR; + } + + TKqpSessionActor(const TActorId& owner, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, + const TKqpWorkerSettings& workerSettings, TIntrusivePtr<TModuleResolverState> moduleResolverState, + TIntrusivePtr<TKqpCounters> counters) + : Owner(owner) + , 
SessionId(sessionId) + , Settings(workerSettings) + , WorkerActor(CreateKqpWorkerActor(owner, sessionId, kqpSettings, workerSettings, + moduleResolverState, counters)) + { + TKikimrConfiguration::TPtr config = MakeIntrusive<TKikimrConfiguration>(); + config->Init(kqpSettings->DefaultSettings.GetDefaultSettings(), Settings.Cluster, kqpSettings->Settings, false); + IdleDuration = TDuration::Seconds(*config->_KqpSessionIdleTimeoutSec.Get()); + } + + void Bootstrap() { + WorkerId = RegisterWithSameMailbox(WorkerActor.release()); + Become(&TKqpSessionActor::MainState); + StartIdleTimer(); + } + + NYql::TKikimrQueryDeadlines GetQueryDeadlines(const NKikimrKqp::TQueryRequest& queryRequest) { + NYql::TKikimrQueryDeadlines res; + + auto now = TAppData::TimeProvider->Now(); + if (queryRequest.GetCancelAfterMs()) { + res.CancelAt = now + TDuration::MilliSeconds(queryRequest.GetCancelAfterMs()); + } + + auto timeoutMs = GetQueryTimeout(queryRequest.GetType(), queryRequest.GetTimeoutMs(), Settings.Service); + res.TimeoutAt = now + timeoutMs; + return res; + } + + void MakeNewQueryState() { + ++QueryId; + Y_ENSURE(!QueryState); + QueryState = std::make_unique<TKqpQueryState>(); + } + + void Handle(TEvKqp::TEvQueryRequest::TPtr &ev) { + ui64 proxyRequestId = ev->Cookie; + auto& event = ev->Get()->Record; + auto requestInfo = TKqpRequestInfo(event.GetTraceId(), event.GetRequest().GetSessionId()); + Y_ENSURE(requestInfo.GetSessionId() == SessionId, + "Invalid session, expected: " << SessionId << ", got: " << requestInfo.GetSessionId()); + + MakeNewQueryState(); + QueryState->Request.Swap(event.MutableRequest()); + auto& queryRequest = QueryState->Request; + + if (queryRequest.GetDatabase() != Settings.Database) { + TString message = TStringBuilder() << "Wrong database, expected:" << Settings.Database + << ", got: " << queryRequest.GetDatabase(); + ReplyProcessError(requestInfo, Ydb::StatusIds::BAD_REQUEST, message); + QueryState.reset(); + return; + } + + 
Y_ENSURE(queryRequest.HasAction()); + auto action = queryRequest.GetAction(); + Y_ENSURE(queryRequest.HasType()); + auto type = queryRequest.GetType(); + + LOG_D(requestInfo << "Received request," + << " proxyRequestId: " << proxyRequestId + << " query: " << (queryRequest.HasQuery() ? queryRequest.GetQuery().Quote() : "") + << " prepared: " << queryRequest.HasPreparedQuery() + << " tx_control: " << queryRequest.HasTxControl() + << " action: " << action + << " type: " << type + ); + + switch (action) { + case NKikimrKqp::QUERY_ACTION_EXECUTE: + case NKikimrKqp::QUERY_ACTION_PREPARE: + case NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED: + + switch (type) { + case NKikimrKqp::QUERY_TYPE_SQL_DML: + case NKikimrKqp::QUERY_TYPE_PREPARED_DML: + break; + + // not supported yet + case NKikimrKqp::QUERY_TYPE_AST_DML: + case NKikimrKqp::QUERY_TYPE_SQL_SCAN: + case NKikimrKqp::QUERY_TYPE_AST_SCAN: + // should not be compiled. TODO: forward to request executer + case NKikimrKqp::QUERY_TYPE_SQL_DDL: + case NKikimrKqp::QUERY_TYPE_SQL_SCRIPT: + case NKikimrKqp::QUERY_TYPE_SQL_SCRIPT_STREAMING: + default: + Y_ENSURE(false, "type: " << type << " is not supported"); + return; + } + break; + + // not supported yet + case NKikimrKqp::QUERY_ACTION_EXPLAIN: + case NKikimrKqp::QUERY_ACTION_VALIDATE: + case NKikimrKqp::QUERY_ACTION_BEGIN_TX: + case NKikimrKqp::QUERY_ACTION_COMMIT_TX: + case NKikimrKqp::QUERY_ACTION_ROLLBACK_TX: + case NKikimrKqp::QUERY_ACTION_PARSE: + default: + Y_ENSURE(false, "action: " << action << " is not supported"); + return; + } + + QueryState->Sender = ev->Sender; + QueryState->ProxyRequestId = proxyRequestId; + QueryState->TraceId = requestInfo.GetTraceId(); + QueryState->StartTime = TInstant::Now(); + QueryState->UserToken = event.GetUserToken(); + QueryState->QueryDeadlines = GetQueryDeadlines(queryRequest); + + StopIdleTimer(); + + CompileQuery(); + } + + void CompileQuery() { + Y_ENSURE(QueryState); + auto& queryRequest = QueryState->Request; + + 
TMaybe<TKqpQueryId> query; + TMaybe<TString> uid; + + bool keepInCache = false; + switch (queryRequest.GetAction()) { + case NKikimrKqp::QUERY_ACTION_EXECUTE: + query = TKqpQueryId(Settings.Cluster, Settings.Database, queryRequest.GetQuery()); + keepInCache = queryRequest.GetQueryCachePolicy().keep_in_cache(); + break; + + case NKikimrKqp::QUERY_ACTION_PREPARE: + query = TKqpQueryId(Settings.Cluster, Settings.Database, queryRequest.GetQuery()); + keepInCache = true; + break; + + case NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED: + uid = queryRequest.GetPreparedQuery(); + keepInCache = queryRequest.GetQueryCachePolicy().keep_in_cache(); + break; + + default: + Y_ENSURE(false); + } + + auto compileDeadline = QueryState->QueryDeadlines.TimeoutAt; + if (QueryState->QueryDeadlines.CancelAt) { + compileDeadline = Min(compileDeadline, QueryState->QueryDeadlines.CancelAt); + } + + auto compileRequestActor = CreateKqpCompileRequestActor(SelfId(), QueryState->UserToken, uid, + std::move(query), keepInCache, compileDeadline, Settings.DbCounters); + TlsActivationContext->ExecutorThread.RegisterActor(compileRequestActor); + } + + void Handle(TEvKqp::TEvCompileResponse::TPtr &ev) { + auto compileResult = ev->Get()->CompileResult; + + Y_ENSURE(compileResult); + Y_ENSURE(QueryState); + + if (compileResult->Status != Ydb::StatusIds::SUCCESS) { + if (ReplyQueryCompileError(compileResult)) { + StartIdleTimer(); + } else { + FinalCleanup(); + } + + return; + } + + const ui32 compiledVersion = compileResult->PreparedQuery->GetVersion(); + Y_ENSURE(compiledVersion == NKikimrKqp::TPreparedQuery::VERSION_PHYSICAL_V1, "invalid compiled version"); + + auto& queryRequest = QueryState->Request; + if (queryRequest.GetAction() == NKikimrKqp::QUERY_ACTION_PREPARE) { + if (ReplyPrepareResult(compileResult, ev->Get()->Stats)) { + StartIdleTimer(); + } else { + FinalCleanup(); + } + return; + } + + PerformQuery(compileResult); + } + + void PerformQuery(TKqpCompileResult::TConstPtr compileResult) { + 
Y_ENSURE(QueryState); + auto requestInfo = TKqpRequestInfo(QueryState->TraceId, SessionId); + + auto& queryRequest = QueryState->Request; + bool nonInteractive = false; + + if (queryRequest.HasTxControl()) { + // TODO Create transaction handle context + TString out; + NProtoBuf::TextFormat::PrintToString(queryRequest.GetTxControl(), &out); + LOG_D("queryRequest TxControl: " << out); + } + + auto action = queryRequest.GetAction(); + auto queryType = queryRequest.GetType(); + + if (action == NKikimrKqp::QUERY_ACTION_EXECUTE) { + Y_ENSURE(queryType == NKikimrKqp::QUERY_TYPE_SQL_DML); + queryType = NKikimrKqp::QUERY_TYPE_PREPARED_DML; + action = NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED; + } + + if (action != NKikimrKqp::QUERY_ACTION_EXECUTE_PREPARED) { + const TString& message = "Unknown query action"; + ReplyProcessError(requestInfo, Ydb::StatusIds::BAD_REQUEST, message); + QueryState.reset(); + return; + } + + LOG_D("nonInteractive: " << nonInteractive + << ", serializable_rw: " << queryRequest.GetTxControl().begin_tx().has_serializable_read_write()); + Y_ENSURE(queryType == NKikimrKqp::QUERY_TYPE_PREPARED_DML); + + TPreparedQueryConstPtr preparedQuery = compileResult->PreparedQuery; + Y_ENSURE(preparedQuery); + QueryState->Request.SetQuery(preparedQuery->GetText()); + + ExecutePreparedQuery(preparedQuery); + } + + bool ReplyPrepareResult(const TKqpCompileResult::TConstPtr& compileResult, + const NKqpProto::TKqpStatsCompile& compileStats) { + auto responseEv = std::make_unique<TEvKqp::TEvQueryResponse>(); + FillCompileStatus(compileResult, responseEv->Record); + auto ru = NRuCalc::CpuTimeToUnit(TDuration::MicroSeconds(compileStats.GetCpuTimeUs())); + responseEv->Record.GetRef().SetConsumedRu(ru); + return Reply(std::move(responseEv)); + } + + void ExecutePreparedQuery(TPreparedQueryConstPtr& query) { + Y_UNUSED(query); + auto& queryRequest = QueryState->Request; + if (false) { + NKikimrMiniKQL::TParams parameters = std::move(*queryRequest.MutableParameters()); + 
Y_UNUSED(parameters); + } else { + EKikimrStatsMode statsMode = GetStatsMode(queryRequest, EKikimrStatsMode::Basic); + Y_UNUSED(statsMode); + //ReplyProcessError(requestInfo, status, message); + } + Y_VERIFY(false, "Success!!!"); + } + + bool ReplyQueryCompileError(const TKqpCompileResult::TConstPtr& compileResult) { + auto responseEv = std::make_unique<TEvKqp::TEvQueryResponse>(); + FillCompileStatus(compileResult, responseEv->Record); + responseEv->Record.GetRef().SetConsumedRu(1); + return Reply(std::move(responseEv)); + } + + bool Reply(std::unique_ptr<TEvKqp::TEvQueryResponse> responseEv) { + Y_ENSURE(QueryState); + + auto requestInfo = TKqpRequestInfo(QueryState->TraceId, SessionId); + + auto& record = responseEv->Record.GetRef(); + auto& response = *record.MutableResponse(); + const auto& status = record.GetYdbStatus(); + + response.SetSessionId(SessionId); + + Send(QueryState->Sender, responseEv.release(), 0, QueryState->ProxyRequestId); + LOG_D(requestInfo << "Sent query response back to proxy, proxyRequestId: " << QueryState->ProxyRequestId + << ", proxyId: " << QueryState->Sender.ToString()); + + QueryState.reset(); + + if (status == Ydb::StatusIds::INTERNAL_ERROR) { + LOG_D(requestInfo << "Worker destroyed due to internal error"); + //Counters->ReportWorkerClosedError(Settings.DbCounters); + return false; + } + if (status == Ydb::StatusIds::BAD_SESSION) { + LOG_D(requestInfo << "Worker destroyed due to session error"); + //Counters->ReportWorkerClosedError(Settings.DbCounters); + return false; + } + + return true; + } + + void FillCompileStatus(const TKqpCompileResult::TConstPtr& compileResult, + TEvKqp::TProtoArenaHolder<NKikimrKqp::TEvQueryResponse>& record) + { + auto& ev = record.GetRef(); + + ev.SetYdbStatus(compileResult->Status); + + auto& response = *ev.MutableResponse(); + AddQueryIssues(response, compileResult->Issues); + + if (compileResult->Status == Ydb::StatusIds::SUCCESS) { + response.SetPreparedQuery(compileResult->Uid); + + auto& 
preparedQuery = compileResult->PreparedQuery; + response.MutableQueryParameters()->CopyFrom(preparedQuery->GetParameters()); + + if (preparedQuery->KqlsSize() > 0) { + response.SetQueryAst(preparedQuery->GetKqls(0).GetAst()); + response.SetQueryPlan(preparedQuery->GetKqls(0).GetPlan()); + } + } + } + + void Handle(TEvKqp::TEvPingSessionRequest::TPtr &ev) { + ui64 proxyRequestId = ev->Cookie; + auto& evRecord = ev->Get()->Record; + auto requestInfo = TKqpRequestInfo(evRecord.GetTraceId(), evRecord.GetRequest().GetSessionId()); + Y_ENSURE(requestInfo.GetSessionId() == SessionId, + "Invalid session, expected: " << SessionId << ", got: " << requestInfo.GetSessionId()); + + auto result = std::make_unique<TEvKqp::TEvPingSessionResponse>(); + auto& record = result->Record; + record.SetStatus(Ydb::StatusIds::SUCCESS); + auto sessionStatus = QueryState + ? Ydb::Table::KeepAliveResult::SESSION_STATUS_BUSY + : Ydb::Table::KeepAliveResult::SESSION_STATUS_READY; + record.MutableResponse()->SetSessionStatus(sessionStatus); + + Send(ev->Sender, result.release(), 0, proxyRequestId); + } + + void StartIdleTimer() { + StopIdleTimer(); + + ++IdleTimerId; + IdleTimerActorId = CreateLongTimer(TlsActivationContext->AsActorContext(), IdleDuration, + new IEventHandle(SelfId(), SelfId(), new TEvKqp::TEvIdleTimeout(IdleTimerId))); + LOG_D("Created long timer for idle timeout, timer id: " << IdleTimerId + << ", duration: " << IdleDuration << ", actor: " << IdleTimerActorId); + } + + void StopIdleTimer() { + if (IdleTimerActorId) { + LOG_D("Destroying long timer actor for idle timout: " << IdleTimerActorId); + Send(IdleTimerActorId, new TEvents::TEvPoisonPill()); + } + IdleTimerActorId = TActorId(); + } + + void Handle(TEvKqp::TEvIdleTimeout::TPtr &ev) { + auto timerId = ev->Get()->TimerId; + LOG_D("Received TEvIdleTimeout in ready state, timer id: " + << timerId << ", sender: " << ev->Sender); + + if (timerId == IdleTimerId) { + LOG_N(TKqpRequestInfo("", SessionId) << "Worker idle timeout, 
worker destroyed"); + //Counters->ReportWorkerClosedIdle(Settings.DbCounters); + FinalCleanup(); + } + } + + void FinalCleanup() { + Cleanup(true); + } + + void Cleanup(bool isFinal = false) { + // 1. Cleanup transactions -- QueryState->TxId + + // 2. Reply to kqp_proxy + if (isFinal) { + auto closeEv = std::make_unique<TEvKqp::TEvCloseSessionResponse>(); + closeEv->Record.SetStatus(Ydb::StatusIds::SUCCESS); + closeEv->Record.MutableResponse()->SetSessionId(SessionId); + closeEv->Record.MutableResponse()->SetClosed(true); + Send(Owner, closeEv.release()); + PassAway(); + } else { + StartIdleTimer(); + QueryState.reset(); + } + + // 3. check tx locks + } + + bool ReplyProcessError(const TKqpRequestInfo& requestInfo, Ydb::StatusIds::StatusCode ydbStatus, + const TString& message) + { + LOG_W(requestInfo << message); + + auto ev = std::make_unique<TEvKqp::TEvQueryResponse>(); + ev->Record.GetRef().SetYdbStatus(ydbStatus); + + auto& response = *ev->Record.GetRef().MutableResponse(); + + AddQueryIssues(response, {TIssue{message}}); + + return Reply(std::move(ev)); + } + + STATEFN(MainState) { + try { + switch (ev->GetTypeRewrite()) { + hFunc(TEvKqp::TEvQueryRequest, Handle); + hFunc(TEvKqp::TEvCompileResponse, Handle); + hFunc(TEvKqp::TEvIdleTimeout, Handle); + hFunc(TEvKqp::TEvPingSessionRequest, Handle); + + //hFunc(TEvKqp::TEvCloseSessionRequest, Handle); + //hFunc(TEvKqp::TEvInitiateSessionShutdown, Handle); + default: + UnexpectedEvent("MainState", ev); + } + } catch (const yexception& ex) { + InternalError(ex.what()); + } + } + +private: + void UnexpectedEvent(const TString& state, TAutoPtr<NActors::IEventHandle>& ev) { + InternalError(TStringBuilder() << "TKqpSessionActor in state " << state << " recieve unexpected event " << + TypeName(*ev.Get()->GetBase()) << Sprintf("(0x%08" PRIx32 ")", ev->GetTypeRewrite())); + } + + void InternalError(const TString& message) { + LOG_E("Internal error, SelfId: " << SelfId() << ", message: " << message); + PassAway(); + } + 
+private: + TActorId Owner; + TString SessionId; + TKqpWorkerSettings Settings; + std::unique_ptr<IActor> WorkerActor; + std::unique_ptr<TKqpQueryState> QueryState; + ui32 QueryId = 0; + + TActorId IdleTimerActorId; + ui32 IdleTimerId = 0; + TDuration IdleDuration; + + TActorId WorkerId; +}; + +} + +IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, + const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, + TIntrusivePtr<TModuleResolverState> moduleResolverState, TIntrusivePtr<TKqpCounters> counters) +{ + return new TKqpSessionActor(owner, sessionId, kqpSettings, workerSettings, moduleResolverState, counters); +} + +} +} diff --git a/ydb/core/kqp/prepare/kqp_prepare.h b/ydb/core/kqp/prepare/kqp_prepare.h index 8ab0ed748f..d114a44945 100644 --- a/ydb/core/kqp/prepare/kqp_prepare.h +++ b/ydb/core/kqp/prepare/kqp_prepare.h @@ -154,16 +154,16 @@ bool AddDeferredEffect(NYql::NNodes::TExprBase effect, const TVector<NKikimrKqp: bool AddDeferredEffect(NYql::NNodes::TExprBase effect, NYql::TExprContext& ctx, TKqpTransactionState& txState, TKqlTransformContext& transformCtx, bool preserveParamValues); -NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe<TKqpTxLock>& invalidatedLock); +NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe<TKqpTxLock>& invalidatedLock); -bool MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx, - NYql::TExprContext& ctx); +bool MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx, + NYql::TExprContext& ctx); -std::pair<bool, std::vector<NYql::TIssue>> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, - TKqpTransactionContext& txCtx); +std::pair<bool, std::vector<NYql::TIssue>> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, + 
TKqpTransactionContext& txCtx); + +bool UnpackMergeLocks(const NKikimrMiniKQL::TResult& result, TKqpTransactionContext& txCtx, NYql::TExprContext& ctx); -bool UnpackMergeLocks(const NKikimrMiniKQL::TResult& result, TKqpTransactionContext& txCtx, NYql::TExprContext& ctx); - TKqpParamsMap BuildParamsMap(const TVector<NKikimrKqp::TParameterBinding>& bindings, TIntrusivePtr<TKqpTransactionState> txState, TIntrusivePtr<TKqlTransformContext> transformCtx, bool acquireLocks); diff --git a/ydb/core/kqp/prepare/kqp_query_exec.cpp b/ydb/core/kqp/prepare/kqp_query_exec.cpp index d73572b85e..bc2d4e94ce 100644 --- a/ydb/core/kqp/prepare/kqp_query_exec.cpp +++ b/ydb/core/kqp/prepare/kqp_query_exec.cpp @@ -355,8 +355,8 @@ public: TransformCtx->AddMkqlStats(MkqlExecuteResult.Program, std::move(result.TxStats)); if (TxState->Tx().EffectiveIsolationLevel == NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE) { - if (!UnpackMergeLocks(*mkqlResult, TxState->Tx(), ctx)) { - return TStatus::Error; + if (!UnpackMergeLocks(*mkqlResult, TxState->Tx(), ctx)) { + return TStatus::Error; } } } @@ -682,14 +682,14 @@ TKqpParamsMap BuildParamsMap(const TVector<NKikimrKqp::TParameterBinding>& bindi return paramsMap; } -TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe<TKqpTxLock>& invalidatedLock) { +TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe<TKqpTxLock>& invalidatedLock) { TStringBuilder message; message << "Transaction locks invalidated."; TMaybe<TString> tableName; if (invalidatedLock) { TKikimrPathId id(invalidatedLock->GetSchemeShard(), invalidatedLock->GetPathId()); - auto table = txCtx.TableByIdMap.FindPtr(id); + auto table = txCtx.TableByIdMap.FindPtr(id); if (table) { tableName = *table; } @@ -702,12 +702,12 @@ TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMayb return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); } -std::pair<bool, std::vector<TIssue>> MergeLocks(const 
NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, - TKqpTransactionContext& txCtx) { +std::pair<bool, std::vector<TIssue>> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, + TKqpTransactionContext& txCtx) { + + std::pair<bool, std::vector<TIssue>> res; + auto& locks = txCtx.Locks; - std::pair<bool, std::vector<TIssue>> res; - auto& locks = txCtx.Locks; - YQL_ENSURE(type.GetKind() == NKikimrMiniKQL::ETypeKind::List); auto locksListType = type.GetList(); @@ -726,25 +726,25 @@ std::pair<bool, std::vector<TIssue>> MergeLocks(const NKikimrMiniKQL::TType& typ YQL_ENSURE(lockType.GetMember(4).GetName() == "PathId"); YQL_ENSURE(lockType.GetMember(5).GetName() == "SchemeShard"); - res.first = true; + res.first = true; for (auto& lockValue : value.GetList()) { TKqpTxLock txLock(lockValue); if (auto counter = txLock.GetCounter(); counter >= NKikimr::TSysTables::TLocksTable::TLock::ErrorMin) { switch (counter) { case NKikimr::TSysTables::TLocksTable::TLock::ErrorAlreadyBroken: case NKikimr::TSysTables::TLocksTable::TLock::ErrorBroken: - res.second.emplace_back(GetLocksInvalidatedIssue(txCtx, txLock)); + res.second.emplace_back(GetLocksInvalidatedIssue(txCtx, txLock)); break; default: - res.second.emplace_back(YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_ACQUIRE_FAILURE)); + res.second.emplace_back(YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_ACQUIRE_FAILURE)); break; } - res.first = false; + res.first = false; } else if (auto curTxLock = locks.LocksMap.FindPtr(txLock.GetKey())) { if (curTxLock->Invalidated(txLock)) { - res.second.emplace_back(GetLocksInvalidatedIssue(txCtx, txLock)); - res.first = false; + res.second.emplace_back(GetLocksInvalidatedIssue(txCtx, txLock)); + res.first = false; } } else { // despite there were some errors we need to proceed merge to erase remaining locks properly @@ -755,37 +755,37 @@ std::pair<bool, std::vector<TIssue>> MergeLocks(const NKikimrMiniKQL::TType& typ return res; } -bool 
MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx, - TExprContext& ctx) { - auto [success, issues] = MergeLocks(type, value, txCtx); - if (!success) { - if (!txCtx.GetSnapshot().IsValid()) { - for (auto& issue : issues) { - ctx.AddError(std::move(issue)); - } - return false; - } else { - txCtx.Locks.MarkBroken(issues.back()); - if (!txCtx.DeferredEffects.Empty()) { - txCtx.Locks.ReportIssues(ctx); - return false; - } - } - } - return true; -} - -bool UnpackMergeLocks(const NKikimrMiniKQL::TResult& result, TKqpTransactionContext& txCtx, TExprContext& ctx) { +bool MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx, + TExprContext& ctx) { + auto [success, issues] = MergeLocks(type, value, txCtx); + if (!success) { + if (!txCtx.GetSnapshot().IsValid()) { + for (auto& issue : issues) { + ctx.AddError(std::move(issue)); + } + return false; + } else { + txCtx.Locks.MarkBroken(issues.back()); + if (!txCtx.DeferredEffects.Empty()) { + txCtx.Locks.ReportIssues(ctx); + return false; + } + } + } + return true; +} + +bool UnpackMergeLocks(const NKikimrMiniKQL::TResult& result, TKqpTransactionContext& txCtx, TExprContext& ctx) { auto structType = result.GetType().GetStruct(); ui32 locksIndex; bool found = GetRunResultIndex(structType, TString(NKikimr::NMiniKQL::TxLocksResultLabel2), locksIndex); - YQL_ENSURE(found ^ txCtx.Locks.Broken()); + YQL_ENSURE(found ^ txCtx.Locks.Broken()); if (found) { auto locksType = structType.GetMember(locksIndex).GetType().GetOptional().GetItem(); auto locksValue = result.GetValue().GetStruct(locksIndex).GetOptional(); - return MergeLocks(locksType, locksValue, txCtx, ctx); + return MergeLocks(locksType, locksValue, txCtx, ctx); } return false; diff --git a/ydb/core/kqp/prepare/kqp_query_finalize.cpp b/ydb/core/kqp/prepare/kqp_query_finalize.cpp index 7a5957c211..5e442780f7 100644 --- 
a/ydb/core/kqp/prepare/kqp_query_finalize.cpp +++ b/ydb/core/kqp/prepare/kqp_query_finalize.cpp @@ -213,7 +213,7 @@ public: result.ReportIssues(ctx.IssueManager); if (!locksOk) { - ctx.AddError(GetLocksInvalidatedIssue(TxState->Tx(), invalidatedLock)); + ctx.AddError(GetLocksInvalidatedIssue(TxState->Tx(), invalidatedLock)); } } else { result.ReportIssues(ctx.IssueManager); diff --git a/ydb/core/kqp/proxy/kqp_proxy_service.cpp b/ydb/core/kqp/proxy/kqp_proxy_service.cpp index 0c9d4efd79..decd2cb301 100644 --- a/ydb/core/kqp/proxy/kqp_proxy_service.cpp +++ b/ydb/core/kqp/proxy/kqp_proxy_service.cpp @@ -1160,9 +1160,9 @@ private: TKqpWorkerSettings workerSettings(cluster, database, TableServiceConfig, dbCounters); workerSettings.LongSession = longSession; - IActor* workerActor = AppData()->FeatureFlags.GetEnableKqpSessionActor() - ? CreateKqpSessionActor(SelfId(), sessionId, KqpSettings, workerSettings, ModuleResolverState, Counters) - : CreateKqpWorkerActor(SelfId(), sessionId, KqpSettings, workerSettings, ModuleResolverState, Counters); + IActor* workerActor = AppData()->FeatureFlags.GetEnableKqpSessionActor() + ? 
CreateKqpSessionActor(SelfId(), sessionId, KqpSettings, workerSettings, ModuleResolverState, Counters) + : CreateKqpWorkerActor(SelfId(), sessionId, KqpSettings, workerSettings, ModuleResolverState, Counters); auto workerId = TlsActivationContext->ExecutorThread.RegisterActor(workerActor, TMailboxType::HTSwap, AppData()->UserPoolId); TKqpSessionInfo* sessionInfo = LocalSessions.Create(sessionId, workerId, database, dbCounters); diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index 1e4f4552cd..9bc7889f92 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -289,8 +289,8 @@ void TKikimrRunner::CreateSampleTables() { )").GetValueSync()); AssertSuccessResult(session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO `TwoShard` (Key, Value1, Value2) VALUES (1u, "One", -1), (2u, "Two", 0), @@ -387,7 +387,7 @@ void TKikimrRunner::Initialize(const TKikimrSettings& settings) { // Server->GetRuntime()->SetLogPriority(NKikimrServices::TX_PROXY_SCHEME_CACHE, NActors::NLog::PRI_DEBUG); // Server->GetRuntime()->SetLogPriority(NKikimrServices::SCHEME_BOARD_REPLICA, NActors::NLog::PRI_DEBUG); // Server->GetRuntime()->SetLogPriority(NKikimrServices::KQP_WORKER, NActors::NLog::PRI_DEBUG); - // Server->GetRuntime()->SetLogPriority(NKikimrServices::KQP_SESSION, NActors::NLog::PRI_DEBUG); + // Server->GetRuntime()->SetLogPriority(NKikimrServices::KQP_SESSION, NActors::NLog::PRI_DEBUG); // Server->GetRuntime()->SetLogPriority(NKikimrServices::TABLET_EXECUTOR, NActors::NLog::PRI_DEBUG); // Server->GetRuntime()->SetLogPriority(NKikimrServices::KQP_SLOW_LOG, NActors::NLog::PRI_TRACE); // Server->GetRuntime()->SetLogPriority(NKikimrServices::KQP_PROXY, NActors::NLog::PRI_DEBUG); @@ -818,8 +818,8 @@ void CreateSampleTablesWithIndex(TSession& session) { UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); auto result = 
session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO `KeyValue` (Key, Value) VALUES (3u, "Three"), (4u, "Four"), @@ -861,11 +861,11 @@ void WaitForKqpProxyInit(const NYdb::TDriver& driver) { NYdb::NTable::TTableClient client(driver); while (true) { - auto it = client.RetryOperationSync([=](TSession session) { - return session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - SELECT 1; - )", + auto it = client.RetryOperationSync([=](TSession session) { + return session.ExecuteDataQuery(R"( + PRAGMA kikimr.UseNewEngine = "true"; + SELECT 1; + )", TTxControl::BeginTx().CommitTx() ).GetValueSync(); }); diff --git a/ydb/core/kqp/ut/kqp_newengine_ut.cpp b/ydb/core/kqp/ut/kqp_newengine_ut.cpp index 640b4a9afb..3737687d8d 100644 --- a/ydb/core/kqp/ut/kqp_newengine_ut.cpp +++ b/ydb/core/kqp/ut/kqp_newengine_ut.cpp @@ -7,20 +7,20 @@ namespace NKikimr::NKqp { using namespace NYdb; using namespace NYdb::NTable; -namespace { - -TKikimrRunner KikimrRunnerWithSessionActor() { - NKikimrConfig::TFeatureFlags featureFlags; - featureFlags.SetEnableKqpSessionActor(true); - - return TKikimrRunner{featureFlags}; -} - -} - +namespace { + +TKikimrRunner KikimrRunnerWithSessionActor() { + NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableKqpSessionActor(true); + + return TKikimrRunner{featureFlags}; +} + +} + Y_UNIT_TEST_SUITE(KqpNewEngine) { - void TestSimpleSelect(const TKikimrRunner& kikimr) { + void TestSimpleSelect(const TKikimrRunner& kikimr) { auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -40,15 +40,15 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { )", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(SimpleSelectWithSessionActor) { - return; - TestSimpleSelect(KikimrRunnerWithSessionActor()); - } - - Y_UNIT_TEST(SimpleSelect) { - TestSimpleSelect(TKikimrRunner{}); - } - + Y_UNIT_TEST(SimpleSelectWithSessionActor) { + return; + 
TestSimpleSelect(KikimrRunnerWithSessionActor()); + } + + Y_UNIT_TEST(SimpleSelect) { + TestSimpleSelect(TKikimrRunner{}); + } + Y_UNIT_TEST(PkSelect1) { TKikimrRunner kikimr; auto db = kikimr.GetTableClient(); @@ -344,7 +344,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(result); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -374,7 +374,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(result); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Value2 > 10 ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -485,7 +485,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(result); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Value2 <= -10 ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -535,7 +535,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(result); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Value1 = "New" ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -585,7 +585,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(result); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Value1 = "New" ORDER BY Key; )", 
TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -892,7 +892,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED)); result = session2.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Key <= 2; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).GetValueSync(); AssertSuccessResult(result); @@ -1105,7 +1105,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -1226,7 +1226,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access(0).updates().rows(), 1); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] WHERE Value2 <= -10 ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); AssertSuccessResult(result); @@ -1427,7 +1427,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { // add nulls auto result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; REPLACE INTO `/Root/KeyValue` (Key, Value) VALUES (4u, "Four"), (NULL, "Null"); REPLACE INTO `/Root/Join2` (Key1, Key2, Name, Value2) VALUES (1, NULL, "Name Null", "Value Null"); )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); @@ -1576,7 +1576,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { 
UNIT_ASSERT(stats.query_phases(1).duration_us() > 0); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -1619,7 +1619,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT(stats.query_phases(0).duration_us() > 0); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; + PRAGMA kikimr.UseNewEngine = "true"; SELECT * FROM [/Root/TwoShard] ORDER BY Key; )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -2080,8 +2080,8 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertSuccessResult(session.ExecuteDataQuery(R"( --!syntax_v1 - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO `FollowersKv` (Key, Value) VALUES (1u, "One"), (11u, "Two"), @@ -2253,8 +2253,8 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { result = session.ExecuteDataQuery(R"( --!syntax_v1 - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO `/Root/TableOne` (Key, Value) VALUES (1, 1), (2, 2), @@ -2507,8 +2507,8 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO [/Root/table1] (key, cached) VALUES ("Key1", "CachedValue1"), ("Key2", "CachedValue2"); @@ -2516,8 +2516,8 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); result = session.ExecuteDataQuery(R"( - PRAGMA kikimr.UseNewEngine = "true"; - + PRAGMA kikimr.UseNewEngine = "true"; + REPLACE INTO [/Root/table2] ( 
key, in_cache, value ) VALUES diff --git a/ydb/core/kqp/ya.make b/ydb/core/kqp/ya.make index 4cb754b501..795edda64f 100644 --- a/ydb/core/kqp/ya.make +++ b/ydb/core/kqp/ya.make @@ -18,7 +18,7 @@ SRCS( kqp_impl.h kqp_response.cpp kqp_worker_actor.cpp - kqp_session_actor.cpp + kqp_session_actor.cpp kqp.h ) diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 04b27244c1..779381fd51 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -76,13 +76,13 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() { auto deriveStatus = [&](const auto& failed) { auto& checker = *Topology->QuorumChecker; if (!failed.GetNumSetItems()) { // all disks of group are operational - return NKikimrBlobStorage::TGroupStatus::FULL; + return NKikimrBlobStorage::TGroupStatus::FULL; } else if (!checker.CheckFailModelForGroup(failed)) { // fail model exceeded - return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED; + return NKikimrBlobStorage::TGroupStatus::DISINTEGRATED; } else if (checker.IsDegraded(failed)) { // group degraded - return NKikimrBlobStorage::TGroupStatus::DEGRADED; + return NKikimrBlobStorage::TGroupStatus::DEGRADED; } else if (failed.GetNumSetItems()) { // group partially available, but not degraded - return NKikimrBlobStorage::TGroupStatus::PARTIAL; + return NKikimrBlobStorage::TGroupStatus::PARTIAL; } else { Y_FAIL("unexpected case"); } @@ -260,7 +260,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) { case TEvBlobStorage::EvControllerScrubQueryStartQuantum: return 2; case TEvBlobStorage::EvControllerScrubQuantumFinished: return 2; case TEvBlobStorage::EvControllerScrubReportQuantumInProgress: return 2; - case TEvBlobStorage::EvControllerUpdateNodeDrives: return 2; + case TEvBlobStorage::EvControllerUpdateNodeDrives: return 2; // hive-related commands case TEvBlobStorage::EvControllerSelectGroups: return 4; @@ -356,11 +356,11 @@ void 
Out<NKikimr::NBsController::TPDiskId>(IOutputStream &str, const NKikimr::NB } template<> -void Out<NKikimr::NBsController::TPDiskLocation>(IOutputStream &str, const NKikimr::NBsController::TPDiskLocation &value) { +void Out<NKikimr::NBsController::TPDiskLocation>(IOutputStream &str, const NKikimr::NBsController::TPDiskLocation &value) { str << value.NodeId << ":" << value.Path.Quote(); -} - -template<> +} + +template<> void Out<NKikimr::NBsController::TVSlotId>(IOutputStream &str, const NKikimr::NBsController::TVSlotId &value) { str << value.ToString(); } @@ -369,7 +369,7 @@ template<> void Out<NKikimr::NBsController::TResourceNormalizedValues>(IOutputStream &str, const NKikimr::NBsController::TResourceNormalizedValues &value) { str << value.ToString(); } - + template<> void Out<NKikimr::NBsController::TResourceRawValues>(IOutputStream &str, const NKikimr::NBsController::TResourceRawValues &value) { str << value.ToString(); diff --git a/ydb/core/mind/bscontroller/cmds_drive_status.cpp b/ydb/core/mind/bscontroller/cmds_drive_status.cpp index 0b220e1eba..8abaaa5979 100644 --- a/ydb/core/mind/bscontroller/cmds_drive_status.cpp +++ b/ydb/core/mind/bscontroller/cmds_drive_status.cpp @@ -69,94 +69,94 @@ namespace NKikimr::NBsController { host->SetNodeId(pdiskId.NodeId); item->SetPath(pdiskInfo.Path); item->SetStatus(pdiskInfo.Status); - item->SetPDiskId(pdiskId.PDiskId); - item->SetSerial(pdiskInfo.ExpectedSerial); + item->SetPDiskId(pdiskId.PDiskId); + item->SetSerial(pdiskInfo.ExpectedSerial); item->SetStatusChangeTimestamp(pdiskInfo.StatusTimestamp.GetValue()); } return true; }); } - void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TAddDriveSerial& cmd, - TStatus& /*status*/) { - - const TString& newSerial = cmd.GetSerial(); - - Schema::DriveSerial::BoxId::Type boxId = cmd.GetBoxId(); + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TAddDriveSerial& cmd, + TStatus& /*status*/) { + + const TString& 
newSerial = cmd.GetSerial(); + + Schema::DriveSerial::BoxId::Type boxId = cmd.GetBoxId(); const TDriveSerialInfo *driveInfo = DrivesSerials.Find(newSerial); - + if (driveInfo && driveInfo->LifeStage != NKikimrBlobStorage::TDriveLifeStage::REMOVED) { - throw TExAlready() << "Device with such serial already exists in BSC database and not in lifeStage REMOVED"; - } - - if (auto it = NodeForSerial.find(newSerial); it != NodeForSerial.end()) { - // Serial of drive is known, but drive not present in DrivesSerial - // Check is it defined in HostConfigs - TNodeId nodeId = it->second; - const TNodeInfo& nodeInfo = Nodes.Get().at(nodeId); - TString path = nodeInfo.KnownDrives.at(newSerial).Path; - - TPDiskId from = TPDiskId::MinForNode(nodeId); - TPDiskId to = TPDiskId::MaxForNode(nodeId); - std::optional<TPDiskId> updatePDiskId; - PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { - if (pdiskInfo.Path == path) { - updatePDiskId = pdiskId; - return false; - } - return true; - }); - if (updatePDiskId) { - // PDisk is defined through HostConfigs, but there may be fictional row in DrivesSerials - // if row is present - delete it + throw TExAlready() << "Device with such serial already exists in BSC database and not in lifeStage REMOVED"; + } + + if (auto it = NodeForSerial.find(newSerial); it != NodeForSerial.end()) { + // Serial of drive is known, but drive not present in DrivesSerial + // Check is it defined in HostConfigs + TNodeId nodeId = it->second; + const TNodeInfo& nodeInfo = Nodes.Get().at(nodeId); + TString path = nodeInfo.KnownDrives.at(newSerial).Path; + + TPDiskId from = TPDiskId::MinForNode(nodeId); + TPDiskId to = TPDiskId::MaxForNode(nodeId); + std::optional<TPDiskId> updatePDiskId; + PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { + if (pdiskInfo.Path == path) { + updatePDiskId = pdiskId; + return false; + } + return true; + }); + if (updatePDiskId) { + // PDisk is defined 
through HostConfigs, but there may be fictional row in DrivesSerials + // if row is present - delete it if (driveInfo) { DrivesSerials.DeleteExistingEntry(newSerial); driveInfo = nullptr; - } - TPDiskInfo *pdiskInfo = PDisks.FindForUpdate(*updatePDiskId); - if (pdiskInfo->ExpectedSerial == newSerial) { - throw TExAlready() << "Device with such serial already exists in BSC database and is defined through " - << "HostConfigs"; - } - pdiskInfo->ExpectedSerial = newSerial; - if (pdiskInfo->BoxId != boxId) { - throw TExError() << "Drive is defind in host configs, but placed in another box# " << pdiskInfo->BoxId; - } - STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA06, "Set new ExpectedSerial for HostConfigs drive", - (Serial, newSerial), (BoxId, boxId), (PDiskId, *updatePDiskId), (Path, path)); - return; - } - } - - { - // Additional check, may give false negative if ExpectedSerial for pdisk is unknown - TMaybe<TPDiskId> from; - TMaybe<TPDiskId> to; - if (auto it = NodeForSerial.find(newSerial); it != NodeForSerial.end()) { - from = TPDiskId::MinForNode(it->second); - to = TPDiskId::MaxForNode(it->second); - } - - std::optional<TPDiskId> existingPDisk; - PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { - if (newSerial == pdiskInfo.ExpectedSerial) { - existingPDisk = pdiskId; - return false; - } - return true; - }); - if (existingPDisk) { - throw TExAlready() << "Device with such serial already exists in BSC database and is defined in HostConfigs" - << " pdiskId# " << *existingPDisk; - } - } - + } + TPDiskInfo *pdiskInfo = PDisks.FindForUpdate(*updatePDiskId); + if (pdiskInfo->ExpectedSerial == newSerial) { + throw TExAlready() << "Device with such serial already exists in BSC database and is defined through " + << "HostConfigs"; + } + pdiskInfo->ExpectedSerial = newSerial; + if (pdiskInfo->BoxId != boxId) { + throw TExError() << "Drive is defind in host configs, but placed in another box# " << pdiskInfo->BoxId; + } + 
STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA06, "Set new ExpectedSerial for HostConfigs drive", + (Serial, newSerial), (BoxId, boxId), (PDiskId, *updatePDiskId), (Path, path)); + return; + } + } + + { + // Additional check, may give false negative if ExpectedSerial for pdisk is unknown + TMaybe<TPDiskId> from; + TMaybe<TPDiskId> to; + if (auto it = NodeForSerial.find(newSerial); it != NodeForSerial.end()) { + from = TPDiskId::MinForNode(it->second); + to = TPDiskId::MaxForNode(it->second); + } + + std::optional<TPDiskId> existingPDisk; + PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { + if (newSerial == pdiskInfo.ExpectedSerial) { + existingPDisk = pdiskId; + return false; + } + return true; + }); + if (existingPDisk) { + throw TExAlready() << "Device with such serial already exists in BSC database and is defined in HostConfigs" + << " pdiskId# " << *existingPDisk; + } + } + // delete existing entry, if any, but keep its GUID std::optional<TMaybe<Schema::DriveSerial::Guid::Type>> guid = driveInfo ? 
std::make_optional(driveInfo->Guid) : std::nullopt; if (driveInfo) { DrivesSerials.DeleteExistingEntry(newSerial); - } - + } + TDriveSerialInfo *driveInfoNew = DrivesSerials.ConstructInplaceNewEntry(newSerial, boxId); if (guid) { driveInfoNew->Guid = *guid; @@ -164,142 +164,142 @@ namespace NKikimr::NBsController { driveInfoNew->Kind = cmd.GetKind(); driveInfoNew->PDiskType = cmd.GetPDiskType(); - TString config; - const bool success = cmd.GetPDiskConfig().SerializeToString(&config); - Y_VERIFY(success); + TString config; + const bool success = cmd.GetPDiskConfig().SerializeToString(&config); + Y_VERIFY(success); driveInfoNew->PDiskConfig = config; - - STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA06, "AddDriveSerial", (Serial, newSerial), (BoxId, boxId)); - } - - void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRemoveDriveSerial& cmd, - TStatus& /*status*/) { - - const TString& serial = cmd.GetSerial(); - + + STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA06, "AddDriveSerial", (Serial, newSerial), (BoxId, boxId)); + } + + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TRemoveDriveSerial& cmd, + TStatus& /*status*/) { + + const TString& serial = cmd.GetSerial(); + if (const TDriveSerialInfo *driveInfo = DrivesSerials.Find(serial); !driveInfo) { - // Drive is defined in HostConfigs - // - - // Fast search (works only for online nodes) - std::optional<TNodeId> nodeId; - if (auto it = NodeForSerial.find(serial); it != NodeForSerial.end()) { - nodeId = it->second; - } else { - // Slow PDisks fullscan - PDisks.ForEach([&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { - if (pdiskInfo.ExpectedSerial == serial) { - nodeId = pdiskId.NodeId; - return false; - } - return true; - }); - } - if (!nodeId) { - throw TExError() << "Device with such serial is unknown for BSC"; - } - - TPDiskId from = TPDiskId::MinForNode(*nodeId); - TPDiskId to = TPDiskId::MaxForNode(*nodeId); - std::optional<TPDiskId> 
removePDiskId; - PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { - if (pdiskInfo.ExpectedSerial == serial) { - if (pdiskInfo.NumActiveSlots) { - throw TExError() << "There are active vdisks on that drive"; - } - if (removePDiskId) { - throw TExError() << "has two pdisks defined in HostConfigs with same serial number"; - } - removePDiskId = pdiskId; - } - return true; - }); - if (!removePDiskId) { - throw TExError() << "The serial was seen in cluster on node# " << *nodeId - << " but now there are no pdisks with the serial"; - } - auto* pdiskUpdate = PDisks.FindForUpdate(*removePDiskId); - pdiskUpdate->ExpectedSerial = {}; - STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA08, "Reset ExpectedSerial for HostConfig drive", - (Serial, serial), (PDiskId, *removePDiskId)); - - // create fictional row in DrivesSerials to be able to reply kAlready for already removed disk - // even if they are defined through HostConfig + // Drive is defined in HostConfigs + // + + // Fast search (works only for online nodes) + std::optional<TNodeId> nodeId; + if (auto it = NodeForSerial.find(serial); it != NodeForSerial.end()) { + nodeId = it->second; + } else { + // Slow PDisks fullscan + PDisks.ForEach([&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { + if (pdiskInfo.ExpectedSerial == serial) { + nodeId = pdiskId.NodeId; + return false; + } + return true; + }); + } + if (!nodeId) { + throw TExError() << "Device with such serial is unknown for BSC"; + } + + TPDiskId from = TPDiskId::MinForNode(*nodeId); + TPDiskId to = TPDiskId::MaxForNode(*nodeId); + std::optional<TPDiskId> removePDiskId; + PDisks.ForEachInRange(from, to, [&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { + if (pdiskInfo.ExpectedSerial == serial) { + if (pdiskInfo.NumActiveSlots) { + throw TExError() << "There are active vdisks on that drive"; + } + if (removePDiskId) { + throw TExError() << "has two pdisks defined in HostConfigs with same serial number"; + } + 
removePDiskId = pdiskId; + } + return true; + }); + if (!removePDiskId) { + throw TExError() << "The serial was seen in cluster on node# " << *nodeId + << " but now there are no pdisks with the serial"; + } + auto* pdiskUpdate = PDisks.FindForUpdate(*removePDiskId); + pdiskUpdate->ExpectedSerial = {}; + STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA08, "Reset ExpectedSerial for HostConfig drive", + (Serial, serial), (PDiskId, *removePDiskId)); + + // create fictional row in DrivesSerials to be able to reply kAlready for already removed disk + // even if they are defined through HostConfig TDriveSerialInfo *driveInfoNew = DrivesSerials.ConstructInplaceNewEntry(serial, pdiskUpdate->BoxId); driveInfoNew->Guid = pdiskUpdate->Guid; driveInfoNew->Kind = pdiskUpdate->Kind.Kind(); driveInfoNew->PDiskType = PDiskTypeToPDiskType(pdiskUpdate->Kind.Type()); driveInfoNew->PDiskConfig = pdiskUpdate->PDiskConfig; driveInfoNew->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED; - } else { + } else { if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED) { - throw TExAlready() << "Drive is already removed"; - } - + throw TExAlready() << "Drive is already removed"; + } + if (driveInfo->NodeId && driveInfo->PDiskId) { TPDiskId pdiskId(*driveInfo->NodeId, *driveInfo->PDiskId); - if (auto* pdiskInfo = PDisks.Find(pdiskId)) { - if (pdiskInfo->NumActiveSlots) { - throw TExError() << "There are active vdisks on that drive"; - } else { - // PDisk will be deleted automatically in FitPDisks - } - } - } - + if (auto* pdiskInfo = PDisks.Find(pdiskId)) { + if (pdiskInfo->NumActiveSlots) { + throw TExError() << "There are active vdisks on that drive"; + } else { + // PDisk will be deleted automatically in FitPDisks + } + } + } + TDriveSerialInfo *driveInfoMutable = DrivesSerials.FindForUpdate(serial); driveInfoMutable->NodeId.Clear(); driveInfoMutable->PDiskId.Clear(); driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED; - - STLOG(PRI_NOTICE, 
BS_CONTROLLER_AUDIT, BSCA07, "RemoveDriveSerial", (Serial, serial)); - } - } - - void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TForgetDriveSerial& cmd, - TStatus& /*status*/) { - - const TString& serial = cmd.GetSerial(); - + + STLOG(PRI_NOTICE, BS_CONTROLLER_AUDIT, BSCA07, "RemoveDriveSerial", (Serial, serial)); + } + } + + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TForgetDriveSerial& cmd, + TStatus& /*status*/) { + + const TString& serial = cmd.GetSerial(); + if (const TDriveSerialInfo *driveInfo = DrivesSerials.Find(serial)) { switch (driveInfo->LifeStage) { - case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN: - [[fallthrough]]; + case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN: + [[fallthrough]]; case NKikimrBlobStorage::TDriveLifeStage::REMOVED: DrivesSerials.DeleteExistingEntry(serial); - break; - default: { - throw TExError() << "Drive not in {NOT_SEEN, REMOVED} lifestage and cannot be forgotten. Remove it first"; - break; - } - } - } else { + break; + default: { + throw TExError() << "Drive not in {NOT_SEEN, REMOVED} lifestage and cannot be forgotten. 
Remove it first"; + break; + } + } + } else { throw TExAlready() << "Drive is unknown for BS_CONTROLLER and cannot be forgotten"; - } - } - - void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TMigrateToSerial& cmd, - TStatus& /*status*/) { - - const NKikimrBlobStorage::TSerialManagementStage::E newStage = cmd.GetStage(); - - switch (newStage) { - case NKikimrBlobStorage::TSerialManagementStage::DISCOVER_SERIAL: - break; - case NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL: - PDisks.ForEach([&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { - TString expected = pdiskInfo.ExpectedSerial; - if (pdiskInfo.Path && (!expected || expected != pdiskInfo.LastSeenSerial)) { - throw TExError() << "pdisk has not ExpectedSerial or ExpectedSerial not equals to LastSeenSerial" - << " pdiskId# " << pdiskId << " expected# " << expected.Quote() - << " lastSeen# " << pdiskInfo.LastSeenSerial; - } - }); - break; - default: - throw TExError() << "serial management stage is unsupported"; - } - - SerialManagementStage.Unshare() = newStage; - } + } + } + + void TBlobStorageController::TConfigState::ExecuteStep(const NKikimrBlobStorage::TMigrateToSerial& cmd, + TStatus& /*status*/) { + + const NKikimrBlobStorage::TSerialManagementStage::E newStage = cmd.GetStage(); + + switch (newStage) { + case NKikimrBlobStorage::TSerialManagementStage::DISCOVER_SERIAL: + break; + case NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL: + PDisks.ForEach([&](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { + TString expected = pdiskInfo.ExpectedSerial; + if (pdiskInfo.Path && (!expected || expected != pdiskInfo.LastSeenSerial)) { + throw TExError() << "pdisk has not ExpectedSerial or ExpectedSerial not equals to LastSeenSerial" + << " pdiskId# " << pdiskId << " expected# " << expected.Quote() + << " lastSeen# " << pdiskInfo.LastSeenSerial; + } + }); + break; + default: + throw TExError() << "serial management stage is unsupported"; + } + + 
SerialManagementStage.Unshare() = newStage; + } } // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 2f2529bc4b..7a61ac078c 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -458,8 +458,8 @@ namespace NKikimr::NBsController { x->SetType(PDiskTypeToPDiskType(pdisk.Category.Type())); x->SetKind(pdisk.Category.Kind()); if (pdisk.PDiskConfig) { - bool success = x->MutablePDiskConfig()->ParseFromString(pdisk.PDiskConfig); - Y_VERIFY(success); + bool success = x->MutablePDiskConfig()->ParseFromString(pdisk.PDiskConfig); + Y_VERIFY(success); } x->SetGuid(pdisk.Guid); x->SetNumStaticSlots(pdisk.StaticSlotUsage); diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index 62c93d564a..aa7d6efcee 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -94,19 +94,19 @@ namespace NKikimr::NBsController { NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk *pdisk = service.AddPDisks(); pdisk->SetNodeID(nodeId); pdisk->SetPDiskID(pdiskId); - if (pdiskInfo.Path) { - pdisk->SetPath(pdiskInfo.Path); - } else if (pdiskInfo.LastSeenPath) { - pdisk->SetPath(pdiskInfo.LastSeenPath); - } + if (pdiskInfo.Path) { + pdisk->SetPath(pdiskInfo.Path); + } else if (pdiskInfo.LastSeenPath) { + pdisk->SetPath(pdiskInfo.LastSeenPath); + } pdisk->SetPDiskGuid(pdiskInfo.Guid); - pdisk->SetPDiskCategory(pdiskInfo.Kind.GetRaw()); - pdisk->SetExpectedSerial(pdiskInfo.ExpectedSerial); - pdisk->SetManagementStage(Self->SerialManagementStage); + pdisk->SetPDiskCategory(pdiskInfo.Kind.GetRaw()); + pdisk->SetExpectedSerial(pdiskInfo.ExpectedSerial); + pdisk->SetManagementStage(Self->SerialManagementStage); if (pdiskInfo.PDiskConfig && !pdisk->MutablePDiskConfig()->ParseFromString(pdiskInfo.PDiskConfig)) { // TODO(alexvru): report this somehow } - 
pdisk->SetSpaceColorBorder(Self->PDiskSpaceColorBorder); + pdisk->SetSpaceColorBorder(Self->PDiskSpaceColorBorder); return pdisk; } @@ -270,10 +270,10 @@ namespace NKikimr::NBsController { if (base && overlay->second) { const TGroupInfo::TGroupStatus& prev = base->second->Status; const TGroupInfo::TGroupStatus& status = overlay->second->Status; - if (status.ExpectedStatus == NKikimrBlobStorage::TGroupStatus::DISINTEGRATED && + if (status.ExpectedStatus == NKikimrBlobStorage::TGroupStatus::DISINTEGRATED && status.ExpectedStatus != prev.ExpectedStatus) { // status did really change *errorDescription = TStringBuilder() << "GroupId# " << overlay->first - << " ExpectedStatus# DISINTEGRATED"; + << " ExpectedStatus# DISINTEGRATED"; return false; } } @@ -376,10 +376,10 @@ namespace NKikimr::NBsController { if (state.NextStoragePoolId.Changed()) { db.Table<Schema::State>().Key(true).Update<Schema::State::NextStoragePoolId>(state.NextStoragePoolId.Get()); } - if (state.SerialManagementStage.Changed()) { + if (state.SerialManagementStage.Changed()) { db.Table<Schema::State>().Key(true).Update<Schema::State::SerialManagementStage>(state.SerialManagementStage.Get()); - } - + } + CommitSelfHealUpdates(state); CommitScrubUpdates(state, txc); CommitStoragePoolStatUpdates(state); diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 68a44ce828..006b0578e7 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -62,9 +62,9 @@ namespace NKikimr { TCowHolder<Schema::State::NextStoragePoolId::Type> NextStoragePoolId; // helper classes - using TLocationMap = THashMap<TPDiskLocation, TPDiskId>; - TLocationMap PDiskLocationMap; - TLocationMap StaticPDiskLocationMap; + using TLocationMap = THashMap<TPDiskLocation, TPDiskId>; + TLocationMap PDiskLocationMap; + TLocationMap StaticPDiskLocationMap; THostRecordMap HostRecords; @@ -95,10 +95,10 @@ namespace NKikimr { // static pdisk/vdisk states std::map<TVSlotId, 
TStaticVSlotInfo>& StaticVSlots; std::map<TPDiskId, TStaticPDiskInfo>& StaticPDisks; - const std::map<TString, TNodeId>& NodeForSerial; + const std::map<TString, TNodeId>& NodeForSerial; + + TCowHolder<Schema::State::SerialManagementStage::Type> SerialManagementStage; - TCowHolder<Schema::State::SerialManagementStage::Type> SerialManagementStage; - TStoragePoolStat& StoragePoolStat; public: @@ -122,8 +122,8 @@ namespace NKikimr { , DefaultMaxSlots(controller.DefaultMaxSlots) , StaticVSlots(controller.StaticVSlots) , StaticPDisks(controller.StaticPDisks) - , NodeForSerial(controller.NodeForSerial) - , SerialManagementStage(&controller.SerialManagementStage) + , NodeForSerial(controller.NodeForSerial) + , SerialManagementStage(&controller.SerialManagementStage) , StoragePoolStat(*controller.StoragePoolStat) { Y_VERIFY(HostRecords); @@ -136,14 +136,14 @@ namespace NKikimr { StoragePools.Commit(); StoragePoolGroups.Commit(); PDisks.Commit(); - DrivesSerials.Commit(); + DrivesSerials.Commit(); Nodes.Commit(); VSlots.Commit(); Groups.Commit(); IndexGroupSpeciesToGroup.Commit(); NextGroupId.Commit(); NextStoragePoolId.Commit(); - SerialManagementStage.Commit(); + SerialManagementStage.Commit(); } void Rollback() { @@ -154,8 +154,8 @@ namespace NKikimr { bool Changed() const { return HostConfigs.Changed() || Boxes.Changed() || StoragePools.Changed() || - StoragePoolGroups.Changed() || PDisks.Changed() || DrivesSerials.Changed() || Nodes.Changed() || - VSlots.Changed() || Groups.Changed() || IndexGroupSpeciesToGroup.Changed() || NextGroupId.Changed() || + StoragePoolGroups.Changed() || PDisks.Changed() || DrivesSerials.Changed() || Nodes.Changed() || + VSlots.Changed() || Groups.Changed() || IndexGroupSpeciesToGroup.Changed() || NextGroupId.Changed() || NextStoragePoolId.Changed() || SerialManagementStage.Changed(); } @@ -202,12 +202,12 @@ namespace NKikimr { void Init() { PDisks.ForEach([this](const TPDiskId& pdiskId, const TPDiskInfo& pdiskInfo) { const TNodeId &nodeId = 
pdiskId.NodeId; - TPDiskLocation location{nodeId, pdiskInfo.PathOrSerial()}; - PDiskLocationMap.emplace(location, pdiskId); + TPDiskLocation location{nodeId, pdiskInfo.PathOrSerial()}; + PDiskLocationMap.emplace(location, pdiskId); }); for (const auto& [pdiskId, pdisk] : StaticPDisks) { - TPDiskLocation location{pdiskId.NodeId, pdisk.Path}; - StaticPDiskLocationMap.emplace(location, pdiskId); + TPDiskLocation location{pdiskId.NodeId, pdisk.Path}; + StaticPDiskLocationMap.emplace(location, pdiskId); } } @@ -242,10 +242,10 @@ namespace NKikimr { void ExecuteStep(const NKikimrBlobStorage::TMoveGroups& cmd, TStatus& status); void ExecuteStep(const NKikimrBlobStorage::TQueryBaseConfig& cmd, TStatus& status); void ExecuteStep(const NKikimrBlobStorage::TDropDonorDisk& cmd, TStatus& status); - void ExecuteStep(const NKikimrBlobStorage::TAddDriveSerial& cmd, TStatus& status); - void ExecuteStep(const NKikimrBlobStorage::TRemoveDriveSerial& cmd, TStatus& status); - void ExecuteStep(const NKikimrBlobStorage::TForgetDriveSerial& cmd, TStatus& status); - void ExecuteStep(const NKikimrBlobStorage::TMigrateToSerial& cmd, TStatus& status); + void ExecuteStep(const NKikimrBlobStorage::TAddDriveSerial& cmd, TStatus& status); + void ExecuteStep(const NKikimrBlobStorage::TRemoveDriveSerial& cmd, TStatus& status); + void ExecuteStep(const NKikimrBlobStorage::TForgetDriveSerial& cmd, TStatus& status); + void ExecuteStep(const NKikimrBlobStorage::TMigrateToSerial& cmd, TStatus& status); }; } // NBsController diff --git a/ydb/core/mind/bscontroller/config_cmd.cpp b/ydb/core/mind/bscontroller/config_cmd.cpp index 8ca9154ac7..2fc925a9cd 100644 --- a/ydb/core/mind/bscontroller/config_cmd.cpp +++ b/ydb/core/mind/bscontroller/config_cmd.cpp @@ -122,10 +122,10 @@ namespace NKikimr::NBsController { db.Table<T>().Key(true).Update<T::MaxScrubbedDisksAtOnce>(Self->MaxScrubbedDisksAtOnce); Self->ScrubState.OnMaxScrubbedDisksAtOnceChange(); } - for (auto value : settings.GetPDiskSpaceColorBorder()) { 
- Self->PDiskSpaceColorBorder = static_cast<T::PDiskSpaceColorBorder::Type>(value); - db.Table<T>().Key(true).Update<T::PDiskSpaceColorBorder>(Self->PDiskSpaceColorBorder); - } + for (auto value : settings.GetPDiskSpaceColorBorder()) { + Self->PDiskSpaceColorBorder = static_cast<T::PDiskSpaceColorBorder::Type>(value); + db.Table<T>().Key(true).Update<T::PDiskSpaceColorBorder>(Self->PDiskSpaceColorBorder); + } return true; } @@ -239,7 +239,7 @@ namespace NKikimr::NBsController { const bool doLogCommand = Success && State->Changed(); Success = Success && Self->CommitConfigUpdates(*State, Cmd.GetIgnoreGroupFailModelChecks(), Cmd.GetIgnoreDegradedGroupsChecks(), txc, &Error); - + Finish(); if (doLogCommand) { LogCommand(txc, TDuration::Seconds(timer.Passed())); @@ -311,10 +311,10 @@ namespace NKikimr::NBsController { HANDLE_COMMAND(MergeBoxes, false, false) HANDLE_COMMAND(MoveGroups, false, false) HANDLE_COMMAND(DropDonorDisk, false, false) - HANDLE_COMMAND(AddDriveSerial, true, false) - HANDLE_COMMAND(RemoveDriveSerial, true, false) - HANDLE_COMMAND(ForgetDriveSerial, false, false) - HANDLE_COMMAND(MigrateToSerial, false, false) + HANDLE_COMMAND(AddDriveSerial, true, false) + HANDLE_COMMAND(RemoveDriveSerial, true, false) + HANDLE_COMMAND(ForgetDriveSerial, false, false) + HANDLE_COMMAND(MigrateToSerial, false, false) default: throw TExError() << "unsupported command"; @@ -326,7 +326,7 @@ namespace NKikimr::NBsController { state->ApplyConfigUpdates(); } TActivationContext::Send(new IEventHandle(NotifyId, Self->SelfId(), Ev.Release(), 0, Cookie)); - Self->UpdatePDisksCounters(); + Self->UpdatePDisksCounters(); } }; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index 591597e86a..cb8801b0f5 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -201,7 +201,7 @@ namespace NKikimr { break; case NKikimrBlobStorage::EDriveStatus::FAULTY: - case 
NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: // groups are moved out asynchronously break; diff --git a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp index 72e40038bc..3112a24317 100644 --- a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp +++ b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp @@ -3,44 +3,44 @@ namespace NKikimr { namespace NBsController { - TPDiskId FindFirstEmptyPDiskId(const TOverlayMap<TPDiskId, TBlobStorageController::TPDiskInfo>& pdisks, - TNodeId nodeId) { - Schema::PDisk::PDiskID::Type nextPDiskID = 1000; // start allocation from this number - // generate PDisk id; skip generated one if it already exists (e.g. user has added - // such PDisk by hand) - TPDiskId pdiskId; - do { - pdiskId = TPDiskId(nodeId, nextPDiskID++); // postincrement this number - } while (pdisks.Find(pdiskId)); - - return pdiskId; - } - - Schema::PDisk::Guid::Type TBlobStorageController::CheckStaticPDisk(TConfigState &state, TPDiskId pdiskId, + TPDiskId FindFirstEmptyPDiskId(const TOverlayMap<TPDiskId, TBlobStorageController::TPDiskInfo>& pdisks, + TNodeId nodeId) { + Schema::PDisk::PDiskID::Type nextPDiskID = 1000; // start allocation from this number + // generate PDisk id; skip generated one if it already exists (e.g. 
user has added + // such PDisk by hand) + TPDiskId pdiskId; + do { + pdiskId = TPDiskId(nodeId, nextPDiskID++); // postincrement this number + } while (pdisks.Find(pdiskId)); + + return pdiskId; + } + + Schema::PDisk::Guid::Type TBlobStorageController::CheckStaticPDisk(TConfigState &state, TPDiskId pdiskId, const TPDiskCategory& category, const TMaybe<Schema::PDisk::PDiskConfig::Type>& pdiskConfig, ui32 *staticSlotUsage) { - const TStaticPDiskInfo& info = state.StaticPDisks.at(pdiskId); - - // create new disk entry; the PDisk with this number MUST NOT exist, otherwise we can - // have a collision - if (state.PDisks.Find(pdiskId)) { - throw TExError() << "PDisk from static config collides with dynamic one" - << " PDiskId# " << pdiskId; - } - - // validate fields - if (pdiskConfig.GetOrElse(TString()) != info.PDiskConfig) { - throw TExError() << "PDiskConfig field doesn't match static one" - << " pdiskConfig# " << (pdiskConfig ? *pdiskConfig : "(empty)") - << " info.PDiskConfig# " << info.PDiskConfig; - } else if (category != info.Category) { - throw TExError() << "Type/Kind fields do not match static one"; - } - + const TStaticPDiskInfo& info = state.StaticPDisks.at(pdiskId); + + // create new disk entry; the PDisk with this number MUST NOT exist, otherwise we can + // have a collision + if (state.PDisks.Find(pdiskId)) { + throw TExError() << "PDisk from static config collides with dynamic one" + << " PDiskId# " << pdiskId; + } + + // validate fields + if (pdiskConfig.GetOrElse(TString()) != info.PDiskConfig) { + throw TExError() << "PDiskConfig field doesn't match static one" + << " pdiskConfig# " << (pdiskConfig ? 
*pdiskConfig : "(empty)") + << " info.PDiskConfig# " << info.PDiskConfig; + } else if (category != info.Category) { + throw TExError() << "Type/Kind fields do not match static one"; + } + *staticSlotUsage = info.StaticSlotUsage; - return info.Guid; - } - + return info.Guid; + } + void TBlobStorageController::AllocatePDiskWithSerial(TConfigState& state, ui32 nodeId, const TSerial& serial, TDriveSerialInfo *driveInfo) { TPDiskId pdiskId = FindFirstEmptyPDiskId(state.PDisks, nodeId); @@ -144,25 +144,25 @@ namespace NKikimr { return true; }); - // Create new pdisks from DriveSerial table - - // Iterate over initial DrivesSerials map since every call to Unshare will invalidate iterators + // Create new pdisks from DriveSerial table + + // Iterate over initial DrivesSerials map since every call to Unshare will invalidate iterators state.DrivesSerials.ScanRange({}, {}, [&](const auto& serial, const auto& driveInfo, const auto& getMutableItem) { - if (driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) { - // Try to find drive in currently online nodes and create new PDisk - if (auto nodeIt = NodeForSerial.find(serial.Serial); nodeIt != NodeForSerial.end()) { + if (driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) { + // Try to find drive in currently online nodes and create new PDisk + if (auto nodeIt = NodeForSerial.find(serial.Serial); nodeIt != NodeForSerial.end()) { AllocatePDiskWithSerial(state, nodeIt->second, serial, getMutableItem()); - } - } else if (driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::ALLOCATED - || driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::ERROR) { + } + } else if (driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::ALLOCATED + || driveInfo.LifeStage == NKikimrBlobStorage::TDriveLifeStage::ERROR) { const auto it = NodeForSerial.find(serial.Serial); const ui32 nodeId = it != NodeForSerial.end() ? 
it->second : 0; // TODO(alexvru): check where no entry in NodeForSerial is a valid case ValidatePDiskWithSerial(state, nodeId, serial, driveInfo, getMutableItem); - } + } return true; }); - + const auto &hostConfigs = state.HostConfigs.Get(); for (const auto &kvBox : state.Boxes.Get()) { const TBoxId &boxId = kvBox.first; @@ -196,7 +196,7 @@ namespace NKikimr { // update PDisk configuration if needed if (pdisk->Kind != category || pdisk->SharedWithOs != driveInfo.SharedWithOs || pdisk->ReadCentric != driveInfo.ReadCentric || pdisk->BoxId != boxId || - pdisk->PDiskConfig != driveInfo.PDiskConfig.GetOrElse(TString())) { + pdisk->PDiskConfig != driveInfo.PDiskConfig.GetOrElse(TString())) { TPDiskInfo *pdisk = state.PDisks.FindForUpdate(pdiskId); pdisk->Kind = category; pdisk->SharedWithOs = driveInfo.SharedWithOs; @@ -215,41 +215,41 @@ namespace NKikimr { pdiskId = pdiskIt->second; guid = CheckStaticPDisk(state, pdiskId, category, driveInfo.PDiskConfig, &staticSlotUsage); } else { - pdiskId = FindFirstEmptyPDiskId(state.PDisks, *nodeId); + pdiskId = FindFirstEmptyPDiskId(state.PDisks, *nodeId); guid = RandomNumber<Schema::PDisk::Guid::Type>(); } - TString path = drive.Path; - // try find current serial number for device - TString currentSerial; - if (auto nodeIt = state.Nodes.Get().find(*nodeId); nodeIt != state.Nodes.Get().end()) { - for (const auto& [serial, driveData] : nodeIt->second.KnownDrives) { - if (driveData.Path == path) { - currentSerial = serial; - break; - } - } - } - - // emplace PDisk into set - state.PDisks.ConstructInplaceNewEntry(pdiskId, hostId, path, category.GetRaw(), - guid, driveInfo.SharedWithOs, driveInfo.ReadCentric, 1000, - driveInfo.PDiskConfig.GetOrElse(TString()), boxId, DefaultMaxSlots, - NKikimrBlobStorage::EDriveStatus::ACTIVE, TInstant::Zero(), currentSerial, currentSerial, + TString path = drive.Path; + // try find current serial number for device + TString currentSerial; + if (auto nodeIt = state.Nodes.Get().find(*nodeId); nodeIt 
!= state.Nodes.Get().end()) { + for (const auto& [serial, driveData] : nodeIt->second.KnownDrives) { + if (driveData.Path == path) { + currentSerial = serial; + break; + } + } + } + + // emplace PDisk into set + state.PDisks.ConstructInplaceNewEntry(pdiskId, hostId, path, category.GetRaw(), + guid, driveInfo.SharedWithOs, driveInfo.ReadCentric, 1000, + driveInfo.PDiskConfig.GetOrElse(TString()), boxId, DefaultMaxSlots, + NKikimrBlobStorage::EDriveStatus::ACTIVE, TInstant::Zero(), currentSerial, currentSerial, TString(), staticSlotUsage); // insert PDisk into location map state.PDiskLocationMap.emplace(location, pdiskId); - STLOG(PRI_NOTICE, BS_CONTROLLER, BSCFP02, "Create new pdisk", (PDiskId, pdiskId), - (Location, location)); + STLOG(PRI_NOTICE, BS_CONTROLLER, BSCFP02, "Create new pdisk", (PDiskId, pdiskId), + (Location, location)); } state.PDisksToRemove.erase(pdiskId); } } } - for (const auto& pdiskId : state.PDisksToRemove) { - STLOG(PRI_NOTICE, BS_CONTROLLER, BSCFP03, "PDisk to remove:", (PDiskId, pdiskId)); - } + for (const auto& pdiskId : state.PDisksToRemove) { + STLOG(PRI_NOTICE, BS_CONTROLLER, BSCFP03, "PDisk to remove:", (PDiskId, pdiskId)); + } } void TBlobStorageController::FitPDisksForNode(TConfigState& state, ui32 nodeId, const std::vector<TSerial>& serials) { diff --git a/ydb/core/mind/bscontroller/error.h b/ydb/core/mind/bscontroller/error.h index c3f7eaa0de..6cd4a22cb2 100644 --- a/ydb/core/mind/bscontroller/error.h +++ b/ydb/core/mind/bscontroller/error.h @@ -113,9 +113,9 @@ namespace NKikimr::NBsController { *this << "PDisk not found" << TErrorParams::Fqdn(hostKey.GetFqdn()) << TErrorParams::IcPort(hostKey.GetIcPort()) - << TErrorParams::NodeId(hostKey.GetNodeId()) - << TErrorParams::PDiskId(pdiskId) - << TErrorParams::Path(path); + << TErrorParams::NodeId(hostKey.GetNodeId()) + << TErrorParams::PDiskId(pdiskId) + << TErrorParams::Path(path); } NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { @@ -199,10 
+199,10 @@ namespace NKikimr::NBsController { } }; - struct TExAlready : TExError { - NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { - return NKikimrBlobStorage::TConfigResponse::TStatus::kAlready; - } - }; - + struct TExAlready : TExError { + NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { + return NKikimrBlobStorage::TConfigResponse::TStatus::kAlready; + } + }; + } // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/group_mapper_ut.cpp b/ydb/core/mind/bscontroller/group_mapper_ut.cpp index be8201f4e7..0dcb5be086 100644 --- a/ydb/core/mind/bscontroller/group_mapper_ut.cpp +++ b/ydb/core/mind/bscontroller/group_mapper_ut.cpp @@ -1,8 +1,8 @@ #include <library/cpp/testing/unittest/registar.h> - -#include "group_geometry_info.h" + +#include "group_geometry_info.h" #include "group_mapper.h" -#include "ut_helpers.h" +#include "ut_helpers.h" using namespace NKikimr; using namespace NKikimr::NBsController; @@ -144,7 +144,7 @@ public: return 0; } if (!success) { - Ctest << "error# " << error << Endl; + Ctest << "error# " << error << Endl; } UNIT_ASSERT(success); TGroupRecord& record = Groups[groupId]; @@ -182,7 +182,7 @@ public: } } - Ctest << "groupId# " << groupId << " reallocating group# " << FormatGroup(group.Group) << Endl; + Ctest << "groupId# " << groupId << " reallocating group# " << FormatGroup(group.Group) << Endl; TString error; bool success = mapper.AllocateGroup(groupId, group.Group, replaced.data(), replaced.size(), std::move(forbid), @@ -191,7 +191,7 @@ public: if (requireError) { return {}; } - Ctest << "error# " << error << Endl; + Ctest << "error# " << error << Endl; } else { UNIT_ASSERT(!requireError); } @@ -321,7 +321,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper globalMapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block, 1, 8, 2)); globalContext.PopulateGroupMapper(globalMapper, 16); for (ui32 i = 0; i < 
globalContext.GetTotalDisks(); ++i) { - Ctest << i << "/" << globalContext.GetTotalDisks() << Endl; + Ctest << i << "/" << globalContext.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; globalContext.AllocateGroup(globalMapper, group); @@ -340,7 +340,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block, 1, 8, numVDisksPerFailDomain)); context.PopulateGroupMapper(mapper, 8 * numVDisksPerFailDomain); for (ui32 i = 0; i < context.GetTotalDisks(); ++i) { - Ctest << i << "/" << context.GetTotalDisks() << Endl; + Ctest << i << "/" << context.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; context.AllocateGroup(mapper, group); context.CheckGroupErasure(group); @@ -365,7 +365,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::ErasureMirror3dc)); context.PopulateGroupMapper(mapper, 9); for (ui32 i = 0; i < context.GetTotalDisks(); ++i) { - Ctest << i << "/" << context.GetTotalDisks() << Endl; + Ctest << i << "/" << context.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; context.AllocateGroup(mapper, group); context.CheckGroupErasure(group); @@ -390,7 +390,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block)); context.PopulateGroupMapper(mapper, 8); for (ui32 i = 0; i < context.GetTotalDisks(); ++i) { - Ctest << i << "/" << context.GetTotalDisks() << Endl; + Ctest << i << "/" << context.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; context.AllocateGroup(mapper, group); context.CheckGroupErasure(group); @@ -420,7 +420,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block)); context.PopulateGroupMapper(mapper, 8); for (ui32 i = 0; i < context.GetTotalDisks(); ++i) { - Ctest << i << "/" << 
context.GetTotalDisks() << Endl; + Ctest << i << "/" << context.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; context.AllocateGroup(mapper, group); context.CheckGroupErasure(group); @@ -447,7 +447,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::ErasureMirror3dc)); context.PopulateGroupMapper(mapper, 9); for (ui32 i = 0; i < context.GetTotalDisks(); ++i) { - Ctest << i << "/" << context.GetTotalDisks() << Endl; + Ctest << i << "/" << context.GetTotalDisks() << Endl; TGroupMapper::TGroupDefinition group; context.AllocateGroup(mapper, group); context.CheckGroupErasure(group); @@ -478,12 +478,12 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { TGroupMapper::TGroupDefinition group; ui32 groupId = context.AllocateGroup(mapper, group); groupIds.push_back(groupId); - Ctest << "groupId# " << groupId << " content# " << context.FormatGroup(group) << Endl; + Ctest << "groupId# " << groupId << " content# " << context.FormatGroup(group) << Endl; context.CheckGroupErasure(group); context.ReallocateGroup(mapper, groupId, {}); } } - Ctest << "remapping disks" << Endl; + Ctest << "remapping disks" << Endl; { TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block)); TSet<TPDiskId> unusableDisks; @@ -491,7 +491,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { for (const TPDiskId& pdiskId : pdisks) { if (unusableDisks.size() < 2) { if (unusableDisks.insert(pdiskId).second) { - Ctest << "making unusable disk# " << pdiskId.ToString() << Endl; + Ctest << "making unusable disk# " << pdiskId.ToString() << Endl; } } } @@ -499,7 +499,7 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { context.PopulateGroupMapper(mapper, 10, unusableDisks); for (ui32 groupId : groupIds) { auto group = context.ReallocateGroup(mapper, groupId, unusableDisks); - Ctest << "groupId# " << groupId << " new content# " << context.FormatGroup(group) << Endl; + Ctest << "groupId# " << groupId << " new content# " << 
context.FormatGroup(group) << Endl; context.CheckGroupErasure(group); } } @@ -604,15 +604,15 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) { } } - Ctest << "iteration# " << k << " numBodies# " << disks.size() << " numDisks# " << numDisks << Endl; - + Ctest << "iteration# " << k << " numBodies# " << disks.size() << " numDisks# " << numDisks << Endl; + const ui32 maxSlots = 16; TTestContext context(std::move(disks)); context.IteratePDisks([&](auto&, auto& v) { v.NumSlots = rand(0, maxSlots); }); for (;;) { - Ctest << "spawning new mapper" << Endl; + Ctest << "spawning new mapper" << Endl; TGroupMapper mapper(TTestContext::CreateGroupGeometry(numDataCenters >= 3 ? TBlobStorageGroupType::ErasureMirror3dc : TBlobStorageGroupType::Erasure4Plus2Block)); diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index 31474c21c6..ba311a254c 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -69,7 +69,7 @@ public: class TTxScrubStart; class TTxScrubQuantumFinished; class TTxUpdateLastSeenReady; - class TTxUpdateNodeDrives; + class TTxUpdateNodeDrives; class TVSlotInfo; class TPDiskInfo; @@ -293,9 +293,9 @@ public: NKikimrBlobStorage::EDriveStatus Status; TInstant StatusTimestamp; - TString ExpectedSerial; - TString LastSeenSerial; - TString LastSeenPath; + TString ExpectedSerial; + TString LastSeenSerial; + TString LastSeenPath; const ui32 StaticSlotUsage = 0; template<typename T> @@ -309,10 +309,10 @@ public: Table::NextVSlotId, Table::PDiskConfig, Table::Status, - Table::Timestamp, - Table::ExpectedSerial, - Table::LastSeenSerial, - Table::LastSeenPath + Table::Timestamp, + Table::ExpectedSerial, + Table::LastSeenSerial, + Table::LastSeenPath > adapter( &TPDiskInfo::Path, &TPDiskInfo::Kind, @@ -322,10 +322,10 @@ public: &TPDiskInfo::NextVSlotId, &TPDiskInfo::PDiskConfig, &TPDiskInfo::Status, - &TPDiskInfo::StatusTimestamp, - &TPDiskInfo::ExpectedSerial, - &TPDiskInfo::LastSeenSerial, - &TPDiskInfo::LastSeenPath + 
&TPDiskInfo::StatusTimestamp, + &TPDiskInfo::ExpectedSerial, + &TPDiskInfo::LastSeenSerial, + &TPDiskInfo::LastSeenPath ); callback(&adapter); } @@ -341,9 +341,9 @@ public: TBoxId boxId, ui32 defaultMaxSlots, NKikimrBlobStorage::EDriveStatus status, - TInstant statusTimestamp, - const TString& expectedSerial, - const TString& lastSeenSerial, + TInstant statusTimestamp, + const TString& expectedSerial, + const TString& lastSeenSerial, const TString& lastSeenPath, ui32 staticSlotUsage) : HostId(hostId) @@ -357,9 +357,9 @@ public: , BoxId(boxId) , Status(status) , StatusTimestamp(statusTimestamp) - , ExpectedSerial(expectedSerial) - , LastSeenSerial(lastSeenSerial) - , LastSeenPath(lastSeenPath) + , ExpectedSerial(expectedSerial) + , LastSeenSerial(lastSeenSerial) + , LastSeenPath(lastSeenPath) , StaticSlotUsage(staticSlotUsage) { ExtractConfig(defaultMaxSlots); @@ -399,16 +399,16 @@ public: Metrics.GetState() == NKikimrBlobStorage::TPDiskState::Normal); } - bool ShouldBeSettledBySelfHeal() const { - switch (Status) { - case NKikimrBlobStorage::EDriveStatus::FAULTY: - case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: - return true; - default: - return false; - } - } - + bool ShouldBeSettledBySelfHeal() const { + switch (Status) { + case NKikimrBlobStorage::EDriveStatus::FAULTY: + case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + return true; + default: + return false; + } + } + bool BadInTermsOfSelfHeal() const { return ShouldBeSettledBySelfHeal() || Status == NKikimrBlobStorage::EDriveStatus::INACTIVE; } @@ -427,7 +427,7 @@ public: case NKikimrBlobStorage::EDriveStatus::INACTIVE: case NKikimrBlobStorage::EDriveStatus::SPARE: case NKikimrBlobStorage::EDriveStatus::FAULTY: - case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: return true; case NKikimrBlobStorage::EDriveStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_: @@ -437,10 +437,10 @@ public: Y_FAIL("unexpected EDriveStatus"); } - TString 
PathOrSerial() const { - return Path ? Path : ExpectedSerial; - } - + TString PathOrSerial() const { + return Path ? Path : ExpectedSerial; + } + void OnCommit(); }; @@ -486,9 +486,9 @@ public: struct TGroupStatus { // status derived from the actual state of VDisks (IsReady() to be exact) - NKikimrBlobStorage::TGroupStatus::E OperatingStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; + NKikimrBlobStorage::TGroupStatus::E OperatingStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; // status derived by adding underlying PDisk status (FAULTY&BROKEN are assumed to be not working ones) - NKikimrBlobStorage::TGroupStatus::E ExpectedStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; + NKikimrBlobStorage::TGroupStatus::E ExpectedStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; } Status; // group status depends on the IsReady value for every VDisk; so it has to be updated every time there is possible @@ -608,20 +608,20 @@ public: return values; } - TPDiskCategory::EDeviceType GetCommonDeviceType() const { + TPDiskCategory::EDeviceType GetCommonDeviceType() const { if (VDisksInGroup) { const TPDiskCategory::EDeviceType type = VDisksInGroup.front()->PDisk->Kind.Type(); for (const TVSlotInfo *vslot : VDisksInGroup) { - if (type != vslot->PDisk->Kind.Type()) { - return TPDiskCategory::DEVICE_TYPE_UNKNOWN; - } - } - return type; - } else { - return TPDiskCategory::DEVICE_TYPE_UNKNOWN; - } - } - + if (type != vslot->PDisk->Kind.Type()) { + return TPDiskCategory::DEVICE_TYPE_UNKNOWN; + } + } + return type; + } else { + return TPDiskCategory::DEVICE_TYPE_UNKNOWN; + } + } + void FillInGroupParameters(NKikimrBlobStorage::TEvControllerSelectGroupsResult::TGroupParameters *params) const { FillInResources(params->MutableAssuredResources(), true); FillInResources(params->MutableCurrentResources(), false); @@ -732,8 +732,8 @@ public: bool IsRegistered = false; Table::NextPDiskID::Type NextPDiskID; - // in-mem only - std::map<TString, NPDisk::TDriveData> KnownDrives; + // in-mem 
only + std::map<TString, NPDisk::TDriveData> KnownDrives; template<typename T> static void Apply(TBlobStorageController* /*controller*/, T&& callback) { @@ -755,7 +755,7 @@ public: }; - std::map<TString, TNodeId> NodeForSerial; + std::map<TString, TNodeId> NodeForSerial; TMap<ui32, TSet<ui32>> NodesAwaitingKeysForGroup; struct THostConfigInfo { @@ -1149,82 +1149,82 @@ public: } }; - struct TSerial { - TString Serial; - - TSerial(const TString& serial) - : Serial(serial) - {} - - auto GetKey() const { - return std::tie(Serial); - } - - operator TString() const { - return Serial; - } - - friend bool operator<(const TSerial &x, const TSerial &y) { - return x.GetKey() < y.GetKey(); - } - - friend bool operator==(const TSerial &x, const TSerial &y) { - return x.GetKey() == y.GetKey(); - } - - friend bool operator!=(const TSerial &x, const TSerial &y) { - return !(x == y); - } - }; - - struct TDriveSerialInfo { - using Table = Schema::DriveSerial; - - Table::BoxId::Type BoxId; - TMaybe<Table::NodeId::Type> NodeId; - TMaybe<Table::PDiskId::Type> PDiskId; - TMaybe<Table::Guid::Type> Guid; - Table::LifeStage::Type LifeStage = NKikimrBlobStorage::TDriveLifeStage::UNKNOWN; - Table::Kind::Type Kind = 0; - Table::PDiskType::Type PDiskType = PDiskTypeToPDiskType(TPDiskCategory::DEVICE_TYPE_UNKNOWN); - TMaybe<Table::PDiskConfig::Type> PDiskConfig; - - TDriveSerialInfo() = default; - TDriveSerialInfo(const TDriveSerialInfo&) = default; - - TDriveSerialInfo(Table::BoxId::Type boxId) - : BoxId(boxId) - , LifeStage(NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) - {} - - template<typename T> - static void Apply(TBlobStorageController* /*controller*/, T&& callback) { - static TTableAdapter<Table, TDriveSerialInfo, - Table::BoxId, - Table::NodeId, - Table::PDiskId, - Table::Guid, - Table::LifeStage, - Table::Kind, - Table::PDiskType, - Table::PDiskConfig - > adapter( - &TDriveSerialInfo::BoxId, - &TDriveSerialInfo::NodeId, - &TDriveSerialInfo::PDiskId, - &TDriveSerialInfo::Guid, - 
&TDriveSerialInfo::LifeStage, - &TDriveSerialInfo::Kind, - &TDriveSerialInfo::PDiskType, - &TDriveSerialInfo::PDiskConfig - ); - callback(&adapter); - } + struct TSerial { + TString Serial; + + TSerial(const TString& serial) + : Serial(serial) + {} + + auto GetKey() const { + return std::tie(Serial); + } + + operator TString() const { + return Serial; + } + + friend bool operator<(const TSerial &x, const TSerial &y) { + return x.GetKey() < y.GetKey(); + } + + friend bool operator==(const TSerial &x, const TSerial &y) { + return x.GetKey() == y.GetKey(); + } + + friend bool operator!=(const TSerial &x, const TSerial &y) { + return !(x == y); + } + }; + + struct TDriveSerialInfo { + using Table = Schema::DriveSerial; + + Table::BoxId::Type BoxId; + TMaybe<Table::NodeId::Type> NodeId; + TMaybe<Table::PDiskId::Type> PDiskId; + TMaybe<Table::Guid::Type> Guid; + Table::LifeStage::Type LifeStage = NKikimrBlobStorage::TDriveLifeStage::UNKNOWN; + Table::Kind::Type Kind = 0; + Table::PDiskType::Type PDiskType = PDiskTypeToPDiskType(TPDiskCategory::DEVICE_TYPE_UNKNOWN); + TMaybe<Table::PDiskConfig::Type> PDiskConfig; + + TDriveSerialInfo() = default; + TDriveSerialInfo(const TDriveSerialInfo&) = default; + + TDriveSerialInfo(Table::BoxId::Type boxId) + : BoxId(boxId) + , LifeStage(NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) + {} + + template<typename T> + static void Apply(TBlobStorageController* /*controller*/, T&& callback) { + static TTableAdapter<Table, TDriveSerialInfo, + Table::BoxId, + Table::NodeId, + Table::PDiskId, + Table::Guid, + Table::LifeStage, + Table::Kind, + Table::PDiskType, + Table::PDiskConfig + > adapter( + &TDriveSerialInfo::BoxId, + &TDriveSerialInfo::NodeId, + &TDriveSerialInfo::PDiskId, + &TDriveSerialInfo::Guid, + &TDriveSerialInfo::LifeStage, + &TDriveSerialInfo::Kind, + &TDriveSerialInfo::PDiskType, + &TDriveSerialInfo::PDiskConfig + ); + callback(&adapter); + } void OnCommit() {} void OnClone(const THolder<TDriveSerialInfo>&) {} - }; - + }; + 
struct THostRecord { TNodeId NodeId; TNodeLocation Location; @@ -1482,9 +1482,9 @@ private: void ReadGroups(TSet<ui32>& groupIDsToRead, bool discard, TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result); - void ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo& pdisk, - TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result, - const NKikimrBlobStorage::EEntityStatus entityStatus); + void ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo& pdisk, + TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result, + const NKikimrBlobStorage::EEntityStatus entityStatus); void ReadVSlot(const TVSlotInfo& vslot, TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result); @@ -1526,7 +1526,7 @@ private: void RenderHeader(IOutputStream& out); void RenderFooter(IOutputStream& out); void RenderMonPage(IOutputStream& out); - void RenderInternalTables(IOutputStream& out, const TString& table); + void RenderInternalTables(IOutputStream& out, const TString& table); void RenderGroupDetail(IOutputStream &out, TGroupId groupId); void RenderGroupsInStoragePool(IOutputStream &out, const TBoxStoragePoolId& id); void RenderVSlotTable(IOutputStream& out, std::function<void()> callback); @@ -1552,7 +1552,7 @@ private: void Handle(TEvBlobStorage::TEvControllerSelectGroups::TPtr &ev); void Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr &ev); void Handle(TEvBlobStorage::TEvControllerUpdateGroupStat::TPtr &ev); - void Handle(TEvBlobStorage::TEvControllerUpdateNodeDrives::TPtr &ev); + void Handle(TEvBlobStorage::TEvControllerUpdateNodeDrives::TPtr &ev); void Handle(TEvControllerCommitGroupLatencies::TPtr &ev); void Handle(TEvBlobStorage::TEvRequestControllerInfo::TPtr &ev); void Handle(TEvBlobStorage::TEvControllerGroupReconfigureWipe::TPtr &ev); @@ -1643,7 +1643,7 @@ private: void Handle(TEvPrivate::TEvDropDonor::TPtr ev); - Schema::PDisk::Guid::Type CheckStaticPDisk(TConfigState &state, TPDiskId pdiskId, const TPDiskCategory& category, + Schema::PDisk::Guid::Type 
CheckStaticPDisk(TConfigState &state, TPDiskId pdiskId, const TPDiskCategory& category, const TMaybe<Schema::PDisk::PDiskConfig::Type>& pdiskConfig, ui32 *staticSlotUsage); void AllocatePDiskWithSerial(TConfigState& state, ui32 nodeId, const TSerial& serial, TDriveSerialInfo *driveInfo); void ValidatePDiskWithSerial(TConfigState& state, ui32 nodeId, const TSerial& serial, const TDriveSerialInfo& driveInfo, @@ -1721,7 +1721,7 @@ public: hFunc(TEvBlobStorage::TEvControllerSelectGroups, Handle); hFunc(TEvBlobStorage::TEvControllerUpdateDiskStatus, Handle); hFunc(TEvBlobStorage::TEvControllerUpdateGroupStat, Handle); - hFunc(TEvBlobStorage::TEvControllerUpdateNodeDrives, Handle); + hFunc(TEvBlobStorage::TEvControllerUpdateNodeDrives, Handle); hFunc(TEvControllerCommitGroupLatencies, Handle); hFunc(TEvBlobStorage::TEvRequestControllerInfo, Handle); hFunc(TEvBlobStorage::TEvControllerGroupReconfigureWipe, Handle); @@ -1754,7 +1754,7 @@ public: fFunc(TEvBlobStorage::EvControllerSelectGroups, EnqueueIncomingEvent); fFunc(TEvBlobStorage::EvControllerUpdateDiskStatus, EnqueueIncomingEvent); fFunc(TEvBlobStorage::EvControllerUpdateGroupStat, EnqueueIncomingEvent); - fFunc(TEvBlobStorage::EvControllerUpdateNodeDrives, EnqueueIncomingEvent); + fFunc(TEvBlobStorage::EvControllerUpdateNodeDrives, EnqueueIncomingEvent); fFunc(TEvControllerCommitGroupLatencies::EventType, EnqueueIncomingEvent); fFunc(TEvBlobStorage::EvRequestControllerInfo, EnqueueIncomingEvent); fFunc(TEvBlobStorage::EvControllerGroupReconfigureWipe, EnqueueIncomingEvent); @@ -1812,7 +1812,7 @@ public: } ValidateInternalState(); - UpdatePDisksCounters(); + UpdatePDisksCounters(); IssueInitialGroupContent(); InitializeSelfHealState(); UpdateSystemViews(); @@ -1820,21 +1820,21 @@ public: SignalTabletActive(TActivationContext::AsActorContext()); } - void UpdatePDisksCounters() { - ui32 numWithoutSlotCount = 0; - ui32 numWithoutSerial = 0; + void UpdatePDisksCounters() { + ui32 numWithoutSlotCount = 0; + ui32 
numWithoutSerial = 0; for (const auto& [id, pdisk] : PDisks) { - numWithoutSlotCount += !pdisk->HasExpectedSlotCount; - numWithoutSerial += !pdisk->ExpectedSerial; - } - auto& counters = TabletCounters->Simple(); - counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SLOT_COUNT].Set(numWithoutSlotCount); - counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL].Set(numWithoutSerial); - - ui32 numNotSeen = 0; - ui32 numRemoved = 0; - ui32 numError = 0; - for (const auto& [serial, driveInfo] : DrivesSerials) { + numWithoutSlotCount += !pdisk->HasExpectedSlotCount; + numWithoutSerial += !pdisk->ExpectedSerial; + } + auto& counters = TabletCounters->Simple(); + counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SLOT_COUNT].Set(numWithoutSlotCount); + counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL].Set(numWithoutSerial); + + ui32 numNotSeen = 0; + ui32 numRemoved = 0; + ui32 numError = 0; + for (const auto& [serial, driveInfo] : DrivesSerials) { switch (driveInfo->LifeStage) { case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN: ++numNotSeen; @@ -1847,12 +1847,12 @@ public: break; default: break; - } - } - - counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_NOT_SEEN].Set(numNotSeen); - counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED].Set(numRemoved); - counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ERROR].Set(numError); + } + } + + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_NOT_SEEN].Set(numNotSeen); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED].Set(numRemoved); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ERROR].Set(numError); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1937,8 +1937,8 @@ public: const TString Path; const TPDiskCategory Category; const Schema::PDisk::Guid::Type Guid; - Schema::PDisk::PDiskConfig::Type PDiskConfig; - ui32 ExpectedSlotCount = 0; + 
Schema::PDisk::PDiskConfig::Type PDiskConfig; + ui32 ExpectedSlotCount = 0; // runtime info ui32 StaticSlotUsage = 0; @@ -1950,14 +1950,14 @@ public: , Path(pdisk.GetPath()) , Category(pdisk.GetPDiskCategory()) , Guid(pdisk.GetPDiskGuid()) - { - if (pdisk.HasPDiskConfig()) { - const auto& cfg = pdisk.GetPDiskConfig(); - bool success = cfg.SerializeToString(&PDiskConfig); - Y_VERIFY(success); - ExpectedSlotCount = cfg.GetExpectedSlotCount(); - } - } + { + if (pdisk.HasPDiskConfig()) { + const auto& cfg = pdisk.GetPDiskConfig(); + bool success = cfg.SerializeToString(&PDiskConfig); + Y_VERIFY(success); + ExpectedSlotCount = cfg.GetExpectedSlotCount(); + } + } }; std::map<TPDiskId, TStaticPDiskInfo> StaticPDisks; @@ -1972,7 +1972,7 @@ public: void OnRegisterNode(const TActorId& serverId, TNodeId nodeId); void OnWardenConnected(TNodeId nodeId); void OnWardenDisconnected(TNodeId nodeId); - void EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo); + void EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo); using TVSlotFinder = std::function<void(const TVSlotId&, const std::function<void(const TVSlotInfo&)>&)>; diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp index fe137d006f..8fa32c53f7 100644 --- a/ydb/core/mind/bscontroller/load_everything.cpp +++ b/ydb/core/mind/bscontroller/load_everything.cpp @@ -36,7 +36,7 @@ public: auto groupStoragePool = db.Table<Schema::GroupStoragePool>().Range().Select(); auto groupLatencies = db.Table<Schema::GroupLatencies>().Select(); auto scrubState = db.Table<Schema::ScrubState>().Select(); - auto pdiskSerial = db.Table<Schema::DriveSerial>().Select(); + auto pdiskSerial = db.Table<Schema::DriveSerial>().Select(); if (!state.IsReady() || !nodes.IsReady() || !disk.IsReady() @@ -54,8 +54,8 @@ public: || !boxStoragePoolPDiskFilter.IsReady() || !groupStoragePool.IsReady() || !groupLatencies.IsReady() - || !scrubState.IsReady() - || !pdiskSerial.IsReady()) { + || 
!scrubState.IsReady() + || !pdiskSerial.IsReady()) { return false; } } @@ -77,12 +77,12 @@ public: Self->SelfHealEnable = state.GetValue<T::SelfHealEnable>(); Self->DonorMode = state.GetValue<T::DonorModeEnable>(); Self->ScrubPeriodicity = TDuration::Seconds(state.GetValue<T::ScrubPeriodicity>()); - Self->SerialManagementStage = state.GetValue<T::SerialManagementStage>(); + Self->SerialManagementStage = state.GetValue<T::SerialManagementStage>(); Self->PDiskSpaceMarginPromille = state.GetValue<T::PDiskSpaceMarginPromille>(); Self->GroupReserveMin = state.GetValue<T::GroupReserveMin>(); Self->GroupReservePart = state.GetValue<T::GroupReservePart>(); Self->MaxScrubbedDisksAtOnce = state.GetValue<T::MaxScrubbedDisksAtOnce>(); - Self->PDiskSpaceColorBorder = state.GetValue<T::PDiskSpaceColorBorder>(); + Self->PDiskSpaceColorBorder = state.GetValue<T::PDiskSpaceColorBorder>(); Self->SysViewChangedSettings = true; } } @@ -205,8 +205,8 @@ public: // HostConfig, Box, BoxStoragePool if (!NTableAdapter::FetchTable<Schema::HostConfig>(db, Self, Self->HostConfigs) || !NTableAdapter::FetchTable<Schema::Box>(db, Self, Self->Boxes) - || !NTableAdapter::FetchTable<Schema::BoxStoragePool>(db, Self, Self->StoragePools) - || !NTableAdapter::FetchTable<Schema::DriveSerial>(db, Self, Self->DrivesSerials)) { + || !NTableAdapter::FetchTable<Schema::BoxStoragePool>(db, Self, Self->StoragePools) + || !NTableAdapter::FetchTable<Schema::DriveSerial>(db, Self, Self->DrivesSerials)) { return false; } for (const auto& [storagePoolId, storagePool] : Self->StoragePools) { @@ -230,14 +230,14 @@ public: } } - for (const auto& [serial, info] : Self->DrivesSerials) { + for (const auto& [serial, info] : Self->DrivesSerials) { if (info->NodeId && info->PDiskId) { const auto hostId = Self->HostRecords->GetHostId(*info->NodeId); const bool inserted = driveToBox.emplace(std::make_tuple(*hostId, serial.Serial), info->BoxId).second; - Y_VERIFY(inserted, "duplicate Serial-generated drive"); - } - } - + 
Y_VERIFY(inserted, "duplicate Serial-generated drive"); + } + } + // PDisks Self->PDisks.clear(); { @@ -257,10 +257,10 @@ public: THostId hostId; TBoxId boxId; - TString path = disks.GetValue<T::Path>(); - TString pathOrSerial = path ? path : disks.GetValue<T::ExpectedSerial>(); - Y_VERIFY_S(pathOrSerial, "For pdiskId# " << disks.GetValue<T::PDiskID>() - << " not found neither pathOrSerial nor serial"); + TString path = disks.GetValue<T::Path>(); + TString pathOrSerial = path ? path : disks.GetValue<T::ExpectedSerial>(); + Y_VERIFY_S(pathOrSerial, "For pdiskId# " << disks.GetValue<T::PDiskID>() + << " not found neither pathOrSerial nor serial"); if (const auto& x = Self->HostRecords->GetHostId(disks.GetValue<T::NodeID>())) { hostId = *x; @@ -269,7 +269,7 @@ public: } // find the owning box - if (const auto it = driveToBox.find(std::make_tuple(hostId, pathOrSerial)); it != driveToBox.end()) { + if (const auto it = driveToBox.find(std::make_tuple(hostId, pathOrSerial)); it != driveToBox.end()) { boxId = it->second; driveToBox.erase(it); } else { @@ -281,11 +281,11 @@ public: const ui32 staticSlotUsage = it != Self->StaticPDisks.end() ? 
it->second.StaticSlotUsage : 0; // construct PDisk item - Self->AddPDisk(disks.GetKey(), hostId, disks.GetValue<T::Path>(), disks.GetValue<T::Category>(), - disks.GetValue<T::Guid>(), getOpt(T::SharedWithOs()), getOpt(T::ReadCentric()), - disks.GetValueOrDefault<T::NextVSlotId>(), disks.GetValue<T::PDiskConfig>(), boxId, - Self->DefaultMaxSlots, disks.GetValue<T::Status>(), disks.GetValue<T::Timestamp>(), - disks.GetValue<T::ExpectedSerial>(), disks.GetValue<T::LastSeenSerial>(), + Self->AddPDisk(disks.GetKey(), hostId, disks.GetValue<T::Path>(), disks.GetValue<T::Category>(), + disks.GetValue<T::Guid>(), getOpt(T::SharedWithOs()), getOpt(T::ReadCentric()), + disks.GetValueOrDefault<T::NextVSlotId>(), disks.GetValue<T::PDiskConfig>(), boxId, + Self->DefaultMaxSlots, disks.GetValue<T::Status>(), disks.GetValue<T::Timestamp>(), + disks.GetValue<T::ExpectedSerial>(), disks.GetValue<T::LastSeenSerial>(), disks.GetValue<T::LastSeenPath>(), staticSlotUsage); if (!disks.Next()) diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index ab5923f373..245c782cea 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -15,15 +15,15 @@ static void RenderBytesCell(IOutputStream& out, ui64 bytes) { } } -template<typename T> -static TString PrintMaybe(const TMaybe<T>& m) { - if (m) { - return TStringBuilder() << *m; - } else { - return "<<i>null</i>>"; - } -} - +template<typename T> +static TString PrintMaybe(const TMaybe<T>& m) { + if (m) { + return TStringBuilder() << *m; + } else { + return "<<i>null</i>>"; + } +} + class TBlobStorageController::TTxMonEvent_OperationLog : public TTransactionBase<TBlobStorageController> { const TActorId RespondTo; const TCgiParameters Params; @@ -112,7 +112,7 @@ public: H3() { out << "Operation Log"; } - TABLE_CLASS("table") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TABLEH() { out << "Index"; } @@ -402,7 +402,7 @@ public: H3() { s << 
"Health-related operations since " << Since.ToRfc822StringLocal(); } - TABLE_CLASS("table") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TABLEH() { s << "UTC ts"; } @@ -661,9 +661,9 @@ bool TBlobStorageController::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr e RenderGroupDetail(str, groupId); } else if (page == "Scrub") { ScrubState.Render(str); - } else if (page == "InternalTables") { - const TString table = cgi.Has("table") ? cgi.Get("table") : "pdisks"; - RenderInternalTables(str, table); + } else if (page == "InternalTables") { + const TString table = cgi.Has("table") ? cgi.Get("table") : "pdisks"; + RenderInternalTables(str, table); } else if (page == "StopGivingGroups") { StopGivingGroups = true; str << "OK"; @@ -710,13 +710,13 @@ void TBlobStorageController::RenderFooter(IOutputStream& out) { void TBlobStorageController::RenderMonPage(IOutputStream& out) { RenderHeader(out); - out << "<a href='app?TabletID=" << TabletID() << "&page=OperationLog'>Operation Log</a><br>"; - out << "<a href='app?TabletID=" << TabletID() << "&page=SelfHeal'>Self Heal Status</a> (" << + out << "<a href='app?TabletID=" << TabletID() << "&page=OperationLog'>Operation Log</a><br>"; + out << "<a href='app?TabletID=" << TabletID() << "&page=SelfHeal'>Self Heal Status</a> (" << (SelfHealEnable ? 
"enabled" : "disabled") << ")<br>"; - out << "<a href='app?TabletID=" << TabletID() << "&page=HealthEvents'>Health events</a><br>"; - out << "<a href='app?TabletID=" << TabletID() << "&page=Scrub'>Scrub state</a><br>"; - out << "<a href='app?TabletID=" << TabletID() << "&page=InternalTables'>Internal tables</a><br>"; - + out << "<a href='app?TabletID=" << TabletID() << "&page=HealthEvents'>Health events</a><br>"; + out << "<a href='app?TabletID=" << TabletID() << "&page=Scrub'>Scrub state</a><br>"; + out << "<a href='app?TabletID=" << TabletID() << "&page=InternalTables'>Internal tables</a><br>"; + HTML(out) { DIV_CLASS("panel panel-info") { DIV_CLASS("panel-heading") { @@ -751,40 +751,40 @@ void TBlobStorageController::RenderMonPage(IOutputStream& out) { TABLED() { out << "PDisk space margin (‰)"; } TABLED() { out << PDiskSpaceMarginPromille; } } - TABLER() { - TABLED() { out << "PDisk space color border"; } - TABLED() { out << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(PDiskSpaceColorBorder); } - } + TABLER() { + TABLED() { out << "PDisk space color border"; } + TABLED() { out << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(PDiskSpaceColorBorder); } + } } } } } } - RenderFooter(out); -} - -void TBlobStorageController::RenderInternalTables(IOutputStream& out, const TString& table) { - RenderHeader(out); - - auto gen_li = [&](const TString& component) { - out << "<li" << (component == table ? " class='active'" : "") << ">"; - out << "<a href='app?TabletID=" << TabletID() << "&page=InternalTables&table=" << component << "'>"; - out << component << "</a>"; - out << "</li>"; - }; - + RenderFooter(out); +} + +void TBlobStorageController::RenderInternalTables(IOutputStream& out, const TString& table) { + RenderHeader(out); + + auto gen_li = [&](const TString& component) { + out << "<li" << (component == table ? 
" class='active'" : "") << ">"; + out << "<a href='app?TabletID=" << TabletID() << "&page=InternalTables&table=" << component << "'>"; + out << component << "</a>"; + out << "</li>"; + }; + out << "<ul class='nav nav-tabs'>"; - gen_li("pdisks"); - gen_li("vdisks"); - gen_li("groups"); - gen_li("boxes"); - gen_li("serials"); + gen_li("pdisks"); + gen_li("vdisks"); + gen_li("groups"); + gen_li("boxes"); + gen_li("serials"); out << "</ul>"; HTML(out) { - if (table == "pdisks") { - TABLE_CLASS("table") { + if (table == "pdisks") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TAG_ATTRS(TTableH, {{"title", "NodeId:PDiskId"}}) { out << "Id"; } @@ -793,12 +793,12 @@ void TBlobStorageController::RenderInternalTables(IOutputStream& out, const TStr TABLEH() { out << "Path"; } TABLEH() { out << "Guid"; } TABLEH() { out << "BoxId"; } - TABLEH() { out << "Total Size"; } + TABLEH() { out << "Total Size"; } TABLEH() { out << "Status"; } TABLEH() { out << "State"; } - TABLEH() { out << "ExpectedSerial"; } - TABLEH() { out << "LastSeenSerial"; } - TABLEH() { out << "LastSeenPath"; } + TABLEH() { out << "ExpectedSerial"; } + TABLEH() { out << "LastSeenSerial"; } + TABLEH() { out << "LastSeenPath"; } } } TABLEBODY() { @@ -819,30 +819,30 @@ void TBlobStorageController::RenderInternalTables(IOutputStream& out, const TStr out << NKikimrBlobStorage::TPDiskState::E_Name(m.GetState()); } } - TABLED() { out << pdisk->ExpectedSerial.Quote(); } - TABLED() { - TString color = pdisk->ExpectedSerial == pdisk->LastSeenSerial ? "green" : "red"; - out << "<font color='" << color << "'>" << pdisk->LastSeenSerial.Quote() << "</font>"; - } - TABLED() { out << pdisk->LastSeenPath; } + TABLED() { out << pdisk->ExpectedSerial.Quote(); } + TABLED() { + TString color = pdisk->ExpectedSerial == pdisk->LastSeenSerial ? 
"green" : "red"; + out << "<font color='" << color << "'>" << pdisk->LastSeenSerial.Quote() << "</font>"; + } + TABLED() { out << pdisk->LastSeenPath; } } } } } - } else if (table == "vdisks") { + } else if (table == "vdisks") { RenderVSlotTable(out, [&] { for (const auto& [key, value] : VSlots) { RenderVSlotRow(out, *value); } }); - } else if (table == "groups") { + } else if (table == "groups") { RenderGroupTable(out, [&] { for (const auto& [key, value] : GroupMap) { RenderGroupRow(out, *value); } }); - } else if (table == "boxes") { - TABLE_CLASS("table") { + } else if (table == "boxes") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TAG_ATTRS(TTableH, {{"colspan", "3"}}) { out << "Box attributes"; } @@ -901,33 +901,33 @@ void TBlobStorageController::RenderInternalTables(IOutputStream& out, const TStr } } } - } else if (table == "serials") { - TABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() { out << "Serial"; } - TAG_ATTRS(TTableH, {{"title", "NodeId:PDiskId"}}) { out << "PDisk id"; } - TABLEH() { out << "BoxId"; } - TABLEH() { out << "Guid"; } - TABLEH() { out << "LifeStage"; } - TABLEH() { out << "Kind"; } - TABLEH() { out << "PDiskType"; } - } - } - TABLEBODY() { - for (const auto& [serial, info] : DrivesSerials) { - TABLER() { - TABLED() { out << serial.Serial.Quote(); } + } else if (table == "serials") { + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { out << "Serial"; } + TAG_ATTRS(TTableH, {{"title", "NodeId:PDiskId"}}) { out << "PDisk id"; } + TABLEH() { out << "BoxId"; } + TABLEH() { out << "Guid"; } + TABLEH() { out << "LifeStage"; } + TABLEH() { out << "Kind"; } + TABLEH() { out << "PDiskType"; } + } + } + TABLEBODY() { + for (const auto& [serial, info] : DrivesSerials) { + TABLER() { + TABLED() { out << serial.Serial.Quote(); } TABLED() { out << "(" << PrintMaybe(info->NodeId) << ":" << PrintMaybe(info->PDiskId) << ")"; } TABLED() { out << info->BoxId; } TABLED() { out << PrintMaybe(info->Guid); } TABLED() { out 
<< info->LifeStage; } TABLED() { out << info->Kind; } TABLED() { out << info->PDiskType; } - } - } - } - } + } + } + } + } } } @@ -976,7 +976,7 @@ void TBlobStorageController::RenderGroupsInStoragePool(IOutputStream &out, const void TBlobStorageController::RenderVSlotTable(IOutputStream& out, std::function<void()> callback) { HTML(out) { - TABLE_CLASS("table") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TAG_ATTRS(TTableH, {{"colspan", "8"}}) { out << "VDisk attributes"; } @@ -993,10 +993,10 @@ void TBlobStorageController::RenderVSlotTable(IOutputStream& out, std::function< TABLEH() { out << "Status"; } TABLEH() { out << "IsReady"; } TABLEH() { out << "LastSeenReady"; } - - TABLEH() { out << "Data Size"; } - - TABLEH() { out << "Data Size"; } + + TABLEH() { out << "Data Size"; } + + TABLEH() { out << "Data Size"; } } } TABLEBODY() { @@ -1038,7 +1038,7 @@ void TBlobStorageController::RenderVSlotRow(IOutputStream& out, const TVSlotInfo void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function<void()> callback) { HTML(out) { - TABLE_CLASS("table") { + TABLE_CLASS("table") { TABLEHEAD() { TABLER() { TAG_ATTRS(TTableH, {{"title", "GroupId:Gen"}}) { out << "ID"; } @@ -1048,10 +1048,10 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function< TABLEH() { out << "Life cycle phase"; } TABLEH() { out << "Allocated size"; } TABLEH() { out << "Available size"; } - TAG_ATTRS(TTableH, {{"title", "Data Size"}}) { out << "Data<br/>Size"; } - TAG_ATTRS(TTableH, {{"title", "PutTabletLog Latency"}}) { out << "PutTabletLog<br/>Latency"; } - TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData<br/>Latency"; } - TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; } + TAG_ATTRS(TTableH, {{"title", "Data Size"}}) { out << "Data<br/>Size"; } + TAG_ATTRS(TTableH, {{"title", "PutTabletLog Latency"}}) { out << "PutTabletLog<br/>Latency"; } + TAG_ATTRS(TTableH, {{"title", "PutUserData 
Latency"}}) { out << "PutUserData<br/>Latency"; } + TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; } TABLEH() { out << "Seen operational"; } TABLEH() { out << "Operating<br/>status"; } TABLEH() { out << "Expected<br/>status"; } @@ -1114,8 +1114,8 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo TABLED() { out << (group.SeenOperational ? "YES" : ""); } const auto& status = group.Status; - TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); } - TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.ExpectedStatus); } + TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); } + TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.ExpectedStatus); } } } } diff --git a/ydb/core/mind/bscontroller/mv_object_map_ut.cpp b/ydb/core/mind/bscontroller/mv_object_map_ut.cpp index 6f0cd72d31..2d80013a1f 100644 --- a/ydb/core/mind/bscontroller/mv_object_map_ut.cpp +++ b/ydb/core/mind/bscontroller/mv_object_map_ut.cpp @@ -1,8 +1,8 @@ #include <library/cpp/testing/unittest/registar.h> -#include "mv_object_map.h" -#include "ut_helpers.h" - +#include "mv_object_map.h" +#include "ut_helpers.h" + using namespace NKikimr::NBsController; Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { @@ -22,12 +22,12 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { m = prev; test.BeginTx(version); - Ctest << "begin version " << version << Endl; + Ctest << "begin version " << version << Endl; for (size_t iter = 0; iter < 100; ++iter) { ui32 key = RandomNumber<ui32>(1000); ui32 value = RandomNumber<ui32>(); -// Ctest << key << " -> " << value << Endl; +// Ctest << key << " -> " << value << Endl; m[key] = value; if (ui32 *p = test.FindForUpdate(key)) { *p = value; @@ -40,7 +40,7 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { if (m.size() >= 50 && RandomNumber(2u) == 0) { auto it = m.begin(); std::advance(it, RandomNumber(m.size())); -// Ctest << "delete " << 
it->first << Endl; +// Ctest << "delete " << it->first << Endl; test.DeleteExistingEntry(it->first); m.erase(it); } else if (!m.size() || RandomNumber(2u) == 0) { @@ -51,7 +51,7 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { ui32 value = RandomNumber<ui32>(); m.emplace(key, value); test.CreateNewEntry(key, value); -// Ctest << "new " << key << " -> " << value << Endl; +// Ctest << "new " << key << " -> " << value << Endl; break; } } @@ -63,7 +63,7 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { UNIT_ASSERT(readp); UNIT_ASSERT_VALUES_EQUAL(*readp, it->second); const ui32 value = RandomNumber<ui32>(); -// Ctest << "modify " << it->first << " -> " << value << Endl; +// Ctest << "modify " << it->first << " -> " << value << Endl; auto *p = test.FindForUpdate(it->first); UNIT_ASSERT(p); UNIT_ASSERT_VALUES_EQUAL(*p, it->second); @@ -84,7 +84,7 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { test.FinishTx(); if (version % 2 == 0) { - Ctest << "drop version " << version << Endl; + Ctest << "drop version " << version << Endl; test.Drop(version); vm.erase(version); } @@ -92,7 +92,7 @@ Y_UNIT_TEST_SUITE(TMultiversionObjectMap) { if (vm.size() >= 3 && RandomNumber(2u) == 0) { auto it = vm.begin(); ui32 version = it->first; - Ctest << "commit version " << version << Endl; + Ctest << "commit version " << version << Endl; test.Commit(version); TMap& x = it->second; for (ui32 key = 0; key < 1000; ++key) { diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 54001477e4..a54801b742 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -1,201 +1,201 @@ #include "impl.h" #include <ydb/core/blobstorage/base/utility.h> -#include "config.h" - +#include "config.h" + namespace NKikimr::NBsController { -class TBlobStorageController::TTxUpdateNodeDrives - : public TTransactionBase<TBlobStorageController> -{ - NKikimrBlobStorage::TEvControllerUpdateNodeDrives Record; - 
std::optional<TConfigState> State; - - std::unique_ptr<IEventHandle> Response; - - void UpdateDevicesInfo(TTransactionContext& txc, TEvBlobStorage::TEvControllerNodeServiceSetUpdate* result) { - - const TNodeId nodeId = Record.GetNodeId(); - - auto createLog = [&] () { - TStringStream out; - bool first = true; - out << "["; - for (const auto& data : Record.GetDrivesData()) { - out << (std::exchange(first, false) ? "" : ", ") - << "{" - << data.GetPath() << " " - << data.GetSerialNumber() << " " - << data.GetModelNumber() << " " - << TPDiskCategory::DeviceTypeStr(PDiskTypeToPDiskType(data.GetDeviceType()), true) << " " - << "}"; - } - out << "]"; - return out.Str(); - }; - STLOG(PRI_DEBUG, BS_CONTROLLER, BSCTXRN05, "Add devicesData from NodeWarden", - (NodeId, nodeId), (Devices, createLog())); - - std::map<TString, TString> serialForPath; - for (const auto& data : Record.GetDrivesData()) { - serialForPath[data.GetPath()] = data.GetSerialNumber(); - } - - NIceDb::TNiceDb db(txc.DB); - using T = Schema::PDisk; - - TPDiskId minPDiskId = TPDiskId::MinForNode(nodeId); - for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) { - Y_VERIFY(it->second); - TPDiskInfo& info = *it->second; - TPDiskId pdiskId = it->first; - - const T::TKey::Type key(pdiskId.GetKey()); - TString serial; - - if (auto serialIt = serialForPath.find(info.Path); serialIt != serialForPath.end()) { - serial = serialIt->second; - if (info.ExpectedSerial != serial) { - TStringStream log; - auto prio = NLog::PRI_NOTICE; - - if (!info.ExpectedSerial) { - if (auto driveIt = Self->DrivesSerials.find(TSerial{serial}); driveIt != Self->DrivesSerials.end()) { - log << "device is managed by HostConfigs and was removed."; +class TBlobStorageController::TTxUpdateNodeDrives + : public TTransactionBase<TBlobStorageController> +{ + NKikimrBlobStorage::TEvControllerUpdateNodeDrives Record; + std::optional<TConfigState> State; + + std::unique_ptr<IEventHandle> 
Response; + + void UpdateDevicesInfo(TTransactionContext& txc, TEvBlobStorage::TEvControllerNodeServiceSetUpdate* result) { + + const TNodeId nodeId = Record.GetNodeId(); + + auto createLog = [&] () { + TStringStream out; + bool first = true; + out << "["; + for (const auto& data : Record.GetDrivesData()) { + out << (std::exchange(first, false) ? "" : ", ") + << "{" + << data.GetPath() << " " + << data.GetSerialNumber() << " " + << data.GetModelNumber() << " " + << TPDiskCategory::DeviceTypeStr(PDiskTypeToPDiskType(data.GetDeviceType()), true) << " " + << "}"; + } + out << "]"; + return out.Str(); + }; + STLOG(PRI_DEBUG, BS_CONTROLLER, BSCTXRN05, "Add devicesData from NodeWarden", + (NodeId, nodeId), (Devices, createLog())); + + std::map<TString, TString> serialForPath; + for (const auto& data : Record.GetDrivesData()) { + serialForPath[data.GetPath()] = data.GetSerialNumber(); + } + + NIceDb::TNiceDb db(txc.DB); + using T = Schema::PDisk; + + TPDiskId minPDiskId = TPDiskId::MinForNode(nodeId); + for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) { + Y_VERIFY(it->second); + TPDiskInfo& info = *it->second; + TPDiskId pdiskId = it->first; + + const T::TKey::Type key(pdiskId.GetKey()); + TString serial; + + if (auto serialIt = serialForPath.find(info.Path); serialIt != serialForPath.end()) { + serial = serialIt->second; + if (info.ExpectedSerial != serial) { + TStringStream log; + auto prio = NLog::PRI_NOTICE; + + if (!info.ExpectedSerial) { + if (auto driveIt = Self->DrivesSerials.find(TSerial{serial}); driveIt != Self->DrivesSerials.end()) { + log << "device is managed by HostConfigs and was removed."; if (driveIt->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) { - log << " Drive was added while node was offline, so update ExpectedSerial and" - << " remove fictional row from DriveSerial table"; - info.ExpectedSerial = serial; - Self->DrivesSerials.erase(driveIt); - 
db.Table<Schema::DriveSerial>().Key(TSerial{serial}.GetKey()).Delete(); + log << " Drive was added while node was offline, so update ExpectedSerial and" + << " remove fictional row from DriveSerial table"; + info.ExpectedSerial = serial; + Self->DrivesSerials.erase(driveIt); + db.Table<Schema::DriveSerial>().Key(TSerial{serial}.GetKey()).Delete(); } else if (driveIt->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED) { - log << " Drive is still marked as REMOVED, so do not update ExpectedSerial"; - } - } else { - // disk has not seen yet - info.ExpectedSerial = serial; - } - } else if (Self->SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL) { - prio = NLog::PRI_ERROR; - log << "new serial mismatched stored pdisk's serial"; - } else { - log << "Set new ExpectedSerial for pdisk"; - + log << " Drive is still marked as REMOVED, so do not update ExpectedSerial"; + } + } else { + // disk has not seen yet + info.ExpectedSerial = serial; + } + } else if (Self->SerialManagementStage == NKikimrBlobStorage::TSerialManagementStage::CHECK_SERIAL) { + prio = NLog::PRI_ERROR; + log << "new serial mismatched stored pdisk's serial"; + } else { + log << "Set new ExpectedSerial for pdisk"; + auto [it, emplaced] = Self->DrivesSerials.emplace(serial, MakeHolder<TDriveSerialInfo>(info.BoxId)); it->second->Guid = info.Guid; it->second->Kind = info.Kind.Kind(); it->second->PDiskType = PDiskTypeToPDiskType(info.Kind.Type()); it->second->PDiskConfig = info.PDiskConfig; it->second->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED; - - TDriveSerialInfo::Apply(Self, [&, it = it] (auto* adapter) { + + TDriveSerialInfo::Apply(Self, [&, it = it] (auto* adapter) { adapter->IssueUpdateRow(txc, TSerial{serial}, *it->second); - }); - - info.ExpectedSerial = serial; - db.Table<T>().Key(key).Update<T::ExpectedSerial>(serial); - } - STLOG(prio, BS_CONTROLLER, BSCTXRN06, log.Str(), (PDiskId, pdiskId), (Path, info.Path), - (OldSerial, 
info.ExpectedSerial), (NewSerial, serial)); - } - } - if (info.LastSeenSerial != serial) { - info.LastSeenSerial = serial; - db.Table<T>().Key(key).Update<T::LastSeenSerial>(serial); - if (serial) { - Self->ReadPDisk(pdiskId, info, result, NKikimrBlobStorage::RESTART); - } - } - } - - TNodeInfo& nodeInfo = Self->GetNode(nodeId); - Self->EraseKnownDrivesOnDisconnected(&nodeInfo); - - for (const auto& data : Record.GetDrivesData()) { - const auto& serial = data.GetSerialNumber(); + }); + + info.ExpectedSerial = serial; + db.Table<T>().Key(key).Update<T::ExpectedSerial>(serial); + } + STLOG(prio, BS_CONTROLLER, BSCTXRN06, log.Str(), (PDiskId, pdiskId), (Path, info.Path), + (OldSerial, info.ExpectedSerial), (NewSerial, serial)); + } + } + if (info.LastSeenSerial != serial) { + info.LastSeenSerial = serial; + db.Table<T>().Key(key).Update<T::LastSeenSerial>(serial); + if (serial) { + Self->ReadPDisk(pdiskId, info, result, NKikimrBlobStorage::RESTART); + } + } + } + + TNodeInfo& nodeInfo = Self->GetNode(nodeId); + Self->EraseKnownDrivesOnDisconnected(&nodeInfo); + + for (const auto& data : Record.GetDrivesData()) { + const auto& serial = data.GetSerialNumber(); if (auto it = Self->NodeForSerial.find(serial); it != Self->NodeForSerial.end() && it->second != nodeId) { - STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03, - "Received drive from NewNodeId, but drive is reported as placed in OldNodeId", - (NewNodeId, nodeId), (OldNodeId, it->second), (Serial, serial)); - } else { - Self->NodeForSerial[serial] = nodeId; - } - auto [it, emplaced] = nodeInfo.KnownDrives.emplace(serial, data); - if (it->second.DeviceType == TPDiskCategory::DEVICE_TYPE_NVME) { - it->second.DeviceType = TPDiskCategory::DEVICE_TYPE_SSD; - } - } - } - + STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03, + "Received drive from NewNodeId, but drive is reported as placed in OldNodeId", + (NewNodeId, nodeId), (OldNodeId, it->second), (Serial, serial)); + } else { + Self->NodeForSerial[serial] = nodeId; + } + auto [it, 
emplaced] = nodeInfo.KnownDrives.emplace(serial, data); + if (it->second.DeviceType == TPDiskCategory::DEVICE_TYPE_NVME) { + it->second.DeviceType = TPDiskCategory::DEVICE_TYPE_SSD; + } + } + } + public: - TTxUpdateNodeDrives(NKikimrBlobStorage::TEvControllerUpdateNodeDrives&& rec, TBlobStorageController *controller) - : TTransactionBase(controller) - , Record(std::move(rec)) - {} - - TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_UPDATE_NODE_DRIVES; } - - bool Execute(TTransactionContext& txc, const TActorContext&) override { - const TNodeId nodeId = Record.GetNodeId(); - - auto result = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId); - + TTxUpdateNodeDrives(NKikimrBlobStorage::TEvControllerUpdateNodeDrives&& rec, TBlobStorageController *controller) + : TTransactionBase(controller) + , Record(std::move(rec)) + {} + + TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_UPDATE_NODE_DRIVES; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + const TNodeId nodeId = Record.GetNodeId(); + + auto result = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); - State->CheckConsistency(); - - UpdateDevicesInfo(txc, result.get()); - - TNodeInfo& nodeInfo = Self->GetNode(nodeId); - - std::vector<TSerial> serials; - for (const auto& data : Record.GetDrivesData()) { - serials.emplace_back(data.GetSerialNumber()); - } - - try { - Self->FitPDisksForNode(*State, nodeId, serials); - State->CheckConsistency(); - } catch (const TExError& e) { - Self->EraseKnownDrivesOnDisconnected(&nodeInfo); - STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN04, - "Error during FitPDisks after receiving TEvControllerRegisterNode", (TExError, e.what())); - } - - result->Record.SetInstanceId(Self->InstanceId); - result->Record.SetComprehensive(false); - 
result->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomainUidByTabletId(Self->TabletID())); - Response = std::make_unique<IEventHandle>(MakeBlobStorageNodeWardenID(nodeId), Self->SelfId(), result.release(), 0, 0); - - TString error; + State->CheckConsistency(); + + UpdateDevicesInfo(txc, result.get()); + + TNodeInfo& nodeInfo = Self->GetNode(nodeId); + + std::vector<TSerial> serials; + for (const auto& data : Record.GetDrivesData()) { + serials.emplace_back(data.GetSerialNumber()); + } + + try { + Self->FitPDisksForNode(*State, nodeId, serials); + State->CheckConsistency(); + } catch (const TExError& e) { + Self->EraseKnownDrivesOnDisconnected(&nodeInfo); + STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN04, + "Error during FitPDisks after receiving TEvControllerRegisterNode", (TExError, e.what())); + } + + result->Record.SetInstanceId(Self->InstanceId); + result->Record.SetComprehensive(false); + result->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomainUidByTabletId(Self->TabletID())); + Response = std::make_unique<IEventHandle>(MakeBlobStorageNodeWardenID(nodeId), Self->SelfId(), result.release(), 0, 0); + + TString error; if (State->Changed() && !Self->CommitConfigUpdates(*State, false, false, txc, &error)) { - State->Rollback(); - State.reset(); - } - - return true; - } - - void Complete(const TActorContext&) override { - if (State) { - // Send new TNodeWardenServiceSet to NodeWarder inside - State->ApplyConfigUpdates(); - State.reset(); - } - if (Response) { - TActivationContext::Send(Response.release()); - } - } -}; - -class TBlobStorageController::TTxRegisterNode - : public TTransactionBase<TBlobStorageController> -{ - TEvBlobStorage::TEvControllerRegisterNode::TPtr Request; - std::unique_ptr<IEventHandle> Response; - NKikimrBlobStorage::TEvControllerUpdateNodeDrives UpdateNodeDrivesRecord; - - -public: + State->Rollback(); + State.reset(); + } + + return true; + } + + void Complete(const TActorContext&) override { + if (State) { + // Send new 
TNodeWardenServiceSet to NodeWarder inside + State->ApplyConfigUpdates(); + State.reset(); + } + if (Response) { + TActivationContext::Send(Response.release()); + } + } +}; + +class TBlobStorageController::TTxRegisterNode + : public TTransactionBase<TBlobStorageController> +{ + TEvBlobStorage::TEvControllerRegisterNode::TPtr Request; + std::unique_ptr<IEventHandle> Response; + NKikimrBlobStorage::TEvControllerUpdateNodeDrives UpdateNodeDrivesRecord; + + +public: TTxRegisterNode(TEvBlobStorage::TEvControllerRegisterNode::TPtr& ev, TBlobStorageController *controller) : TTransactionBase(controller) , Request(ev) @@ -203,7 +203,7 @@ public: TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_REGISTER_NODE; } - bool Execute(TTransactionContext& /*txc*/, const TActorContext&) override { + bool Execute(TTransactionContext& /*txc*/, const TActorContext&) override { Self->TabletCounters->Cumulative()[NBlobStorageController::COUNTER_REGISTER_NODE_COUNT].Increment(1); TRequestCounter counter(Self->TabletCounters, NBlobStorageController::COUNTER_REGISTER_NODE_USEC); @@ -212,12 +212,12 @@ public: STLOG(PRI_DEBUG, BS_CONTROLLER, BSCTXRN01, "Handle TEvControllerRegisterNode", (Request, record)); const TNodeId nodeId = record.GetNodeID(); - UpdateNodeDrivesRecord.SetNodeId(nodeId); - - for (const auto& data : record.GetDrivesData()) { - *UpdateNodeDrivesRecord.AddDrivesData() = data; - } - + UpdateNodeDrivesRecord.SetNodeId(nodeId); + + for (const auto& data : record.GetDrivesData()) { + *UpdateNodeDrivesRecord.AddDrivesData() = data; + } + Self->OnRegisterNode(request->Recipient, nodeId); Self->ProcessVDiskStatus(record.GetVDiskStatus()); @@ -233,13 +233,13 @@ public: } } - TNodeInfo& nodeInfo = Self->GetNode(nodeId); - + TNodeInfo& nodeInfo = Self->GetNode(nodeId); + auto res = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId); - TSet<ui32> groupIDsToRead; - const TPDiskId 
minPDiskId(TPDiskId::MinForNode(nodeId)); - const TVSlotId vslotId = TVSlotId::MinForPDisk(minPDiskId); + TSet<ui32> groupIDsToRead; + const TPDiskId minPDiskId(TPDiskId::MinForNode(nodeId)); + const TVSlotId vslotId = TVSlotId::MinForPDisk(minPDiskId); for (auto it = Self->VSlots.lower_bound(vslotId); it != Self->VSlots.end() && it->first.NodeId == nodeId; ++it) { Self->ReadVSlot(*it->second, res.get()); if (!it->second->IsBeingDeleted()) { @@ -288,18 +288,18 @@ public: Self->ReadGroups(groupsToDiscard, true, res.get()); - nodeInfo.IsRegistered = true; + nodeInfo.IsRegistered = true; + + for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) { + Self->ReadPDisk(it->first, *it->second, res.get(), NKikimrBlobStorage::INITIAL); + } - for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) { - Self->ReadPDisk(it->first, *it->second, res.get(), NKikimrBlobStorage::INITIAL); - } - res->Record.SetInstanceId(Self->InstanceId); res->Record.SetComprehensive(true); res->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomainUidByTabletId(Self->TabletID())); Response = std::make_unique<IEventHandle>(request->Sender, Self->SelfId(), res.release(), 0, request->Cookie); - + return true; } @@ -341,7 +341,7 @@ void TBlobStorageController::ReadGroups(TSet<ui32>& groupIDsToRead, bool discard } void TBlobStorageController::ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo& pdisk, - TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result, const NKikimrBlobStorage::EEntityStatus entityStatus) { + TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result, const NKikimrBlobStorage::EEntityStatus entityStatus) { NKikimrBlobStorage::TNodeWardenServiceSet *serviceSet = result->Record.MutableServiceSet(); NKikimrBlobStorage::TNodeWardenServiceSet::TPDisk *pDisk = serviceSet->AddPDisks(); if (const auto it = StaticPDiskMap.find(pdiskId); it != StaticPDiskMap.end()) { @@ 
-349,11 +349,11 @@ void TBlobStorageController::ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo } else { pDisk->SetNodeID(pdiskId.NodeId); pDisk->SetPDiskID(pdiskId.PDiskId); - if (pdisk.Path) { - pDisk->SetPath(pdisk.Path); - } else if (pdisk.LastSeenPath) { - pDisk->SetPath(pdisk.LastSeenPath); - } + if (pdisk.Path) { + pDisk->SetPath(pdisk.Path); + } else if (pdisk.LastSeenPath) { + pDisk->SetPath(pdisk.LastSeenPath); + } pDisk->SetPDiskCategory(pdisk.Kind.GetRaw()); pDisk->SetPDiskGuid(pdisk.Guid); if (pdisk.PDiskConfig && !pDisk->MutablePDiskConfig()->ParseFromString(pdisk.PDiskConfig)) { @@ -361,10 +361,10 @@ void TBlobStorageController::ReadPDisk(const TPDiskId& pdiskId, const TPDiskInfo (PDiskId, pdiskId.PDiskId)); } } - pDisk->SetExpectedSerial(pdisk.ExpectedSerial); - pDisk->SetManagementStage(SerialManagementStage); - pDisk->SetSpaceColorBorder(PDiskSpaceColorBorder); - pDisk->SetEntityStatus(entityStatus); + pDisk->SetExpectedSerial(pdisk.ExpectedSerial); + pDisk->SetManagementStage(SerialManagementStage); + pDisk->SetSpaceColorBorder(PDiskSpaceColorBorder); + pDisk->SetEntityStatus(entityStatus); } void TBlobStorageController::ReadVSlot(const TVSlotInfo& vslot, TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result) { @@ -395,10 +395,10 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerRegisterNode::T Execute(new TTxRegisterNode(ev, this)); } -void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateNodeDrives::TPtr& ev) { +void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateNodeDrives::TPtr& ev) { Execute(new TTxUpdateNodeDrives(std::move(ev->Get()->Record), this)); -} - +} + void TBlobStorageController::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev) { auto&& [it, inserted] = PipeServerToNode.emplace(ev->Get()->ServerId, std::nullopt); Y_VERIFY_DEBUG(inserted); @@ -466,17 +466,17 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { ScrubState.OnNodeDisconnected(nodeId); 
TNodeInfo& node = GetNode(nodeId); node.IsRegistered = false; - EraseKnownDrivesOnDisconnected(&node); + EraseKnownDrivesOnDisconnected(&node); if (!lastSeenReadyQ.empty()) { Execute(CreateTxUpdateLastSeenReady(std::move(lastSeenReadyQ))); } } -void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) { - for (const auto& [serial, driveData] : nodeInfo->KnownDrives) { - NodeForSerial.erase(serial); - } - nodeInfo->KnownDrives.clear(); -} - +void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) { + for (const auto& [serial, driveData] : nodeInfo->KnownDrives) { + NodeForSerial.erase(serial); + } + nodeInfo->KnownDrives.clear(); +} + } // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/scheme.h b/ydb/core/mind/bscontroller/scheme.h index e266b833cd..3b7a425c95 100644 --- a/ydb/core/mind/bscontroller/scheme.h +++ b/ydb/core/mind/bscontroller/scheme.h @@ -25,7 +25,7 @@ struct Schema : NIceDb::Schema { struct NodeID : Column<1, Node::ID::ColumnType> {}; // PK struct PDiskID : Column<2, Node::NextPDiskID::ColumnType> {}; // PK struct Path : Column<3, NScheme::NTypeIds::Utf8> {}; - struct Category : Column<4, NScheme::NTypeIds::Uint64> { using Type = TPDiskCategory;}; + struct Category : Column<4, NScheme::NTypeIds::Uint64> { using Type = TPDiskCategory;}; //struct SystemConfig : Column<5, NScheme::NTypeIds::String> {}; //struct PhysicalLocation : Column<6, NScheme::NTypeIds::String> {}; struct Guid : Column<7, NScheme::NTypeIds::Uint64> {}; @@ -35,13 +35,13 @@ struct Schema : NIceDb::Schema { struct PDiskConfig : Column<11, NScheme::NTypeIds::String> {}; struct Status : Column<12, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::EDriveStatus; }; struct Timestamp : Column<13, NScheme::NTypeIds::Uint64> { using Type = TInstant; }; - struct ExpectedSerial : Column<14, NScheme::NTypeIds::String> {}; - struct LastSeenSerial : Column<15, NScheme::NTypeIds::String> {}; - struct LastSeenPath : 
Column<16, NScheme::NTypeIds::String> {}; + struct ExpectedSerial : Column<14, NScheme::NTypeIds::String> {}; + struct LastSeenSerial : Column<15, NScheme::NTypeIds::String> {}; + struct LastSeenPath : Column<16, NScheme::NTypeIds::String> {}; using TKey = TableKey<NodeID, PDiskID>; // order is important using TColumns = TableColumns<NodeID, PDiskID, Path, Category, Guid, SharedWithOs, ReadCentric, NextVSlotId, - Status, Timestamp, PDiskConfig, ExpectedSerial, LastSeenSerial, LastSeenPath>; + Status, Timestamp, PDiskConfig, ExpectedSerial, LastSeenSerial, LastSeenPath>; }; struct Group : Table<4> { @@ -49,7 +49,7 @@ struct Schema : NIceDb::Schema { struct Generation : Column<2, NScheme::NTypeIds::Uint32> {}; struct ErasureSpecies : Column<3, NScheme::NTypeIds::Uint32> { using Type = TErasureType::EErasureSpecies; }; struct Owner : Column<4, NScheme::NTypeIds::Uint64> {}; - struct DesiredPDiskCategory : Column<5, NScheme::NTypeIds::Uint64> { using Type = TPDiskCategory; }; + struct DesiredPDiskCategory : Column<5, NScheme::NTypeIds::Uint64> { using Type = TPDiskCategory; }; struct DesiredVDiskCategory : Column<6, NScheme::NTypeIds::Uint64> { using Type = NKikimrBlobStorage::TVDiskKind::EVDiskKind; }; struct EncryptionMode : Column<7, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 0; }; struct LifeCyclePhase : Column<8, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 0; }; @@ -77,18 +77,18 @@ struct Schema : NIceDb::Schema { struct SelfHealEnable : Column<8, NScheme::NTypeIds::Bool> { static constexpr Type Default = false; }; struct DonorModeEnable : Column<9, NScheme::NTypeIds::Bool> { static constexpr Type Default = true; }; struct ScrubPeriodicity : Column<10, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 86400 * 30; }; - struct SerialManagementStage : Column<11, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::TSerialManagementStage::E; }; + struct SerialManagementStage : Column<11, NScheme::NTypeIds::Uint32> 
{ using Type = NKikimrBlobStorage::TSerialManagementStage::E; }; struct NextStoragePoolId : Column<12, NScheme::NTypeIds::Uint64> { static constexpr Type Default = 0; }; struct PDiskSpaceMarginPromille : Column<13, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 150; }; // 15% default margin (85% max usage) struct GroupReserveMin : Column<14, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 0; }; struct GroupReservePart : Column<15, NScheme::NTypeIds::Uint32> { static constexpr Type Default = 0; }; // parts per million struct MaxScrubbedDisksAtOnce : Column<16, NScheme::NTypeIds::Uint32> { static constexpr Type Default = Max<ui32>(); }; // no limit - struct PDiskSpaceColorBorder : Column<17, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::TPDiskSpaceColor::E; static constexpr Type Default = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; }; + struct PDiskSpaceColorBorder : Column<17, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::TPDiskSpaceColor::E; static constexpr Type Default = NKikimrBlobStorage::TPDiskSpaceColor::GREEN; }; using TKey = TableKey<FixedKey>; using TColumns = TableColumns<FixedKey, NextGroupID, SchemaVersion, NextOperationLogIndex, DefaultMaxSlots, InstanceId, SelfHealEnable, DonorModeEnable, ScrubPeriodicity, SerialManagementStage, NextStoragePoolId, - PDiskSpaceMarginPromille, GroupReserveMin, GroupReservePart, MaxScrubbedDisksAtOnce, PDiskSpaceColorBorder>; + PDiskSpaceMarginPromille, GroupReserveMin, GroupReservePart, MaxScrubbedDisksAtOnce, PDiskSpaceColorBorder>; }; struct VSlot : Table<5> { @@ -354,21 +354,21 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns<NodeId, PDiskId, VSlotId, State, ScrubCycleStartTime, ScrubCycleFinishTime, Success>; }; - struct DriveSerial : Table<129> { - struct Serial : Column<1, NScheme::NTypeIds::String> {}; // PK - struct BoxId : Column<2, Box::BoxId::ColumnType> {}; - struct NodeId : Column<3, Node::ID::ColumnType> {}; // FK PDisk.NodeID - 
struct PDiskId : Column<4, Node::NextPDiskID::ColumnType> {}; // FK PDisk.PDiskID - struct Guid : Column<5, PDisk::Guid::ColumnType> {}; // Check-only column for PDisk.Guid - struct LifeStage : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::TDriveLifeStage::E; }; - struct Kind : Column<7, HostConfigDrive::Kind::ColumnType> {}; - struct PDiskType : Column<8, HostConfigDrive::TypeCol::ColumnType> { using Type = NKikimrBlobStorage::EPDiskType; }; - struct PDiskConfig : Column<9, NScheme::NTypeIds::String> {}; - - using TKey = TableKey<Serial>; - using TColumns = TableColumns<Serial, BoxId, NodeId, PDiskId, Guid, LifeStage, Kind, PDiskType, PDiskConfig>; - }; - + struct DriveSerial : Table<129> { + struct Serial : Column<1, NScheme::NTypeIds::String> {}; // PK + struct BoxId : Column<2, Box::BoxId::ColumnType> {}; + struct NodeId : Column<3, Node::ID::ColumnType> {}; // FK PDisk.NodeID + struct PDiskId : Column<4, Node::NextPDiskID::ColumnType> {}; // FK PDisk.PDiskID + struct Guid : Column<5, PDisk::Guid::ColumnType> {}; // Check-only column for PDisk.Guid + struct LifeStage : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrBlobStorage::TDriveLifeStage::E; }; + struct Kind : Column<7, HostConfigDrive::Kind::ColumnType> {}; + struct PDiskType : Column<8, HostConfigDrive::TypeCol::ColumnType> { using Type = NKikimrBlobStorage::EPDiskType; }; + struct PDiskConfig : Column<9, NScheme::NTypeIds::String> {}; + + using TKey = TableKey<Serial>; + using TColumns = TableColumns<Serial, BoxId, NodeId, PDiskId, Guid, LifeStage, Kind, PDiskType, PDiskConfig>; + }; + using TTables = SchemaTables< Node, PDisk, @@ -390,8 +390,8 @@ struct Schema : NIceDb::Schema { OperationLog, MigrationPlan, MigrationEntry, - ScrubState, - DriveSerial + ScrubState, + DriveSerial >; using TSettings = SchemaSettings< diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index d88afd5191..b732d98e93 100644 --- 
a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -635,60 +635,60 @@ namespace NKikimr::NBsController { const TInstant now = TActivationContext::Now(); bool reschedule = false; - auto updateDiskCounters = [&]( - NKikimrBlobStorage::EDriveStatus status, - NBlobStorageController::EPercentileCounters histCounter, - NBlobStorageController::ESimpleCounters groups, - NBlobStorageController::ESimpleCounters slots, - NBlobStorageController::ESimpleCounters bytes) { - - // build histogram of PDisks in faulty state with VSlots over 'em - auto& histo = TabletCounters->Percentile()[histCounter]; - histo.Clear(); - const auto& ranges = histo.GetRanges(); // a sorted vector of ranges - for (const auto& [pdiskId, pdisk] : PDisks) { - if (pdisk->Status == status && pdisk->NumActiveSlots) { - const ui64 passed = (now - pdisk->StatusTimestamp).Seconds(); - auto comp = [](const ui64 value, const auto& range) { return value < range.RangeVal; }; - const size_t idx = std::upper_bound(ranges.begin(), ranges.end(), passed, comp) - ranges.begin() - 1; - histo.IncrementForRange(idx); - reschedule = true; - } + auto updateDiskCounters = [&]( + NKikimrBlobStorage::EDriveStatus status, + NBlobStorageController::EPercentileCounters histCounter, + NBlobStorageController::ESimpleCounters groups, + NBlobStorageController::ESimpleCounters slots, + NBlobStorageController::ESimpleCounters bytes) { + + // build histogram of PDisks in faulty state with VSlots over 'em + auto& histo = TabletCounters->Percentile()[histCounter]; + histo.Clear(); + const auto& ranges = histo.GetRanges(); // a sorted vector of ranges + for (const auto& [pdiskId, pdisk] : PDisks) { + if (pdisk->Status == status && pdisk->NumActiveSlots) { + const ui64 passed = (now - pdisk->StatusTimestamp).Seconds(); + auto comp = [](const ui64 value, const auto& range) { return value < range.RangeVal; }; + const size_t idx = std::upper_bound(ranges.begin(), ranges.end(), passed, comp) - 
ranges.begin() - 1; + histo.IncrementForRange(idx); + reschedule = true; + } } - // calculate some simple counters - ui64 vslotsOnFaultyPDisks = 0; - ui64 bytesOnFaultyPDisks = 0; - std::unordered_set<TGroupId> groupsWithSlotsOnFaultyPDisks; - for (const auto& [vslotId, vslot] : VSlots) { - if (!vslot->IsBeingDeleted() && vslot->PDisk->Status == status) { - ++vslotsOnFaultyPDisks; - bytesOnFaultyPDisks += vslot->Metrics.GetAllocatedSize(); - groupsWithSlotsOnFaultyPDisks.insert(vslot->GroupId); - } + // calculate some simple counters + ui64 vslotsOnFaultyPDisks = 0; + ui64 bytesOnFaultyPDisks = 0; + std::unordered_set<TGroupId> groupsWithSlotsOnFaultyPDisks; + for (const auto& [vslotId, vslot] : VSlots) { + if (!vslot->IsBeingDeleted() && vslot->PDisk->Status == status) { + ++vslotsOnFaultyPDisks; + bytesOnFaultyPDisks += vslot->Metrics.GetAllocatedSize(); + groupsWithSlotsOnFaultyPDisks.insert(vslot->GroupId); + } } - auto& s = TabletCounters->Simple(); - s[groups].Set(groupsWithSlotsOnFaultyPDisks.size()); - s[slots].Set(vslotsOnFaultyPDisks); - s[bytes].Set(bytesOnFaultyPDisks); - }; - - updateDiskCounters( - NKikimrBlobStorage::EDriveStatus::FAULTY, - NBlobStorageController::COUNTER_FAULTY_USETTLED_PDISKS, - NBlobStorageController::COUNTER_GROUPS_WITH_SLOTS_ON_FAULTY_DISKS, - NBlobStorageController::COUNTER_SLOTS_ON_FAULTY_DISKS, - NBlobStorageController::COUNTER_BYTES_ON_FAULTY_DISKS - ); - - updateDiskCounters( - NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED, - NBlobStorageController::COUNTER_TO_BE_REMOVED_USETTLED_PDISKS, - NBlobStorageController::COUNTER_GROUPS_WITH_SLOTS_ON_TO_BE_REMOVED_DISKS, - NBlobStorageController::COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS, - NBlobStorageController::COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS - ); - + auto& s = TabletCounters->Simple(); + s[groups].Set(groupsWithSlotsOnFaultyPDisks.size()); + s[slots].Set(vslotsOnFaultyPDisks); + s[bytes].Set(bytesOnFaultyPDisks); + }; + + updateDiskCounters( + 
NKikimrBlobStorage::EDriveStatus::FAULTY, + NBlobStorageController::COUNTER_FAULTY_USETTLED_PDISKS, + NBlobStorageController::COUNTER_GROUPS_WITH_SLOTS_ON_FAULTY_DISKS, + NBlobStorageController::COUNTER_SLOTS_ON_FAULTY_DISKS, + NBlobStorageController::COUNTER_BYTES_ON_FAULTY_DISKS + ); + + updateDiskCounters( + NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED, + NBlobStorageController::COUNTER_TO_BE_REMOVED_USETTLED_PDISKS, + NBlobStorageController::COUNTER_GROUPS_WITH_SLOTS_ON_TO_BE_REMOVED_DISKS, + NBlobStorageController::COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS, + NBlobStorageController::COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS + ); + TabletCounters->Simple()[NBlobStorageController::COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS] = SelfHealUnreassignableGroups->load(); Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateSelfHealCounters); diff --git a/ydb/core/mind/bscontroller/ut_bscontroller/main.cpp b/ydb/core/mind/bscontroller/ut_bscontroller/main.cpp index 34bcb980ff..87f7b18fb3 100644 --- a/ydb/core/mind/bscontroller/ut_bscontroller/main.cpp +++ b/ydb/core/mind/bscontroller/ut_bscontroller/main.cpp @@ -10,16 +10,16 @@ #include <ydb/core/testlib/basics/helpers.h> #include <ydb/core/testlib/basics/runtime.h> #include <ydb/core/testlib/tablet_helpers.h> - -#include <library/cpp/testing/unittest/registar.h> -#include <library/cpp/actors/core/interconnect.h> -#include <library/cpp/actors/interconnect/interconnect.h> - + +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/actors/core/interconnect.h> +#include <library/cpp/actors/interconnect/interconnect.h> + #include <util/datetime/cputimer.h> #include <util/random/random.h> -#include <google/protobuf/text_format.h> - +#include <google/protobuf/text_format.h> + using namespace NActors; using namespace NKikimr; using namespace NKikimr::NBsController; @@ -99,16 +99,16 @@ struct TEnvironmentSetup { return response->Get()->Record.GetResponse(); } - void RegisterNode() { - for (ui32 i = 1; i <= 
NodeCount; ++i) { - const TActorId self = Runtime->AllocateEdgeActor(); - auto ev = MakeHolder<TEvBlobStorage::TEvControllerRegisterNode>(i, TVector<ui32>{}, TVector<ui32>{}, TVector<NPDisk::TDriveData>{}); - Runtime->SendToPipe(TabletId, self, ev.Release(), NodeId, GetPipeConfigWithRetries()); - auto response = Runtime->GrabEdgeEventRethrow<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(self); - } - } - - + void RegisterNode() { + for (ui32 i = 1; i <= NodeCount; ++i) { + const TActorId self = Runtime->AllocateEdgeActor(); + auto ev = MakeHolder<TEvBlobStorage::TEvControllerRegisterNode>(i, TVector<ui32>{}, TVector<ui32>{}, TVector<NPDisk::TDriveData>{}); + Runtime->SendToPipe(TabletId, self, ev.Release(), NodeId, GetPipeConfigWithRetries()); + auto response = Runtime->GrabEdgeEventRethrow<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(self); + } + } + + NKikimrBlobStorage::TEvControllerSelectGroupsResult SelectGroups(const NKikimrBlobStorage::TEvControllerSelectGroups& request) { const TActorId self = Runtime->AllocateEdgeActor(); auto ev = MakeHolder<TEvBlobStorage::TEvControllerSelectGroups>(); @@ -293,37 +293,37 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { UNIT_ASSERT(env.ParsePDisks(response.GetStatus(baseConfigIndex).GetBaseConfig()) == env.ExpectedPDisks); }); } - Y_UNIT_TEST(ManyPDisksRestarts) { - int nodes = 100; - TEnvironmentSetup env(nodes, 1); - RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { - TFinalizer finalizer(env); - env.Prepare(dispatchName, setup, outActiveZone); - - NKikimrBlobStorage::TConfigRequest request; - TVector<TEnvironmentSetup::TPDiskDefinition> disks; - - int numRotDisks = 8; - int numSsdDisks = 8; - for (int i = 0; i < numRotDisks + numSsdDisks; ++i) { - TString path = TStringBuilder() << "/dev/disk" << i; - disks.emplace_back(path, i < numRotDisks ? 
NKikimrBlobStorage::ROT : NKikimrBlobStorage::SSD, false, false, 0); - } - env.DefineBox(1, "test box", disks, env.GetNodes(), request); - - env.DefineStoragePool(1, 1, "first storage pool", nodes * numRotDisks, NKikimrBlobStorage::ROT, {}, request); - env.DefineStoragePool(1, 2, "first storage pool", nodes * numSsdDisks, NKikimrBlobStorage::SSD, {}, request); - - size_t baseConfigIndex = request.CommandSize(); - request.AddCommand()->MutableQueryBaseConfig(); - - NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); - UNIT_ASSERT(response.GetSuccess()); - UNIT_ASSERT(env.ParsePDisks(response.GetStatus(baseConfigIndex).GetBaseConfig()) == env.ExpectedPDisks); - env.RegisterNode(); - }); - } - + Y_UNIT_TEST(ManyPDisksRestarts) { + int nodes = 100; + TEnvironmentSetup env(nodes, 1); + RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, [&](const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { + TFinalizer finalizer(env); + env.Prepare(dispatchName, setup, outActiveZone); + + NKikimrBlobStorage::TConfigRequest request; + TVector<TEnvironmentSetup::TPDiskDefinition> disks; + + int numRotDisks = 8; + int numSsdDisks = 8; + for (int i = 0; i < numRotDisks + numSsdDisks; ++i) { + TString path = TStringBuilder() << "/dev/disk" << i; + disks.emplace_back(path, i < numRotDisks ? 
NKikimrBlobStorage::ROT : NKikimrBlobStorage::SSD, false, false, 0); + } + env.DefineBox(1, "test box", disks, env.GetNodes(), request); + + env.DefineStoragePool(1, 1, "first storage pool", nodes * numRotDisks, NKikimrBlobStorage::ROT, {}, request); + env.DefineStoragePool(1, 2, "first storage pool", nodes * numSsdDisks, NKikimrBlobStorage::SSD, {}, request); + + size_t baseConfigIndex = request.CommandSize(); + request.AddCommand()->MutableQueryBaseConfig(); + + NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); + UNIT_ASSERT(response.GetSuccess()); + UNIT_ASSERT(env.ParsePDisks(response.GetStatus(baseConfigIndex).GetBaseConfig()) == env.ExpectedPDisks); + env.RegisterNode(); + }); + } + Y_UNIT_TEST(ExtendByCreatingSeparateBox) { const ui32 numNodes = 50; const ui32 numNodes1 = 20; @@ -826,64 +826,64 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { } } - Y_UNIT_TEST(AddDriveSerial) { - TEnvironmentSetup env(10, 1); - auto test = [&] (const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { - TFinalizer finalizer(env); - env.Prepare(dispatchName, setup, outActiveZone); - - for (int i = 0; i < 3; ++i) { - NKikimrBlobStorage::TConfigRequest request; - auto pb = request.AddCommand()->MutableAddDriveSerial(); - pb->SetSerial("SN_123"); - pb->SetBoxId(1); - NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); - if (i == 0) { - UNIT_ASSERT(response.GetSuccess()); - UNIT_ASSERT(response.StatusSize() == 1); - UNIT_ASSERT(response.GetStatus(0).GetSuccess()); - } else { - UNIT_ASSERT(!response.GetSuccess()); - UNIT_ASSERT(response.StatusSize() == 1); - UNIT_ASSERT(!response.GetStatus(0).GetSuccess()); - UNIT_ASSERT(response.GetStatus(0).GetFailReason() - == NKikimrBlobStorage::TConfigResponse::TStatus::kAlready); - } - } - }; - RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, test); - } - - Y_UNIT_TEST(AddDriveSerialMassive) { - TEnvironmentSetup env(10, 1); - auto test = 
[&] (const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { - TFinalizer finalizer(env); - env.Prepare(dispatchName, setup, outActiveZone); - - const size_t disksCount = 10; - for (size_t i = 0; i < disksCount; ++i) { - NKikimrBlobStorage::TConfigRequest request; - auto pb = request.AddCommand()->MutableAddDriveSerial(); - pb->SetSerial(TStringBuilder() << "SN_" << i); - pb->SetBoxId(1); - NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); - UNIT_ASSERT(response.GetSuccess()); - UNIT_ASSERT(response.StatusSize() == 1); - UNIT_ASSERT(response.GetStatus(0).GetSuccess()); - } - for (size_t i = 0; i < disksCount; ++i) { - NKikimrBlobStorage::TConfigRequest request; - auto pb = request.AddCommand()->MutableRemoveDriveSerial(); - pb->SetSerial(TStringBuilder() << "SN_" << i); - NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); - UNIT_ASSERT(response.GetSuccess()); - UNIT_ASSERT(response.StatusSize() == 1); - UNIT_ASSERT(response.GetStatus(0).GetSuccess()); - } - }; - RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, test); - } - + Y_UNIT_TEST(AddDriveSerial) { + TEnvironmentSetup env(10, 1); + auto test = [&] (const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { + TFinalizer finalizer(env); + env.Prepare(dispatchName, setup, outActiveZone); + + for (int i = 0; i < 3; ++i) { + NKikimrBlobStorage::TConfigRequest request; + auto pb = request.AddCommand()->MutableAddDriveSerial(); + pb->SetSerial("SN_123"); + pb->SetBoxId(1); + NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); + if (i == 0) { + UNIT_ASSERT(response.GetSuccess()); + UNIT_ASSERT(response.StatusSize() == 1); + UNIT_ASSERT(response.GetStatus(0).GetSuccess()); + } else { + UNIT_ASSERT(!response.GetSuccess()); + UNIT_ASSERT(response.StatusSize() == 1); + UNIT_ASSERT(!response.GetStatus(0).GetSuccess()); + 
UNIT_ASSERT(response.GetStatus(0).GetFailReason() + == NKikimrBlobStorage::TConfigResponse::TStatus::kAlready); + } + } + }; + RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, test); + } + + Y_UNIT_TEST(AddDriveSerialMassive) { + TEnvironmentSetup env(10, 1); + auto test = [&] (const TString& dispatchName, std::function<void(TTestActorRuntime&)> setup, bool& outActiveZone) { + TFinalizer finalizer(env); + env.Prepare(dispatchName, setup, outActiveZone); + + const size_t disksCount = 10; + for (size_t i = 0; i < disksCount; ++i) { + NKikimrBlobStorage::TConfigRequest request; + auto pb = request.AddCommand()->MutableAddDriveSerial(); + pb->SetSerial(TStringBuilder() << "SN_" << i); + pb->SetBoxId(1); + NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); + UNIT_ASSERT(response.GetSuccess()); + UNIT_ASSERT(response.StatusSize() == 1); + UNIT_ASSERT(response.GetStatus(0).GetSuccess()); + } + for (size_t i = 0; i < disksCount; ++i) { + NKikimrBlobStorage::TConfigRequest request; + auto pb = request.AddCommand()->MutableRemoveDriveSerial(); + pb->SetSerial(TStringBuilder() << "SN_" << i); + NKikimrBlobStorage::TConfigResponse response = env.Invoke(request); + UNIT_ASSERT(response.GetSuccess()); + UNIT_ASSERT(response.StatusSize() == 1); + UNIT_ASSERT(response.GetStatus(0).GetSuccess()); + } + }; + RunTestWithReboots(env.TabletIds, [&] { return env.PrepareInitialEventsFilter(); }, test); + } + Y_UNIT_TEST(OverlayMap) { for (ui32 iter = 0; iter < 100; ++iter) { struct TItem { @@ -912,7 +912,7 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { ui32 index = RandomNumber<ui32>(1000); base[index] = MakeHolder<TItem>(i); reference[index] = MakeHolder<TItem>(i); - Ctest << "initial " << index << " -> " << i << Endl; + Ctest << "initial " << index << " -> " << i << Endl; } TOverlayMap<ui32, TItem> overlay(base); @@ -923,19 +923,19 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { const ui32 index = RandomNumber(reference.size()); auto it = 
reference.begin(); std::advance(it, index); - Ctest << "deleting " << it->first << Endl; + Ctest << "deleting " << it->first << Endl; overlay.DeleteExistingEntry(it->first); reference.erase(it); } else { const ui32 index = RandomNumber<ui32>(1000); const ui32 value = RandomNumber<ui32>(); if (reference.count(index)) { - Ctest << "updating " << index << " -> " << value << Endl; + Ctest << "updating " << index << " -> " << value << Endl; TItem *valp = overlay.FindForUpdate(index); Y_VERIFY(valp); valp->Value = value; } else { - Ctest << "inserting " << index << " -> " << value << Endl; + Ctest << "inserting " << index << " -> " << value << Endl; overlay.ConstructInplaceNewEntry(index, value); } reference[index] = MakeHolder<TItem>(value); @@ -1013,15 +1013,15 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { }; for (int iter = 0; iter < 100; ++iter) { - Ctest << "Next iteration\n"; + Ctest << "Next iteration\n"; const unsigned num = 1000; TMap<unsigned, THolder<TAlpha>> alphas; TMap<unsigned, THolder<TBeta>> betas; for (unsigned key = 0; key < num; ++key) { alphas.emplace(key, MakeHolder<TAlpha>(key)); betas.emplace(key, MakeHolder<TBeta>(key)); - Ctest << Sprintf("Alpha[%u]# %p\n", key, alphas[key].Get()); - Ctest << Sprintf("Beta[%u]# %p\n", key, alphas[key].Get()); + Ctest << Sprintf("Alpha[%u]# %p\n", key, alphas[key].Get()); + Ctest << Sprintf("Beta[%u]# %p\n", key, alphas[key].Get()); } for (int i = 0; i < 1000; ++i) { const unsigned a = RandomNumber(num); @@ -1039,11 +1039,11 @@ Y_UNIT_TEST_SUITE(BsControllerConfig) { } for (unsigned key : alphaKeys) { - Ctest << Sprintf("Alphas.FindForUpdate Key# %u\n", key); + Ctest << Sprintf("Alphas.FindForUpdate Key# %u\n", key); ++state.Alphas.FindForUpdate(key)->Value; } for (unsigned key : betaKeys) { - Ctest << Sprintf("Betas.FindForUpdate Key# %u\n", key); + Ctest << Sprintf("Betas.FindForUpdate Key# %u\n", key); ++state.Betas.FindForUpdate(key)->Value; } diff --git a/ydb/core/mind/bscontroller/ut_helpers.h 
b/ydb/core/mind/bscontroller/ut_helpers.h index 1d000c5a08..960fb632f0 100644 --- a/ydb/core/mind/bscontroller/ut_helpers.h +++ b/ydb/core/mind/bscontroller/ut_helpers.h @@ -1,5 +1,5 @@ -#pragma once - -#include <util/stream/null.h> - -#define Ctest Cnull +#pragma once + +#include <util/stream/null.h> + +#define Ctest Cnull diff --git a/ydb/core/mind/bscontroller/ut_selfheal/env.h b/ydb/core/mind/bscontroller/ut_selfheal/env.h index 240c43ca60..e79bf914a1 100644 --- a/ydb/core/mind/bscontroller/ut_selfheal/env.h +++ b/ydb/core/mind/bscontroller/ut_selfheal/env.h @@ -198,7 +198,7 @@ struct TEnvironmentSetup { for (TActorId edge : edges) { WaitForEdgeActorEvent<TEvDone>(edge); } - Ctest << "All node wardens are connected to BSC" << Endl; + Ctest << "All node wardens are connected to BSC" << Endl; } void Wait(TDuration timeout) { diff --git a/ydb/core/mind/bscontroller/ut_selfheal/main.cpp b/ydb/core/mind/bscontroller/ut_selfheal/main.cpp index a8383e8ced..79bba0a26a 100644 --- a/ydb/core/mind/bscontroller/ut_selfheal/main.cpp +++ b/ydb/core/mind/bscontroller/ut_selfheal/main.cpp @@ -1,7 +1,7 @@ #include <ydb/core/mind/bscontroller/ut_helpers.h> - + #include <library/cpp/testing/unittest/registar.h> - + #include "env.h" Y_UNIT_TEST_SUITE(BsControllerTest) { @@ -41,7 +41,7 @@ Y_UNIT_TEST_SUITE(BsControllerTest) { auto move = [&](auto& from, auto& to, NKikimrBlobStorage::EDriveStatus status) { auto it = from.begin(); std::advance(it, RandomNumber(from.size())); - Ctest << "PDisk# " << *it + Ctest << "PDisk# " << *it << " setting status to " << NKikimrBlobStorage::EDriveStatus_Name(status) << Endl; request = {}; diff --git a/ydb/core/mind/bscontroller/ut_selfheal/node_warden_mock.h b/ydb/core/mind/bscontroller/ut_selfheal/node_warden_mock.h index 993fd17f55..5c993ba0fb 100644 --- a/ydb/core/mind/bscontroller/ut_selfheal/node_warden_mock.h +++ b/ydb/core/mind/bscontroller/ut_selfheal/node_warden_mock.h @@ -98,9 +98,9 @@ public: startedDynamicGroups.push_back(groupId); 
groupGenerations.push_back(gen); } - + auto ev = std::make_unique<TEvBlobStorage::TEvControllerRegisterNode>(NodeId, startedDynamicGroups, - groupGenerations, TVector<NPDisk::TDriveData>{}); + groupGenerations, TVector<NPDisk::TDriveData>{}); auto& record = ev->Record; for (const auto& [id, vdisk] : VDisks) { vdisk->Serialize(record.AddVDiskStatus()); diff --git a/ydb/core/mind/hive/storage_group_info.cpp b/ydb/core/mind/hive/storage_group_info.cpp index 209619a3e1..fbcb7fc7ba 100644 --- a/ydb/core/mind/hive/storage_group_info.cpp +++ b/ydb/core/mind/hive/storage_group_info.cpp @@ -11,13 +11,13 @@ TStorageGroupInfo::TStorageGroupInfo(const TStoragePoolInfo& storagePool, TStora bool TStorageGroupInfo::AcquireAllocationUnit(const TLeaderTabletInfo* tablet, ui32 channel) { Y_VERIFY(tablet->BoundChannels.size() > channel); - bool acquired = Units.insert({tablet, channel}).second; - if (acquired) { + bool acquired = Units.insert({tablet, channel}).second; + if (acquired) { AcquiredIOPS += tablet->BoundChannels[channel].GetIOPS(); AcquiredThroughput += tablet->BoundChannels[channel].GetThroughput(); AcquiredSize += tablet->BoundChannels[channel].GetSize(); } - return acquired; + return acquired; } bool TStorageGroupInfo::ReleaseAllocationUnit(const TLeaderTabletInfo* tablet, ui32 channel) { diff --git a/ydb/core/mind/node_broker_ut.cpp b/ydb/core/mind/node_broker_ut.cpp index a60de88a84..0d9498ddcd 100644 --- a/ydb/core/mind/node_broker_ut.cpp +++ b/ydb/core/mind/node_broker_ut.cpp @@ -94,7 +94,7 @@ void SetupServices(TTestActorRuntime &runtime, TIntrusivePtr<TNodeWardenConfig> existingNodeWardenConfig = NodeWardenConfigs[nodeIndex]; if (existingNodeWardenConfig != nullptr) { - //std::swap(nodeWardenConfig->SectorMaps, existingNodeWardenConfig->SectorMaps); + //std::swap(nodeWardenConfig->SectorMaps, existingNodeWardenConfig->SectorMaps); } if (nodeIndex == 0) { diff --git a/ydb/core/mon/mon.cpp b/ydb/core/mon/mon.cpp index 792a4dad7a..d4722266f3 100644 --- 
a/ydb/core/mon/mon.cpp +++ b/ydb/core/mon/mon.cpp @@ -364,7 +364,7 @@ namespace NActors { void Output(IMonHttpRequest &request, const NMon::IEvHttpInfoRes &result) const { if (result.GetContentType() == NMon::IEvHttpInfoRes::Html) { THtmlResultMonPage resultPage(Path, Title, Host, PreTag, result); - resultPage.Parent = this->Parent; + resultPage.Parent = this->Parent; resultPage.Output(request); } else { result.Output(request.Output()); diff --git a/ydb/core/node_whiteboard/defs.h b/ydb/core/node_whiteboard/defs.h index 34525e1f70..99e490cf97 100644 --- a/ydb/core/node_whiteboard/defs.h +++ b/ydb/core/node_whiteboard/defs.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once // unique tag to fix pragma once gcc glueing: ./ydb/core/node_whiteboard/defs.h #include <library/cpp/actors/core/defs.h> #include <library/cpp/actors/core/actor.h> @@ -6,10 +6,10 @@ #include <library/cpp/actors/core/actorid.h> #include <ydb/core/protos/services.pb.h> #include <ydb/core/debug/valgrind_check.h> -#include <util/generic/array_ref.h> -#include <util/generic/string.h> - -namespace NKikimr { - // actorlib is organic part of kikimr so we emulate global import by this directive - using namespace NActors; -} +#include <util/generic/array_ref.h> +#include <util/generic/string.h> + +namespace NKikimr { + // actorlib is organic part of kikimr so we emulate global import by this directive + using namespace NActors; +} diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h index cab958d1cd..cdf7601807 100644 --- a/ydb/core/node_whiteboard/node_whiteboard.h +++ b/ydb/core/node_whiteboard/node_whiteboard.h @@ -55,7 +55,7 @@ struct TEvWhiteboard{ EvTraceResponse, EvSignalBodyRequest, EvSignalBodyResponse, - EvPDiskStateDelete, + EvPDiskStateDelete, EvVDiskStateGenerationChange, EvEnd }; @@ -192,8 +192,8 @@ struct TEvWhiteboard{ Record.SetIncarnationGuid(incarnationGuid); } - explicit TEvVDiskStateUpdate(NKikimrWhiteboard::TVDiskStateInfo&& rec) { - Record = 
std::move(rec); + explicit TEvVDiskStateUpdate(NKikimrWhiteboard::TVDiskStateInfo&& rec) { + Record = std::move(rec); } }; @@ -217,15 +217,15 @@ struct TEvWhiteboard{ {} }; - struct TEvPDiskStateDelete : TEventPB<TEvPDiskStateDelete, NKikimrWhiteboard::TPDiskStateInfo, EvPDiskStateDelete> { - TEvPDiskStateDelete() = default; - - explicit TEvPDiskStateDelete(const ui32& pdiskId) { - Record.SetPDiskId(pdiskId); - } - }; - - + struct TEvPDiskStateDelete : TEventPB<TEvPDiskStateDelete, NKikimrWhiteboard::TPDiskStateInfo, EvPDiskStateDelete> { + TEvPDiskStateDelete() = default; + + explicit TEvPDiskStateDelete(const ui32& pdiskId) { + Record.SetPDiskId(pdiskId); + } + }; + + struct TEvVDiskStateRequest : public TEventPB<TEvVDiskStateRequest, NKikimrWhiteboard::TEvVDiskStateRequest, EvVDiskStateRequest> {}; struct TEvVDiskStateResponse : public TEventPB<TEvVDiskStateResponse, NKikimrWhiteboard::TEvVDiskStateResponse, EvVDiskStateResponse> {}; diff --git a/ydb/core/node_whiteboard/ya.make b/ydb/core/node_whiteboard/ya.make index cb41c1128b..2d9f4debd2 100644 --- a/ydb/core/node_whiteboard/ya.make +++ b/ydb/core/node_whiteboard/ya.make @@ -1,12 +1,12 @@ -LIBRARY() - -OWNER(g:kikimr) - -SRCS( - node_whiteboard.h -) - -PEERDIR( +LIBRARY() + +OWNER(g:kikimr) + +SRCS( + node_whiteboard.h +) + +PEERDIR( library/cpp/actors/core library/cpp/actors/helpers library/cpp/actors/interconnect @@ -23,6 +23,6 @@ PEERDIR( ydb/core/debug ydb/core/erasure ydb/core/protos -) - -END() +) + +END() diff --git a/ydb/core/persqueue/mirrorer.cpp b/ydb/core/persqueue/mirrorer.cpp index 2e7c19b904..37aea95ad1 100644 --- a/ydb/core/persqueue/mirrorer.cpp +++ b/ydb/core/persqueue/mirrorer.cpp @@ -384,7 +384,7 @@ void TMirrorer::CreateConsumer(TEvPQ::TEvCreateConsumer::TPtr&, const TActorCont PartitionStream.Reset(); auto factory = AppData(ctx)->PersQueueMirrorReaderFactory; - Y_VERIFY(factory); + Y_VERIFY(factory); ReadSession = factory->GetReadSession(Config, Partition, CredentialsProvider, 
MAX_BYTES_IN_FLIGHT); diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index ce56bae4a4..cc8e2419cf 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -1311,7 +1311,7 @@ void TPartition::FailBadClient(const TActorContext& ctx) } -bool CheckDiskStatus(const TStorageStatusFlags status) +bool CheckDiskStatus(const TStorageStatusFlags status) { return !status.Check(NKikimrBlobStorage::StatusDiskSpaceLightYellowMove); } diff --git a/ydb/core/protos/blobstorage.proto b/ydb/core/protos/blobstorage.proto index 21e265fc7a..a08ccbf865 100644 --- a/ydb/core/protos/blobstorage.proto +++ b/ydb/core/protos/blobstorage.proto @@ -150,18 +150,18 @@ enum EStatusFlags { // Never use this type for storage, use ui32 bitset StatusNotEnoughDiskSpaceForOperation = 32; // 0000 0010 0000 } -message TTimestamps { - optional uint64 SentByDSProxyUs = 1 [default = 0]; - optional uint64 ReceivedByVDiskUs = 2 [default = 0]; - optional uint64 SentByVDiskUs = 3 [default = 0]; - optional uint64 ReceivedByDSProxyUs = 4 [default = 0]; -} +message TTimestamps { + optional uint64 SentByDSProxyUs = 1 [default = 0]; + optional uint64 ReceivedByVDiskUs = 2 [default = 0]; + optional uint64 SentByVDiskUs = 3 [default = 0]; + optional uint64 ReceivedByDSProxyUs = 4 [default = 0]; +} enum EEntityStatus { INITIAL = 1; // entity was generated from the current state by the configuration request CREATE = 2; // entity was just created and notification is being pushed to the warden DESTROY = 3; // entity was just destroyed and the notification is being pushed to the warden - RESTART = 4; // entity has changed config or changed environment and should be restarted by warden + RESTART = 4; // entity has changed config or changed environment and should be restarted by warden } message TGroupInfo { @@ -366,7 +366,7 @@ message TEvVPut { optional uint64 Cookie = 7; optional EPutHandleClass HandleClass = 9; optional TMsgQoS MsgQoS = 10; - optional 
TTimestamps Timestamps = 23; + optional TTimestamps Timestamps = 23; } message TEvVPutResult { @@ -378,7 +378,7 @@ message TEvVPutResult { optional uint64 Cookie = 4; optional uint32 StatusFlags = 5; optional TMsgQoS MsgQoS = 10; - optional TTimestamps Timestamps = 23; + optional TTimestamps Timestamps = 23; optional float ApproximateFreeSpaceShare = 25 [default = 0]; // 0 is a special value for 'unknown' optional fixed64 IncarnationGuid = 30; @@ -467,7 +467,7 @@ message TEvVGet { optional uint64 TabletId = 21 [default = 0]; // tabletId to get the blocked generation for optional bool AcquireBlockedGeneration = 22 [default = false]; // set to true to get the blocked generation - optional TTimestamps Timestamps = 23; + optional TTimestamps Timestamps = 23; optional uint32 ForceBlockedGeneration = 24 [default = 0]; // non-zero means a successfull block must be done first } @@ -496,7 +496,7 @@ message TEvVGetResult { optional TMsgQoS MsgQoS = 10; optional uint32 BlockedGeneration = 11 [default = 0]; - optional TTimestamps Timestamps = 23; + optional TTimestamps Timestamps = 23; optional bool IsRangeOverflow = 24 [default = false]; // true if RangeQuery response is too large and is cut optional fixed64 IncarnationGuid = 30; @@ -941,10 +941,10 @@ message TNodeWardenServiceSet { optional EEntityStatus EntityStatus = 9; optional uint64 InMemoryForTestsBufferBytes = 10 [default = 0]; // non zero == force in-memory pdisk use - optional string ExpectedSerial = 11; // optional, used for serial number check in PDisk - - optional TSerialManagementStage.E ManagementStage = 12; - optional TPDiskSpaceColor.E SpaceColorBorder = 13; + optional string ExpectedSerial = 11; // optional, used for serial number check in PDisk + + optional TSerialManagementStage.E ManagementStage = 12; + optional TPDiskSpaceColor.E SpaceColorBorder = 13; } message TVDisk { @@ -999,34 +999,34 @@ message TNodeWardenCache { optional TNodeWardenServiceSet ServiceSet = 3; } -message TMockDevicesConfig { - 
repeated TDriveData Devices = 1; -} - -message TDriveData { - optional string Path = 1; - optional string SerialNumber = 2; - optional string FirmwareRevision = 3; - optional string ModelNumber = 4; - optional uint64 Size = 6; - optional bool IsMock = 7 [default = false]; - optional EPDiskType DeviceType = 5; -} - +message TMockDevicesConfig { + repeated TDriveData Devices = 1; +} + +message TDriveData { + optional string Path = 1; + optional string SerialNumber = 2; + optional string FirmwareRevision = 3; + optional string ModelNumber = 4; + optional uint64 Size = 6; + optional bool IsMock = 7 [default = false]; + optional EPDiskType DeviceType = 5; +} + message TEvControllerRegisterNode { reserved 1; optional uint32 NodeID = 2; repeated uint32 Groups = 4; repeated uint32 GroupGenerations = 5; // must be zero entries (for old nodes) and the same number as in Groups for new ones repeated TVDiskStatus VDiskStatus = 6; // actual status for currently operating VDisks - repeated TDriveData DrivesData = 7; + repeated TDriveData DrivesData = 7; +} + +message TEvControllerUpdateNodeDrives { + optional uint32 NodeId = 1; + repeated TDriveData DrivesData = 2; } -message TEvControllerUpdateNodeDrives { - optional uint32 NodeId = 1; - repeated TDriveData DrivesData = 2; -} - message TEvControllerNodeServiceSetUpdate { message TGroupMetadata { optional uint32 GroupId = 1; @@ -1287,19 +1287,19 @@ message TEvTestLoadRequest { optional double Weight = 3; } message TLoadStart { - message TRequestInfo { - optional float SendTime = 1; - optional uint64 Type = 2; - optional uint32 Size = 3; - optional EPutHandleClass PutHandleClass = 4; - } + message TRequestInfo { + optional float SendTime = 1; + optional uint64 Type = 2; + optional uint32 Size = 3; + optional EPutHandleClass PutHandleClass = 4; + } message TTabletInfo { optional uint64 TabletId = 1; optional uint32 Channel = 2; optional uint32 GroupId = 3; optional uint32 Generation = 4; - repeated TRequestInfo Requests = 5; - 
optional float ScriptedCycleDurationSec = 6; + repeated TRequestInfo Requests = 5; + optional float ScriptedCycleDurationSec = 6; } message TPerTabletProfile { repeated TTabletInfo Tablets = 1; @@ -1314,7 +1314,7 @@ message TEvTestLoadRequest { optional uint32 MaxInFlightReadBytes = 10; repeated TIntervalInfo ReadIntervals = 11; repeated TSizeInfo ReadSizes = 12; - optional uint64 MaxTotalBytesWritten = 13; + optional uint64 MaxTotalBytesWritten = 13; optional EGetHandleClass GetHandleClass = 14; }; optional uint64 Tag = 1; @@ -1326,7 +1326,7 @@ message TEvTestLoadRequest { } message TLoadStop { optional uint64 Tag = 1; - optional bool RemoveAllTags = 2; + optional bool RemoveAllTags = 2; } enum ELogMode { LOG_PARALLEL = 1; @@ -1391,49 +1391,49 @@ message TEvTestLoadRequest { // minimum distance kept between current Step of written blobs and CollectStep of barriers optional uint32 StepDistance = 15; } - message TPDiskReadLoadStart { - message TChunkInfo { - optional uint32 Slots = 1; // number of slots per chunk - optional uint32 Weight = 2; // probability weight - } - optional uint64 Tag = 1; - optional uint32 PDiskId = 2; - optional uint64 PDiskGuid = 3; + message TPDiskReadLoadStart { + message TChunkInfo { + optional uint32 Slots = 1; // number of slots per chunk + optional uint32 Weight = 2; // probability weight + } + optional uint64 Tag = 1; + optional uint32 PDiskId = 2; + optional uint64 PDiskGuid = 3; optional NKikimrBlobStorage.TVDiskID VDiskId = 4; - repeated TChunkInfo Chunks = 5; - optional uint32 DurationSeconds = 6; + repeated TChunkInfo Chunks = 5; + optional uint32 DurationSeconds = 6; optional uint32 InFlightReads = 7; optional bool Sequential = 9 [default = false]; - optional uint32 IntervalMsMin = 10; - optional uint32 IntervalMsMax = 11; + optional uint32 IntervalMsMin = 10; + optional uint32 IntervalMsMax = 11; optional bool IsWardenlessTest = 13 [default = false]; - } - message TPDiskLogLoadStart { - message TWorkerConfig { + } + message 
TPDiskLogLoadStart { + message TWorkerConfig { optional NKikimrBlobStorage.TVDiskID VDiskId = 1; - optional uint32 MaxInFlight = 2; - - // Measurement units of all parameters is bytes - optional uint32 SizeIntervalMin = 3; - optional uint32 SizeIntervalMax = 4; - optional uint64 BurstInterval = 5; - optional uint64 BurstSize = 6; - optional uint64 StorageDuration = 7; - - optional uint64 MaxTotalBytesWritten = 8; - } - - optional uint64 Tag = 1; - optional uint32 PDiskId = 2; - optional uint64 PDiskGuid = 3; - - optional uint32 DurationSeconds = 5; - repeated TWorkerConfig Workers = 7; - - optional bool IsWardenlessTest = 8 [default = false]; - } + optional uint32 MaxInFlight = 2; + + // Measurement units of all parameters is bytes + optional uint32 SizeIntervalMin = 3; + optional uint32 SizeIntervalMax = 4; + optional uint64 BurstInterval = 5; + optional uint64 BurstSize = 6; + optional uint64 StorageDuration = 7; + + optional uint64 MaxTotalBytesWritten = 8; + } + + optional uint64 Tag = 1; + optional uint32 PDiskId = 2; + optional uint64 PDiskGuid = 3; + + optional uint32 DurationSeconds = 5; + repeated TWorkerConfig Workers = 7; + + optional bool IsWardenlessTest = 8 [default = false]; + } message TKeyValueLoadStart { message TWorkerConfig { optional string KeyPrefix = 1; @@ -1483,8 +1483,8 @@ message TEvTestLoadRequest { TLoadStop LoadStop = 3; TPDiskLoadStart PDiskLoadStart = 4; TVDiskLoadStart VDiskLoadStart = 5; - TPDiskReadLoadStart PDiskReadLoadStart = 6; - TPDiskLogLoadStart PDiskLogLoadStart = 7; + TPDiskReadLoadStart PDiskReadLoadStart = 6; + TPDiskLogLoadStart PDiskLogLoadStart = 7; TKeyValueLoadStart KeyValueLoadStart = 8; TKqpLoadStart KqpLoadStart = 9; TMemoryLoadStart MemoryLoadStart = 10; diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index a2b2c09f9e..10d0a29e6c 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -11,8 +11,8 @@ package 
NKikimrBlobStorage; enum EPDiskType { ROT = 0; // rotational drives (HDD) SSD = 1; // solid state drives (SSD) - NVME = 2; // PCIe-connected solid state drives (NVMe SSD) - UNKNOWN_TYPE = 3; // used if device type is unknown or if group consists of different PDisk device types + NVME = 2; // PCIe-connected solid state drives (NVMe SSD) + UNKNOWN_TYPE = 3; // used if device type is unknown or if group consists of different PDisk device types } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -188,43 +188,43 @@ enum EDriveStatus { BROKEN = 3; // drive is not working, groups are automatically moved out of this drive upon reception of this status SPARE = 4; // spare drive -- groups are created only when being moved from BROKEN drives FAULTY = 5; // drive is expected to become BROKEN soon, new groups are not created, old groups are asynchronously moved out from this drive - TO_BE_REMOVED = 6; // same as INACTIVE, but drive is counted in fault model as not working -} - -message TGroupStatus { - enum E { - UNKNOWN = 0; // group status is unknown (default value) - FULL = 1; // all VDisks of the group are READY for specific period of time - PARTIAL = 2; // some of VDisks are operational, but group is not yet DEGRADED - DEGRADED = 3; // group is DEGRADED -- one random failure may lead to group loss (but may not lead too) - DISINTEGRATED = 4; // group is not available for operation - } -} - -message TDriveLifeStage { - enum E { - UNKNOWN = 0; // life stage is unknown (default) - NOT_SEEN = 1; // info about drive is located in BSC db, but drive is not seen in any node - ALLOCATED = 2; // PDisk is created - REMOVED = 3; // drive marked as removed - ERROR = 4; // drive was moved between nodes with allocated VDisks - } -} - -message TSerialManagementStage { - enum E { - DISCOVER_SERIAL = 0; - CHECK_SERIAL = 1; - ONLY_SERIAL = 2; - } -} - + TO_BE_REMOVED = 6; // same as INACTIVE, but drive is counted in 
fault model as not working +} + +message TGroupStatus { + enum E { + UNKNOWN = 0; // group status is unknown (default value) + FULL = 1; // all VDisks of the group are READY for specific period of time + PARTIAL = 2; // some of VDisks are operational, but group is not yet DEGRADED + DEGRADED = 3; // group is DEGRADED -- one random failure may lead to group loss (but may not lead too) + DISINTEGRATED = 4; // group is not available for operation + } +} + +message TDriveLifeStage { + enum E { + UNKNOWN = 0; // life stage is unknown (default) + NOT_SEEN = 1; // info about drive is located in BSC db, but drive is not seen in any node + ALLOCATED = 2; // PDisk is created + REMOVED = 3; // drive marked as removed + ERROR = 4; // drive was moved between nodes with allocated VDisks + } +} + +message TSerialManagementStage { + enum E { + DISCOVER_SERIAL = 0; + CHECK_SERIAL = 1; + ONLY_SERIAL = 2; + } +} + message TUpdateDriveStatus { THostKey HostKey = 1; // host on which we are looking for the drive string Path = 2; // absolute path to the device as enlisted in PDisk configuration EDriveStatus Status = 3; // new status uint32 PDiskId = 4; // may be set instead of path to identify PDisk - string Serial = 5; // may be set instead of path and PDiskId to identify PDisk + string Serial = 5; // may be set instead of path and PDiskId to identify PDisk uint64 StatusChangeTimestamp = 6; // used only in return of ReadDriveStatus } @@ -387,28 +387,28 @@ message TSetScrubPeriodicity { uint32 ScrubPeriodicity = 1; // in seconds; 0 = disable } -message TAddDriveSerial { - string Serial = 1; - uint64 BoxId = 2; - uint64 Kind = 3; - EPDiskType PDiskType = 4; // default UNKNOWN type implies auto-detection of drive type - NKikimrBlobStorage.TPDiskConfig PDiskConfig = 5; -} - -// Remove drive if empty and delete PDisk, but leave row in table with LifeStage == REMOVED -message TRemoveDriveSerial { - string Serial = 1; -} - -// Remove drive if empty and delete PDisk, but totaly erase all 
drive-related info from BSC's internal database -message TForgetDriveSerial { - string Serial = 1; -} - -message TMigrateToSerial { - TSerialManagementStage.E Stage = 1; -} - +message TAddDriveSerial { + string Serial = 1; + uint64 BoxId = 2; + uint64 Kind = 3; + EPDiskType PDiskType = 4; // default UNKNOWN type implies auto-detection of drive type + NKikimrBlobStorage.TPDiskConfig PDiskConfig = 5; +} + +// Remove drive if empty and delete PDisk, but leave row in table with LifeStage == REMOVED +message TRemoveDriveSerial { + string Serial = 1; +} + +// Remove drive if empty and delete PDisk, but totaly erase all drive-related info from BSC's internal database +message TForgetDriveSerial { + string Serial = 1; +} + +message TMigrateToSerial { + TSerialManagementStage.E Stage = 1; +} + message TSetPDiskSpaceMarginPromille { uint32 PDiskSpaceMarginPromille = 1; } @@ -458,10 +458,10 @@ message TConfigRequest { TEnableSelfHeal EnableDonorMode = 30; TDropDonorDisk DropDonorDisk = 31; TSetScrubPeriodicity SetScrubPeriodicity = 32; - TAddDriveSerial AddDriveSerial = 33; - TRemoveDriveSerial RemoveDriveSerial = 34; - TForgetDriveSerial ForgetDriveSerial = 36; - TMigrateToSerial MigrateToSerial = 35; + TAddDriveSerial AddDriveSerial = 33; + TRemoveDriveSerial RemoveDriveSerial = 34; + TForgetDriveSerial ForgetDriveSerial = 36; + TMigrateToSerial MigrateToSerial = 35; TSetPDiskSpaceMarginPromille SetPDiskSpaceMarginPromille = 37; TUpdateSettings UpdateSettings = 38; @@ -555,8 +555,8 @@ message TBaseConfig { uint64 BoxId = 5; uint64 StoragePoolId = 6; bool SeenOperational = 7; - TGroupStatus.E OperatingStatus = 8; // group status based on latest VDisk reports only - TGroupStatus.E ExpectedStatus = 9; // status based not only on operational report, but on PDisk status and plans too + TGroupStatus.E OperatingStatus = 8; // group status based on latest VDisk reports only + TGroupStatus.E ExpectedStatus = 9; // status based not only on operational report, but on PDisk status and 
plans too } message TNode { uint32 NodeId = 1; @@ -611,7 +611,7 @@ message TConfigResponse { kVDiskIdIncorrect = 6; kVSlotNotFound = 7; kDiskIsNotDonor = 8; - kAlready = 9; + kAlready = 9; kMayGetDegraded = 10; } diff --git a/ydb/core/protos/blobstorage_pdisk_config.proto b/ydb/core/protos/blobstorage_pdisk_config.proto index b574016542..00ea5c7e22 100644 --- a/ydb/core/protos/blobstorage_pdisk_config.proto +++ b/ydb/core/protos/blobstorage_pdisk_config.proto @@ -13,36 +13,36 @@ message TPDiskConfig { ForceDisable = 4; }; - /////////////////// Depracated ////////////////////////// - //optional uint64 RootSchedulerQuant = 101; - //optional uint64 OwnerSchedulerQuant = 102; - //optional uint64 OwnerSchedulerBurst = 103; - //optional uint64 BytesSchedulerQuant = 104; - //optional uint64 BytesSchedulerBurst = 106; - //optional uint64 SyncLogBurst = 115; - //optional uint64 HugeBurst = 116; - //optional uint64 FastReadBurst = 117; - //optional uint64 OtherReadBurst = 118; - //optional uint64 LoadBurst = 119; - //optional uint64 LowReadBurst = 121; - //optional uint64 TrimCostNs = 5; // DEPRECATED, has no effect! (Use zero to disable TRIM) - //optional uint64 StaticGroupChunkReservePerMile = 1003; // DEPRECATED, has no effect! 
- //optional uint64 AntiLockBrakingSystemPerMile = 1002; // 0 to disable, 1000 for full strength - //optional uint64 HistorySize = 1004; // History record count, displayed in http monitoring - //optional uint64 DriveModelStepSizeBytes = 1007; // Not used anymore - //optional uint64 UnusedBurstInflow = 1010; - //optional uint64 LogQDurationMs = 1012; - //optional uint64 RealtimeReadQDurationMs = 1013; - //optional uint64 FastReadQDurationMs = 1014; - //optional uint64 BatchQDurationMs = 1015; - //optional uint64 TrimQDurationMs = 1016; - //optional uint64 SensitiveQsDurationNs = 1017; - //optional uint64 BestEffortQsDurationNs = 1018; - //optional uint64 AdhesionSizeNs = 1019; - //optional uint64 AdhesionLookupCost = 1020; - //optional uint64 AdhesionLookupCount = 1021; - //////////////////////////////////////////////////////////////////////////// - + /////////////////// Depracated ////////////////////////// + //optional uint64 RootSchedulerQuant = 101; + //optional uint64 OwnerSchedulerQuant = 102; + //optional uint64 OwnerSchedulerBurst = 103; + //optional uint64 BytesSchedulerQuant = 104; + //optional uint64 BytesSchedulerBurst = 106; + //optional uint64 SyncLogBurst = 115; + //optional uint64 HugeBurst = 116; + //optional uint64 FastReadBurst = 117; + //optional uint64 OtherReadBurst = 118; + //optional uint64 LoadBurst = 119; + //optional uint64 LowReadBurst = 121; + //optional uint64 TrimCostNs = 5; // DEPRECATED, has no effect! (Use zero to disable TRIM) + //optional uint64 StaticGroupChunkReservePerMile = 1003; // DEPRECATED, has no effect! 
+ //optional uint64 AntiLockBrakingSystemPerMile = 1002; // 0 to disable, 1000 for full strength + //optional uint64 HistorySize = 1004; // History record count, displayed in http monitoring + //optional uint64 DriveModelStepSizeBytes = 1007; // Not used anymore + //optional uint64 UnusedBurstInflow = 1010; + //optional uint64 LogQDurationMs = 1012; + //optional uint64 RealtimeReadQDurationMs = 1013; + //optional uint64 FastReadQDurationMs = 1014; + //optional uint64 BatchQDurationMs = 1015; + //optional uint64 TrimQDurationMs = 1016; + //optional uint64 SensitiveQsDurationNs = 1017; + //optional uint64 BestEffortQsDurationNs = 1018; + //optional uint64 AdhesionSizeNs = 1019; + //optional uint64 AdhesionLookupCost = 1020; + //optional uint64 AdhesionLookupCount = 1021; + //////////////////////////////////////////////////////////////////////////// + optional uint64 StatisticsUpdateIntervalMs = 100; optional uint64 BytesSchedulerWeight = 105; @@ -66,10 +66,10 @@ message TPDiskConfig { optional uint64 SortFreeChunksPerItems = 4; - optional uint32 SectorSize = 6; - optional uint32 ChunkSize = 7; + optional uint32 SectorSize = 6; + optional uint32 ChunkSize = 7; + - optional ESwitch GetDriveDataSwitch = 1000; // Disable is same as DoNotTouch. optional ESwitch WriteCacheSwitch = 1001; // non-force versions need GetDriveDataSwitch enabled to work. 
@@ -80,12 +80,12 @@ message TPDiskConfig { optional uint64 DriveModelTrimSpeedBps = 1009; optional uint64 ReorderingMs = 1011; optional uint64 CostLimitNs = 1022; - optional uint64 DeviceInFlight = 1023; + optional uint64 DeviceInFlight = 1023; + + optional uint32 BufferPoolBufferSizeBytes = 1024; + optional uint32 BufferPoolBufferCount = 1025; + optional uint32 MaxQueuedCompletionActions = 1026; - optional uint32 BufferPoolBufferSizeBytes = 1024; - optional uint32 BufferPoolBufferCount = 1025; - optional uint32 MaxQueuedCompletionActions = 1026; - optional uint64 InsaneLogChunksMultiplier = 2000; // Log of cutThreshold * InsaneLogChunksMultiplier is insane optional uint64 ExpectedSlotCount = 2001; // Number of slots to calculate per-vdisk disk space limit. diff --git a/ydb/core/protos/blobstorage_vdisk_config.proto b/ydb/core/protos/blobstorage_vdisk_config.proto index 0fc711beb0..5b800c1cf6 100644 --- a/ydb/core/protos/blobstorage_vdisk_config.proto +++ b/ydb/core/protos/blobstorage_vdisk_config.proto @@ -21,7 +21,7 @@ message TVDiskConfig { optional uint32 ReplInterconnectChannel = 50; optional bool BarrierValidation = 60; - optional bool EnableOverseerLsnReporting = 61; // deprecated + optional bool EnableOverseerLsnReporting = 61; // deprecated }; // organizes hierarchy of VDisk configs: VDisk config may have a base config, diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 7ff9ecfa3c..d64169d4fc 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -72,7 +72,7 @@ message TActorSystemConfig { optional uint64 SpinThreshold = 2; optional uint64 ProgressThreshold = 3; optional TAffinity Affinity = 4; - optional bool UseSchedulerActor = 5; + optional bool UseSchedulerActor = 5; } repeated TExecutor Executor = 1; @@ -230,7 +230,7 @@ message TDomainsConfig { message TBlobStorageConfig { optional NKikimrBlobStorage.TNodeWardenServiceSet ServiceSet = 1; - optional bool EnableOverseerLsnReporting = 2 [default = 
false]; // deprecated + optional bool EnableOverseerLsnReporting = 2 [default = false]; // deprecated optional string CacheFilePath = 3; optional bool CachePDisks = 4 [default = true]; optional bool CacheVDisks = 5 [default = true]; @@ -606,19 +606,19 @@ message TDynamicNodeConfig { optional string NodeHost = 6; //DEPRECATED } -message TFeatureFlags { +message TFeatureFlags { enum Tribool { UNSET = 0; VALUE_TRUE = 1; VALUE_FALSE = 2; } - optional bool EnableSeparateSolomonShardForPDisk = 1 [default = true]; + optional bool EnableSeparateSolomonShardForPDisk = 1 [default = true]; optional bool UseForsetiSchedulerByDefaultForPDisk = 2 [default = true]; optional bool EnableSeparateTrimThreadForPDisk = 3 [default = true]; optional bool EnableSeparateSubmitThreadForPDisk = 4 [default = true]; optional bool EnablePerOwnerChunkLimitForPDisk = 5 [default = false]; - optional bool TrimEntireDeviceOnStartup = 6 [default = false]; + optional bool TrimEntireDeviceOnStartup = 6 [default = false]; optional bool EnableChunkGraceForPDisk = 7 [default = true]; optional bool AllowConsistentOperationsForSchemeShard = 8 [default = true]; optional bool EnableSchemeBoard = 9 [default = true]; // deprecated: always true @@ -670,14 +670,14 @@ message TFeatureFlags { optional bool EnableTtlOnAsyncIndexedTables = 55 [default = false]; optional bool EnableBulkUpsertToAsyncIndexedTables = 56 [default = false]; optional bool EnableNodeBrokerSingleDomainMode = 57 [default = false]; - optional bool EnableKqpSessionActor = 58 [default = false]; + optional bool EnableKqpSessionActor = 58 [default = false]; // Dangerous settings we don't want in the public api unless required optional bool EnablePublicApiExternalBlobs = 59 [default = false]; optional bool EnablePublicApiKeepInMemory = 60 [default = false]; optional bool EnableImplicitScanQueryInScripts = 61 [default = true]; optional bool EnablePredicateExtractForScanQueries = 62 [default = true]; -} - +} + message TSqsConfig { optional bool 
EnableSqs = 5; optional string Root = 1 [default = "/Root/SQS"]; @@ -1355,7 +1355,7 @@ message TAppConfig { //optional TLocalConfig LocalConfig = 23; DEPRECATED optional TDynamicNodeConfig DynamicNodeConfig = 24; optional NKikimrCms.TCmsConfig CmsConfig = 25; - optional TFeatureFlags FeatureFlags = 26; + optional TFeatureFlags FeatureFlags = 26; optional TSqsConfig SqsConfig = 27; optional NKikimrPQ.TPQConfig PQConfig = 28; optional NKikimrTenantPool.TTenantPoolConfig TenantPoolConfig = 29; @@ -1364,7 +1364,7 @@ message TAppConfig { optional TConfigsDispatcherConfig ConfigsDispatcherConfig = 33; optional TTableProfilesConfig TableProfilesConfig = 34; optional NKikimrProto.TKeyConfig KeyConfig = 35; - optional NKikimrProto.TKeyConfig PDiskKeyConfig = 51; + optional NKikimrProto.TKeyConfig PDiskKeyConfig = 51; optional NKikimrNodeBroker.TConfig NodeBrokerConfig = 36; optional TTableServiceConfig TableServiceConfig = 37; optional NKikimrSharedCache.TSharedCacheConfig SharedCacheConfig = 38; // dynamic configuration via cms diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto index ecc47736c0..c0bd88bea1 100644 --- a/ydb/core/protos/counters_bs_controller.proto +++ b/ydb/core/protos/counters_bs_controller.proto @@ -12,14 +12,14 @@ enum ESimpleCounters { COUNTER_SLOTS_ON_FAULTY_DISKS = 2 [(CounterOpts) = {Name: "SlotsOnFaultyDisks"}]; COUNTER_BYTES_ON_FAULTY_DISKS = 3 [(CounterOpts) = {Name: "BytesOnFaultyDisks"}]; COUNTER_PDISKS_WITHOUT_EXPECTED_SLOT_COUNT = 4 [(CounterOpts) = {Name: "PDisksWithoutExpectedSlotCount"}]; - COUNTER_TO_BE_REMOVED_DISKS = 5 [(CounterOpts) = {Name: "ToBeRemovedDisks"}]; - COUNTER_GROUPS_WITH_SLOTS_ON_TO_BE_REMOVED_DISKS = 6 [(CounterOpts) = {Name: "GroupsWithSlotsOnToBeRemovedDisks"}]; - COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS = 7 [(CounterOpts) = {Name: "SlotsOnToBeRemovedDisks"}]; - COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS = 8 [(CounterOpts) = {Name: "BytesOnToBeRemovedDisks"}]; - 
COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL = 9 [(CounterOpts) = {Name: "PDisksWithoutExpectedSerial"}]; - COUNTER_DRIVE_SERIAL_NOT_SEEN = 10 [(CounterOpts) = {Name: "DriveSerialNotSeen"}]; - COUNTER_DRIVE_SERIAL_REMOVED = 11 [(CounterOpts) = {Name: "DriveSerialRemoved"}]; - COUNTER_DRIVE_SERIAL_ERROR = 12 [(CounterOpts) = {Name: "DriveSerialError"}]; + COUNTER_TO_BE_REMOVED_DISKS = 5 [(CounterOpts) = {Name: "ToBeRemovedDisks"}]; + COUNTER_GROUPS_WITH_SLOTS_ON_TO_BE_REMOVED_DISKS = 6 [(CounterOpts) = {Name: "GroupsWithSlotsOnToBeRemovedDisks"}]; + COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS = 7 [(CounterOpts) = {Name: "SlotsOnToBeRemovedDisks"}]; + COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS = 8 [(CounterOpts) = {Name: "BytesOnToBeRemovedDisks"}]; + COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL = 9 [(CounterOpts) = {Name: "PDisksWithoutExpectedSerial"}]; + COUNTER_DRIVE_SERIAL_NOT_SEEN = 10 [(CounterOpts) = {Name: "DriveSerialNotSeen"}]; + COUNTER_DRIVE_SERIAL_REMOVED = 11 [(CounterOpts) = {Name: "DriveSerialRemoved"}]; + COUNTER_DRIVE_SERIAL_ERROR = 12 [(CounterOpts) = {Name: "DriveSerialError"}]; COUNTER_DISK_SCRUB_WAITING_FOR_START = 13 [(CounterOpts) = {Name: "DiskScrubWaitingForStart"}]; COUNTER_DISK_SCRUB_RUNNING = 14 [(CounterOpts) = {Name: "DiskScrubRunning"}]; COUNTER_DISK_SCRUB_IN_PROGRESS = 15 [(CounterOpts) = {Name: "DiskScrubInProgress"}]; @@ -132,40 +132,40 @@ enum EPercentileCounters { Ranges { Value: 21600 Name: "21600" } Ranges { Value: 86400 Name: "inf" } }]; - - COUNTER_TO_BE_REMOVED_USETTLED_PDISKS = 2 [(CounterOpts) = { - Name: "ToBeRemovedUnsettledPDisks" - Integral: true - Ranges { Value: 0 Name: "0" } - Ranges { Value: 10 Name: "10" } - Ranges { Value: 20 Name: "20" } - Ranges { Value: 30 Name: "30" } - Ranges { Value: 40 Name: "40" } - Ranges { Value: 50 Name: "50" } - Ranges { Value: 60 Name: "60" } - Ranges { Value: 120 Name: "120" } - Ranges { Value: 180 Name: "180" } - Ranges { Value: 240 Name: "240" } - Ranges { Value: 300 Name: "300" } - Ranges { Value: 360 
Name: "360" } - Ranges { Value: 420 Name: "420" } - Ranges { Value: 480 Name: "480" } - Ranges { Value: 540 Name: "540" } - Ranges { Value: 600 Name: "600" } - Ranges { Value: 1200 Name: "1200" } - Ranges { Value: 1800 Name: "1800" } - Ranges { Value: 2400 Name: "2400" } - Ranges { Value: 3000 Name: "3000" } - Ranges { Value: 3600 Name: "3600" } - Ranges { Value: 5400 Name: "5400" } - Ranges { Value: 7200 Name: "7200" } - Ranges { Value: 9000 Name: "9000" } - Ranges { Value: 10800 Name: "10800" } - Ranges { Value: 14400 Name: "14400" } - Ranges { Value: 18000 Name: "18000" } - Ranges { Value: 21600 Name: "21600" } - Ranges { Value: 86400 Name: "inf" } - }]; + + COUNTER_TO_BE_REMOVED_USETTLED_PDISKS = 2 [(CounterOpts) = { + Name: "ToBeRemovedUnsettledPDisks" + Integral: true + Ranges { Value: 0 Name: "0" } + Ranges { Value: 10 Name: "10" } + Ranges { Value: 20 Name: "20" } + Ranges { Value: 30 Name: "30" } + Ranges { Value: 40 Name: "40" } + Ranges { Value: 50 Name: "50" } + Ranges { Value: 60 Name: "60" } + Ranges { Value: 120 Name: "120" } + Ranges { Value: 180 Name: "180" } + Ranges { Value: 240 Name: "240" } + Ranges { Value: 300 Name: "300" } + Ranges { Value: 360 Name: "360" } + Ranges { Value: 420 Name: "420" } + Ranges { Value: 480 Name: "480" } + Ranges { Value: 540 Name: "540" } + Ranges { Value: 600 Name: "600" } + Ranges { Value: 1200 Name: "1200" } + Ranges { Value: 1800 Name: "1800" } + Ranges { Value: 2400 Name: "2400" } + Ranges { Value: 3000 Name: "3000" } + Ranges { Value: 3600 Name: "3600" } + Ranges { Value: 5400 Name: "5400" } + Ranges { Value: 7200 Name: "7200" } + Ranges { Value: 9000 Name: "9000" } + Ranges { Value: 10800 Name: "10800" } + Ranges { Value: 14400 Name: "14400" } + Ranges { Value: 18000 Name: "18000" } + Ranges { Value: 21600 Name: "21600" } + Ranges { Value: 86400 Name: "inf" } + }]; COUNTER_NUM_NOT_READY_VDISKS = 3 [(CounterOpts) = { Name: "NumNotReadyVDisks" @@ -213,5 +213,5 @@ enum ETxTypes { TXTYPE_SCRUB_START = 19 
[(TxTypeOpts) = {Name: "TTxScrubStart"}]; TXTYPE_SCRUB_QUANTUM_FINISHED = 20 [(TxTypeOpts) = {Name: "TTxScrubQuantumFinished"}]; TXTYPE_UPDATE_LAST_SEEN_READY = 21 [(TxTypeOpts) = {Name: "TTxUpdateLastSeenReady"}]; - TXTYPE_UPDATE_NODE_DRIVES = 22 [(TxTypeOpts) = {Name: "TTxUpdateNodeDrives"}]; + TXTYPE_UPDATE_NODE_DRIVES = 22 [(TxTypeOpts) = {Name: "TTxUpdateNodeDrives"}]; } diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index 5c0270ea01..33e598c1c2 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -1420,7 +1420,7 @@ message TBackupProgress { repeated TShardError Errors = 5; optional TYTSettings YTSettings = 6; optional uint64 DataTotalSize = 7; - optional uint64 TxId = 8; + optional uint64 TxId = 8; } message TLastBackupResult { @@ -1430,7 +1430,7 @@ message TLastBackupResult { repeated TShardError Errors = 4; optional TYTSettings YTSettings = 5; optional uint64 DataTotalSize = 6; - optional uint64 TxId = 7; + optional uint64 TxId = 7; }; // Result for TDescribePath request diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto index 486ecdb95e..b96713ca29 100644 --- a/ydb/core/protos/node_whiteboard.proto +++ b/ydb/core/protos/node_whiteboard.proto @@ -13,14 +13,14 @@ extend google.protobuf.FieldOptions { optional uint32 InsignificantChangePercent = 70554; } -enum EFlag { - Grey = 0; - Green = 1; - Yellow = 2; - Orange = 3; - Red = 4; -} - +enum EFlag { + Grey = 0; + Green = 1; + Yellow = 2; + Orange = 3; + Red = 4; +} + message TCustomTabletAttribute { optional uint32 Key = 1; // implementation-dependent optional string Value = 2; @@ -114,7 +114,7 @@ message TPDiskStateInfo { optional EFlag StateFlag = 16; // overall state - to be filled optional EFlag Overall = 17; - optional string SerialNumber = 18; + optional string SerialNumber = 18; } message TEvPDiskStateRequest { @@ -183,9 +183,9 @@ message TVDiskStateInfo { // How many unsynced 
VDisks from current BlobStorage group we see optional uint64 UnsyncedVDisks = 15 [default = 0]; // How much this VDisk have allocated on corresponding PDisk - optional uint64 AllocatedSize = 16 [(InsignificantChangeAmount) = 536870912]; // 512MiB - // How much space is available for VDisk corresponding to PDisk's hard space limits - optional uint64 AvailableSize = 28 [(InsignificantChangeAmount) = 536870912]; // 512MiB + optional uint64 AllocatedSize = 16 [(InsignificantChangeAmount) = 536870912]; // 512MiB + // How much space is available for VDisk corresponding to PDisk's hard space limits + optional uint64 AvailableSize = 28 [(InsignificantChangeAmount) = 536870912]; // 512MiB // Does this disk has some unreadable but not yet restored blobs? optional bool HasUnreadableBlobs = 24; optional fixed64 IncarnationGuid = 25; @@ -287,7 +287,7 @@ message TSystemStateInfo { optional uint64 MemoryLimit = 27; optional EConfigState ConfigState = 28 [default = Consistent]; optional uint64 MemoryUsedInAlloc = 29; - optional double MaxDiskUsage = 30; + optional double MaxDiskUsage = 30; optional NActorsInterconnect.TNodeLocation Location = 31; } diff --git a/ydb/core/protos/out/out.cpp b/ydb/core/protos/out/out.cpp index 4e647e2719..28c49a5c36 100644 --- a/ydb/core/protos/out/out.cpp +++ b/ydb/core/protos/out/out.cpp @@ -69,18 +69,18 @@ Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::EDriveStatus, stream, value) { stream << NKikimrBlobStorage::EDriveStatus_Name(value); } -Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TGroupStatus::E, stream, value) { - stream << NKikimrBlobStorage::TGroupStatus::E_Name(value); -} - -Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TDriveLifeStage::E, stream, value) { - stream << NKikimrBlobStorage::TDriveLifeStage::E_Name(value); -} - -Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TSerialManagementStage::E, stream, value) { - stream << NKikimrBlobStorage::TSerialManagementStage::E_Name(value); -} - +Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TGroupStatus::E, stream, 
value) { + stream << NKikimrBlobStorage::TGroupStatus::E_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TDriveLifeStage::E, stream, value) { + stream << NKikimrBlobStorage::TDriveLifeStage::E_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TSerialManagementStage::E, stream, value) { + stream << NKikimrBlobStorage::TSerialManagementStage::E_Name(value); +} + Y_DECLARE_OUT_SPEC(, NKikimrResourceBroker::EResourceType, stream, value) { stream << NKikimrResourceBroker::EResourceType_Name(value); } @@ -105,10 +105,10 @@ Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::EPDiskType, stream, value) { stream << NKikimrBlobStorage::EPDiskType_Name(value); } -Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::EVDiskStatus, stream, value) { - stream << NKikimrBlobStorage::EVDiskStatus_Name(value); -} - +Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::EVDiskStatus, stream, value) { + stream << NKikimrBlobStorage::EVDiskStatus_Name(value); +} + Y_DECLARE_OUT_SPEC(, NKikimrNodeBroker::TStatus::ECode, stream, value) { stream << NKikimrNodeBroker::TStatus::ECode_Name(value); } @@ -144,23 +144,23 @@ Y_DECLARE_OUT_SPEC(, NKikimrTxDataShard::EDatashardState, stream, value) { Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TPDiskState::E, stream, value) { stream << NKikimrBlobStorage::TPDiskState::E_Name(value); } - -Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TPDiskSpaceColor::E, stream, value) { - stream << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(value); -} - -Y_DECLARE_OUT_SPEC(, NKikimrWhiteboard::EFlag, stream, value) { - stream << NKikimrWhiteboard::EFlag_Name(value); -} + +Y_DECLARE_OUT_SPEC(, NKikimrBlobStorage::TPDiskSpaceColor::E, stream, value) { + stream << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrWhiteboard::EFlag, stream, value) { + stream << NKikimrWhiteboard::EFlag_Name(value); +} Y_DECLARE_OUT_SPEC(, NKikimrTxDataShard::TEvCompactTableResult::EStatus, stream, value) { stream << 
NKikimrTxDataShard::TEvCompactTableResult::EStatus_Name(value); } - -Y_DECLARE_OUT_SPEC(, NKikimrKqp::EQueryAction, stream, value) { - stream << NKikimrKqp::EQueryAction_Name(value); -} - -Y_DECLARE_OUT_SPEC(, NKikimrKqp::EQueryType, stream, value) { - stream << NKikimrKqp::EQueryType_Name(value); -} + +Y_DECLARE_OUT_SPEC(, NKikimrKqp::EQueryAction, stream, value) { + stream << NKikimrKqp::EQueryAction_Name(value); +} + +Y_DECLARE_OUT_SPEC(, NKikimrKqp::EQueryType, stream, value) { + stream << NKikimrKqp::EQueryType_Name(value); +} diff --git a/ydb/core/protos/services.proto b/ydb/core/protos/services.proto index 6d7f7f09c6..c17c8a7dc3 100644 --- a/ydb/core/protos/services.proto +++ b/ydb/core/protos/services.proto @@ -28,7 +28,7 @@ enum EServiceKikimr { BS_SYNCER = 272; BS_REPL = 273; BS_PDISK = 274; - BS_PDISK_TEST = 1102; + BS_PDISK_TEST = 1102; BS_YARD = 275; BS_PROXY = 276; BS_NODE = 277; @@ -218,7 +218,7 @@ enum EServiceKikimr { KQP_COMPILE_REQUEST = 542; KQP_NODE = 543; KQP_LOAD_TEST = 544; - KQP_SESSION = 545; + KQP_SESSION = 545; TABLET_RESOURCE_BROKER = 540; @@ -600,7 +600,7 @@ message TActivity { INTERCONNECT_POLLER = 285; INTERCONNECT_SESSION_KILLER = 286; IMMEDIATE_CONTROL_BOARD = 287; - DS_PROXY_OVERSEER_ACTOR = 288; // deprecated + DS_PROXY_OVERSEER_ACTOR = 288; // deprecated KQP_REQUEST_HANDLER = 289; KESUS_RESOLVE_ACTOR = 290; HIVE_MON_REQUEST = 291; @@ -856,7 +856,7 @@ message TActivity { KQP_TEST_WORKLOAD = 550; PQ_PARTITION_WRITER_ACTOR = 551; CHANGE_SENDER_CDC_ACTOR_PARTITION = 552; - KQP_SESSION_ACTOR = 553; + KQP_SESSION_ACTOR = 553; CHANGE_EXCHANGE_SPLIT_ACTOR = 554; BS_DEFRAG_REWRITER = 555; BS_DEFRAG_SCHEDULER = 556; diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp index cc2b48baed..783758e99d 100644 --- a/ydb/core/tablet/node_whiteboard.cpp +++ b/ydb/core/tablet/node_whiteboard.cpp @@ -351,7 +351,7 @@ protected: HFunc(TEvWhiteboard::TEvNodeStateRequest, Handle); 
HFunc(TEvWhiteboard::TEvPDiskStateUpdate, Handle); HFunc(TEvWhiteboard::TEvPDiskStateRequest, Handle); - HFunc(TEvWhiteboard::TEvPDiskStateDelete, Handle); + HFunc(TEvWhiteboard::TEvPDiskStateDelete, Handle); HFunc(TEvWhiteboard::TEvVDiskStateUpdate, Handle); HFunc(TEvWhiteboard::TEvVDiskStateGenerationChange, Handle); HFunc(TEvWhiteboard::TEvVDiskStateDelete, Handle); @@ -504,7 +504,7 @@ protected: NKikimrWhiteboard::EFlag eFlag = NKikimrWhiteboard::EFlag::Green; NKikimrWhiteboard::EFlag pDiskFlag = NKikimrWhiteboard::EFlag::Green; ui32 yellowFlags = 0; - double maxDiskUsage = 0; + double maxDiskUsage = 0; for (const auto& pr : PDiskStateInfo) { if (!pr.second.HasState()) { pDiskFlag = std::max(pDiskFlag, NKikimrWhiteboard::EFlag::Yellow); @@ -537,10 +537,10 @@ protected: pDiskFlag = std::max(pDiskFlag, NKikimrWhiteboard::EFlag::Yellow); ++yellowFlags; } - maxDiskUsage = std::max(maxDiskUsage, 1.0 - avail); + maxDiskUsage = std::max(maxDiskUsage, 1.0 - avail); } } - SystemStateInfo.SetMaxDiskUsage(maxDiskUsage); + SystemStateInfo.SetMaxDiskUsage(maxDiskUsage); if (pDiskFlag == NKikimrWhiteboard::EFlag::Yellow) { switch (yellowFlags) { case 1: @@ -646,16 +646,16 @@ protected: ctx.Send(ev->Sender, response.Release(), 0, ev->Cookie); } - void Handle(TEvWhiteboard::TEvPDiskStateDelete::TPtr &ev, const TActorContext &ctx) { - auto pdiskId = ev->Get()->Record.GetPDiskId(); - - auto it = PDiskStateInfo.find(pdiskId); - if (it != PDiskStateInfo.end()) { - PDiskStateInfo.erase(it); - UpdateSystemState(ctx); - } - } - + void Handle(TEvWhiteboard::TEvPDiskStateDelete::TPtr &ev, const TActorContext &ctx) { + auto pdiskId = ev->Get()->Record.GetPDiskId(); + + auto it = PDiskStateInfo.find(pdiskId); + if (it != PDiskStateInfo.end()) { + PDiskStateInfo.erase(it); + UpdateSystemState(ctx); + } + } + void Handle(TEvWhiteboard::TEvVDiskStateRequest::TPtr &ev, const TActorContext &ctx) { const auto& request = ev->Get()->Record; ui64 changedSince = request.HasChangedSince() ? 
request.GetChangedSince() : 0; diff --git a/ydb/core/test_tablet/load_actor_write.cpp b/ydb/core/test_tablet/load_actor_write.cpp index 67a5dbfa12..719a0a334c 100644 --- a/ydb/core/test_tablet/load_actor_write.cpp +++ b/ydb/core/test_tablet/load_actor_write.cpp @@ -1,16 +1,16 @@ #include "load_actor_impl.h" #include <ydb/core/util/lz4_data_generator.h> - + namespace NKikimr::NTestShard { void TLoadActor::GenerateKeyValue(TString *key, TString *value, bool *isInline) { - const size_t len = GenerateRandomSize(Settings.GetSizes(), isInline) + sizeof(ui64); - ui64 seed = TAppData::RandomProvider->GenRand64(); + const size_t len = GenerateRandomSize(Settings.GetSizes(), isInline) + sizeof(ui64); + ui64 seed = TAppData::RandomProvider->GenRand64(); TString data = FastGenDataForLZ4(len, seed); - char *charData = data.Detach(); - for (size_t i = 0; i < Min<size_t>(sizeof(seed), data.size()); ++i) { - charData[i] = *(reinterpret_cast<char*>(&seed) + i); + char *charData = data.Detach(); + for (size_t i = 0; i < Min<size_t>(sizeof(seed), data.size()); ++i) { + charData[i] = *(reinterpret_cast<char*>(&seed) + i); } *key = MD5::Calc(data); *value = std::move(data); diff --git a/ydb/core/testlib/actors/test_runtime.cpp b/ydb/core/testlib/actors/test_runtime.cpp index 2dbdf2986a..63f0436bff 100644 --- a/ydb/core/testlib/actors/test_runtime.cpp +++ b/ydb/core/testlib/actors/test_runtime.cpp @@ -119,7 +119,7 @@ namespace NActors { node->LogSettings->MessagePrefix = " node " + ToString(nodeId); auto* nodeAppData = node->GetAppData<NKikimr::TAppData>(); - nodeAppData->DataShardExportFactory = app0->DataShardExportFactory; + nodeAppData->DataShardExportFactory = app0->DataShardExportFactory; nodeAppData->DomainsInfo = app0->DomainsInfo; nodeAppData->ChannelProfiles = app0->ChannelProfiles; nodeAppData->Counters = node->DynamicCounters; diff --git a/ydb/core/testlib/basics/appdata.h b/ydb/core/testlib/basics/appdata.h index 3b03385bdf..409869161f 100644 --- 
a/ydb/core/testlib/basics/appdata.h +++ b/ydb/core/testlib/basics/appdata.h @@ -14,8 +14,8 @@ namespace NKikimr { // FIXME - // Split this factory - class TDataShardExportFactory : public NKikimr::NDataShard::IExportFactory { + // Split this factory + class TDataShardExportFactory : public NKikimr::NDataShard::IExportFactory { public: NKikimr::NDataShard::IExport* CreateExportToYt(bool useTypeV3) const override { Y_UNUSED(useTypeV3); @@ -39,8 +39,8 @@ namespace NKikimr { TIntrusivePtr<NScheme::TTypeRegistry> Types; TIntrusivePtr<NMiniKQL::IFunctionRegistry> Funcs; TIntrusivePtr<TFormatFactory> Formats; - std::shared_ptr<NDataShard::IExportFactory> DataShardExportFactory; - std::shared_ptr<NPDisk::IIoContextFactory> IoContext; + std::shared_ptr<NDataShard::IExportFactory> DataShardExportFactory; + std::shared_ptr<NPDisk::IIoContextFactory> IoContext; ~TMine(); }; diff --git a/ydb/core/testlib/basics/storage.h b/ydb/core/testlib/basics/storage.h index 0e691ef89e..36cee17818 100644 --- a/ydb/core/testlib/basics/storage.h +++ b/ydb/core/testlib/basics/storage.h @@ -66,7 +66,7 @@ namespace NKikimr { FormatPDisk(PDiskPath, Conf.DiskSize, Conf.SectorSize, Conf.ChunkSize, PDiskGuid, 0x123 + salt, 0x456 + salt, 0x789 + salt, mainKey, - "", false, false, SectorMap); + "", false, false, SectorMap); } } diff --git a/ydb/core/testlib/basics/ya.make b/ydb/core/testlib/basics/ya.make index 0543e9b1b1..61f74922f4 100644 --- a/ydb/core/testlib/basics/ya.make +++ b/ydb/core/testlib/basics/ya.make @@ -1,6 +1,6 @@ LIBRARY() -OWNER(g:kikimr) +OWNER(g:kikimr) SRCS( appdata.cpp diff --git a/ydb/core/testlib/tablet_helpers.cpp b/ydb/core/testlib/tablet_helpers.cpp index d4e0dbe26c..a853040b93 100644 --- a/ydb/core/testlib/tablet_helpers.cpp +++ b/ydb/core/testlib/tablet_helpers.cpp @@ -722,9 +722,9 @@ namespace NKikimr { NKikimrBlobStorage::TDefineHostConfig hostConfig; hostConfig.SetHostConfigId(nodeId); - TString path = TStringBuilder() << runtime.GetTempDir() << "pdisk_1.dat"; - 
hostConfig.AddDrive()->SetPath(path); - Cerr << "tablet_helpers.cpp: SetPath # " << path << Endl; + TString path = TStringBuilder() << runtime.GetTempDir() << "pdisk_1.dat"; + hostConfig.AddDrive()->SetPath(path); + Cerr << "tablet_helpers.cpp: SetPath # " << path << Endl; bsConfigureRequest->Record.MutableRequest()->AddCommand()->MutableDefineHostConfig()->CopyFrom(hostConfig); auto &host = *boxConfig.AddHost(); diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index 08be0a3592..d4907c26f1 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -396,9 +396,9 @@ namespace Tests { NKikimrBlobStorage::TDefineHostConfig hostConfig; hostConfig.SetHostConfigId(nodeId); - TString path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; - hostConfig.AddDrive()->SetPath(path); - Cerr << "test_client.cpp: SetPath # " << path << Endl; + TString path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; + hostConfig.AddDrive()->SetPath(path); + Cerr << "test_client.cpp: SetPath # " << path << Endl; bsConfigureRequest->Record.MutableRequest()->AddCommand()->MutableDefineHostConfig()->CopyFrom(hostConfig); auto& host = *boxConfig.AddHost(); diff --git a/ydb/core/tx/datashard/export_iface.h b/ydb/core/tx/datashard/export_iface.h index e886dcc1ae..2374a9ab8b 100644 --- a/ydb/core/tx/datashard/export_iface.h +++ b/ydb/core/tx/datashard/export_iface.h @@ -26,7 +26,7 @@ public: virtual void Shutdown() const = 0; }; -class IExportFactory { +class IExportFactory { public: virtual ~IExportFactory() = default; diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index 2c71593453..9cfd8037b7 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -481,7 +481,7 @@ private: bool PrevVal; }; -NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTestEnvOptions& opts, 
TSchemeShardFactory ssFactory, std::shared_ptr<NKikimr::NDataShard::IExportFactory> dsExportFactory) +NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTestEnvOptions& opts, TSchemeShardFactory ssFactory, std::shared_ptr<NKikimr::NDataShard::IExportFactory> dsExportFactory) : SchemeShardFactory(ssFactory) , HiveState(new TFakeHiveState) , CoordinatorState(new TFakeCoordinator::TState) @@ -492,7 +492,7 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe ui64 coordinator = TTestTxConfig::Coordinator; ui64 txAllocator = TTestTxConfig::TxAllocator; - TAppPrepare app(dsExportFactory ? dsExportFactory : static_cast<std::shared_ptr<NKikimr::NDataShard::IExportFactory>>(std::make_shared<TDataShardExportFactory>())); + TAppPrepare app(dsExportFactory ? dsExportFactory : static_cast<std::shared_ptr<NKikimr::NDataShard::IExportFactory>>(std::make_shared<TDataShardExportFactory>())); app.SetEnableDataColumnForIndexTable(true); app.SetEnableSystemViews(opts.EnableSystemViews_); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.h b/ydb/core/tx/schemeshard/ut_helpers/test_env.h index d45fd7f305..72b934c570 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.h +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.h @@ -65,7 +65,7 @@ namespace NSchemeShardUT_Private { TTestEnv(TTestActorRuntime& runtime, ui32 nchannels = 4, bool enablePipeRetries = true, TSchemeShardFactory ssFactory = &CreateFlatTxSchemeShard, bool enableSystemViews = false); TTestEnv(TTestActorRuntime& runtime, const TTestEnvOptions& opts, - TSchemeShardFactory ssFactory = &CreateFlatTxSchemeShard, std::shared_ptr<NKikimr::NDataShard::IExportFactory> dsExportFactory = {}); + TSchemeShardFactory ssFactory = &CreateFlatTxSchemeShard, std::shared_ptr<NKikimr::NDataShard::IExportFactory> dsExportFactory = {}); TFakeHiveState::TPtr GetHiveState() const; TAutoPtr<ITabletScheduledEventsGuard> EnableSchemeshardPipeRetries(TTestActorRuntime& runtime); 
diff --git a/ydb/core/util/lz4_data_generator.h b/ydb/core/util/lz4_data_generator.h index e999b92800..d0fbdfe602 100644 --- a/ydb/core/util/lz4_data_generator.h +++ b/ydb/core/util/lz4_data_generator.h @@ -1,21 +1,21 @@ -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/random/fast.h> - -#include <contrib/libs/lz4/lz4.h> - -namespace NKikimr { - -inline TString GenDataForLZ4(const ui64 size, const ui64 seed = 0) { - TString data = TString::Uninitialized(size); - const ui32 long_step = Max<ui32>(2027, size / 20); - const ui32 short_step = Min<ui32>(53, long_step / 400); - for (ui32 i = 0; i < data.size(); ++i) { - const ui32 j = i + seed; - data[i] = 0xff & (j % short_step + j / long_step); - } - return data; -} - + +#include <contrib/libs/lz4/lz4.h> + +namespace NKikimr { + +inline TString GenDataForLZ4(const ui64 size, const ui64 seed = 0) { + TString data = TString::Uninitialized(size); + const ui32 long_step = Max<ui32>(2027, size / 20); + const ui32 short_step = Min<ui32>(53, long_step / 400); + for (ui32 i = 0; i < data.size(); ++i) { + const ui32 j = i + seed; + data[i] = 0xff & (j % short_step + j / long_step); + } + return data; +} + inline TString FastGenDataForLZ4(i64 size, ui64 seed) { TString data = TString::Uninitialized(size); @@ -33,6 +33,6 @@ inline TString FastGenDataForLZ4(i64 size, ui64 seed) { } return data; -} +} } diff --git a/ydb/core/util/lz4_data_generator_ut.cpp b/ydb/core/util/lz4_data_generator_ut.cpp index 3d0bc11a38..fb18c60ec3 100644 --- a/ydb/core/util/lz4_data_generator_ut.cpp +++ b/ydb/core/util/lz4_data_generator_ut.cpp @@ -1,54 +1,54 @@ -#include "lz4_data_generator.h" - -#include <library/cpp/testing/unittest/registar.h> - -namespace NKikimr { -Y_UNIT_TEST_SUITE(CompressionTest) { - -size_t CompressedSize(TString data) { - TString tmp; - tmp.resize(2 * data.size()); - return LZ4_compress_default(data.Detach(), tmp.Detach(), data.size(), tmp.size()); -} - - Y_UNIT_TEST(lz4_generator_basic) { - for 
(ui32 size = 1; size < 200; ++size) { - ui32 compressed = CompressedSize(GenDataForLZ4(size, size)); - UNIT_ASSERT(compressed); - } - } - - Y_UNIT_TEST(lz4_generator_deflates) { - { - ui32 size = 179; - ui32 compressed = CompressedSize(GenDataForLZ4(size)); - UNIT_ASSERT_C(compressed * 10 < size, - size << " -> " << compressed); - } - { - ui32 size = 382; - ui32 compressed = CompressedSize(GenDataForLZ4(size)); - UNIT_ASSERT_C(compressed * 20 < size, - size << " -> " << compressed); - } - { - ui32 size = 1752; - ui32 compressed = CompressedSize(GenDataForLZ4(size)); - UNIT_ASSERT_C(compressed * 50 < size, - size << " -> " << compressed); - } - { - ui32 size = 4096; - ui32 compressed = CompressedSize(GenDataForLZ4(size)); - UNIT_ASSERT_C(compressed * 50 < size, - size << " -> " << compressed); - } - { - ui32 size = 1052014; - ui32 compressed = CompressedSize(GenDataForLZ4(size)); - UNIT_ASSERT_C(compressed * 200 < size, - size << " -> " << compressed); - } - } -} -} +#include "lz4_data_generator.h" + +#include <library/cpp/testing/unittest/registar.h> + +namespace NKikimr { +Y_UNIT_TEST_SUITE(CompressionTest) { + +size_t CompressedSize(TString data) { + TString tmp; + tmp.resize(2 * data.size()); + return LZ4_compress_default(data.Detach(), tmp.Detach(), data.size(), tmp.size()); +} + + Y_UNIT_TEST(lz4_generator_basic) { + for (ui32 size = 1; size < 200; ++size) { + ui32 compressed = CompressedSize(GenDataForLZ4(size, size)); + UNIT_ASSERT(compressed); + } + } + + Y_UNIT_TEST(lz4_generator_deflates) { + { + ui32 size = 179; + ui32 compressed = CompressedSize(GenDataForLZ4(size)); + UNIT_ASSERT_C(compressed * 10 < size, + size << " -> " << compressed); + } + { + ui32 size = 382; + ui32 compressed = CompressedSize(GenDataForLZ4(size)); + UNIT_ASSERT_C(compressed * 20 < size, + size << " -> " << compressed); + } + { + ui32 size = 1752; + ui32 compressed = CompressedSize(GenDataForLZ4(size)); + UNIT_ASSERT_C(compressed * 50 < size, + size << " -> " << compressed); + } + 
{ + ui32 size = 4096; + ui32 compressed = CompressedSize(GenDataForLZ4(size)); + UNIT_ASSERT_C(compressed * 50 < size, + size << " -> " << compressed); + } + { + ui32 size = 1052014; + ui32 compressed = CompressedSize(GenDataForLZ4(size)); + UNIT_ASSERT_C(compressed * 200 < size, + size << " -> " << compressed); + } + } +} +} diff --git a/ydb/core/util/ut/ya.make b/ydb/core/util/ut/ya.make index 392501ea72..315713cad8 100644 --- a/ydb/core/util/ut/ya.make +++ b/ydb/core/util/ut/ya.make @@ -37,13 +37,13 @@ SRCS( intrusive_stack_ut.cpp lf_stack_ut.cpp log_priority_mute_checker_ut.cpp - lz4_data_generator_ut.cpp + lz4_data_generator_ut.cpp operation_queue_ut.cpp operation_queue_priority_ut.cpp page_map_ut.cpp queue_inplace_ut.cpp queue_oneone_inplace_ut.cpp - simple_cache_ut.cpp + simple_cache_ut.cpp time_series_vec_ut.cpp token_bucket_ut.cpp ulid_ut.cpp diff --git a/ydb/core/util/ya.make b/ydb/core/util/ya.make index 9945a8926e..4f325d8fcb 100644 --- a/ydb/core/util/ya.make +++ b/ydb/core/util/ya.make @@ -58,7 +58,7 @@ SRCS( ulid.cpp ulid.h wildcard.h - yverify_stream.h + yverify_stream.h ) PEERDIR( diff --git a/ydb/core/util/yverify_stream.h b/ydb/core/util/yverify_stream.h index af61c96263..fcbf57f565 100644 --- a/ydb/core/util/yverify_stream.h +++ b/ydb/core/util/yverify_stream.h @@ -1,8 +1,8 @@ -#pragma once - -#include <util/system/yassert.h> -#include <util/string/builder.h> - -#define Y_VERIFY_S(expr, msg) Y_VERIFY(expr, "%s", (TStringBuilder() << msg).c_str()) -#define Y_FAIL_S(msg) Y_FAIL("%s", (TStringBuilder() << msg).c_str()) +#pragma once + +#include <util/system/yassert.h> +#include <util/string/builder.h> + +#define Y_VERIFY_S(expr, msg) Y_VERIFY(expr, "%s", (TStringBuilder() << msg).c_str()) +#define Y_FAIL_S(msg) Y_FAIL("%s", (TStringBuilder() << msg).c_str()) #define Y_VERIFY_DEBUG_S(expr, msg) Y_VERIFY_DEBUG(expr, "%s", (TStringBuilder() << msg).c_str()) diff --git a/ydb/core/viewer/browse.h b/ydb/core/viewer/browse.h index 548ad60ad3..c5716f46a7 
100644 --- a/ydb/core/viewer/browse.h +++ b/ydb/core/viewer/browse.h @@ -615,7 +615,7 @@ public: } for (auto pDiskKind : PDiskCategories) { - pbCommon.AddPDiskKind(TPDiskCategory(pDiskKind).TypeStrShort()); + pbCommon.AddPDiskKind(TPDiskCategory(pDiskKind).TypeStrShort()); } for (auto vDiskKind : VDiskCategories) { diff --git a/ydb/core/viewer/content/v2/node.js b/ydb/core/viewer/content/v2/node.js index 5da70e1877..422249c57c 100644 --- a/ydb/core/viewer/content/v2/node.js +++ b/ydb/core/viewer/content/v2/node.js @@ -350,8 +350,8 @@ Node.prototype.getUptime = function() { } Node.prototype.getDiskUsage = function() { - if (this.sysInfo !== undefined && this.sysInfo.MaxDiskUsage !== undefined) { - return this.sysInfo.MaxDiskUsage; + if (this.sysInfo !== undefined && this.sysInfo.MaxDiskUsage !== undefined) { + return this.sysInfo.MaxDiskUsage; } else { return 0; } diff --git a/ydb/core/viewer/content/v2/pdisk.js b/ydb/core/viewer/content/v2/pdisk.js index f43bbfc5e6..80a084a73f 100644 --- a/ydb/core/viewer/content/v2/pdisk.js +++ b/ydb/core/viewer/content/v2/pdisk.js @@ -179,8 +179,8 @@ PDisk.prototype.updatePDiskInfo = function(update) { case 'InitialCommonLogParseError': case 'CommonLoggerInitError': case 'OpenFileError': - case 'ChunkQuotaError': - case 'DeviceIoError': + case 'ChunkQuotaError': + case 'DeviceIoError': pDisk.css('background-color', red); this.color = red; break; diff --git a/ydb/core/viewer/content/v2/storage.js b/ydb/core/viewer/content/v2/storage.js index cad654661d..1872e4916d 100644 --- a/ydb/core/viewer/content/v2/storage.js +++ b/ydb/core/viewer/content/v2/storage.js @@ -233,8 +233,8 @@ Storage.prototype.update = function() { this.storageLatency.empty().append(this.getImageFromLatency()); var nodes = this.group.view.nodes; - var allocatedSize; - var availableSize; + var allocatedSize; + var availableSize; var readSpeed = 0; var writeSpeed = 0; @@ -244,18 +244,18 @@ Storage.prototype.update = function() { vDisk.CellId = this.Id; 
vDisk.StoragePoolName = this.StoragePool.Name; vDisk = this.vDiskMap.updateVDiskInfo(vDisk); - if (vDisk.AllocatedSize !== undefined) { - if (allocatedSize === undefined) { - allocatedSize = 0; - } + if (vDisk.AllocatedSize !== undefined) { + if (allocatedSize === undefined) { + allocatedSize = 0; + } allocatedSize += vDisk.AllocatedSize; } - if (vDisk.AvailableSize !== undefined) { - if (availableSize === undefined) { - availableSize = 0; - } - availableSize += vDisk.AvailableSize; - } + if (vDisk.AvailableSize !== undefined) { + if (availableSize === undefined) { + availableSize = 0; + } + availableSize += vDisk.AvailableSize; + } if (vDisk.ReadThroughput) { readSpeed += vDisk.ReadThroughput; } @@ -277,13 +277,13 @@ Storage.prototype.update = function() { this.vDiskMap.resizeVDisks(); - if (allocatedSize === undefined) { - this.allocatedSize.text('-'); - } else { + if (allocatedSize === undefined) { + this.allocatedSize.text('-'); + } else { this.allocatedSize.text(bytesToGB(allocatedSize)); - } - if (availableSize === undefined) { - this.availableSize.text('-'); + } + if (availableSize === undefined) { + this.availableSize.text('-'); } else { this.availableSize.text(bytesToGB(availableSize)); } @@ -299,21 +299,21 @@ Storage.prototype.update = function() { } } -function isVDiskInErrorState(state) { - if (!state) { - return false; - } - - switch (state) { - case "LocalRecoveryError": - case "SyncGuidRecoveryError": - case "PDiskError": - return true; - default: - return false; - } -} - +function isVDiskInErrorState(state) { + if (!state) { + return false; + } + + switch (state) { + case "LocalRecoveryError": + case "SyncGuidRecoveryError": + case "PDiskError": + return true; + default: + return false; + } +} + Storage.prototype.updateFromStorage = function(update) { if (this.GroupGeneration < update.GroupGeneration && this.visible) { this.disappear(); @@ -350,11 +350,11 @@ Storage.prototype.updateFromStorage = function(update) { if (vDisk.AllocatedSize) { 
vDisk.AllocatedSize = Number(vDisk.AllocatedSize); } - if (vDisk.AvailableSize) { - vDisk.AvailableSize = Number(vDisk.AvailableSize); - } - // meaingful only if hard disk space division is enabled - //usage = Math.max(usage, vDisk.AllocatedSize / (vDisk.AllocatedSize + vDisk.AvailableSize)); + if (vDisk.AvailableSize) { + vDisk.AvailableSize = Number(vDisk.AvailableSize); + } + // meaingful only if hard disk space division is enabled + //usage = Math.max(usage, vDisk.AllocatedSize / (vDisk.AllocatedSize + vDisk.AvailableSize)); if (vDisk.ReadThroughput) { vDisk.ReadThroughput = Number(vDisk.ReadThroughput); } @@ -362,18 +362,18 @@ Storage.prototype.updateFromStorage = function(update) { vDisk.WriteThroughput = Number(vDisk.WriteThroughput); } } - + var pDisk = vDisk.PDisk; if (pDisk) { - if (pDisk.TotalSize) { - pDisk.TotalSize = Number(pDisk.TotalSize); - } - if (pDisk.AvailableSize) { - pDisk.AvailableSize = Number(pDisk.AvailableSize); - } - } - if (pDisk && pDisk.AvailableSize && pDisk.TotalSize) { - usage = Math.max(usage, 1 - pDisk.AvailableSize / pDisk.TotalSize); + if (pDisk.TotalSize) { + pDisk.TotalSize = Number(pDisk.TotalSize); + } + if (pDisk.AvailableSize) { + pDisk.AvailableSize = Number(pDisk.AvailableSize); + } + } + if (pDisk && pDisk.AvailableSize && pDisk.TotalSize) { + usage = Math.max(usage, 1 - pDisk.AvailableSize / pDisk.TotalSize); if (!pDisk.State || (vDisk.Replicated === false && !vDisk.DonorMode) || isVDiskInErrorState(vDisk.VDiskState) === true) { missingDisks++; } diff --git a/ydb/core/viewer/content/v2/storage_view.js b/ydb/core/viewer/content/v2/storage_view.js index dc250c630c..b4fe4b6a45 100644 --- a/ydb/core/viewer/content/v2/storage_view.js +++ b/ydb/core/viewer/content/v2/storage_view.js @@ -5,7 +5,7 @@ function StorageView(options) { }, options); this.storage = {}; this.storagePools = {}; - this.observer = new IntersectionObserver(this.onVisibilityChange.bind(this), {rootMargin: '50%'}); + this.observer = new 
IntersectionObserver(this.onVisibilityChange.bind(this), {rootMargin: '50%'}); this.getStorageGroupName = function(storage) { return storage.StoragePool.Name; } this.getStorageGroupHeader = function(storageGroup) { return bytesToGB0(storageGroup.allocatedSizeBytes) + ' / ' + storageGroup.storageTotal + ' groups'; } this.groupOrder = function(prev, next) { return prev < next; } diff --git a/ydb/core/viewer/content/v2/vdisk.js b/ydb/core/viewer/content/v2/vdisk.js index 4b356a507e..6a3e1af417 100644 --- a/ydb/core/viewer/content/v2/vdisk.js +++ b/ydb/core/viewer/content/v2/vdisk.js @@ -50,18 +50,18 @@ VDisk.prototype.colorSeverity = { Red: 5 }; -VDisk.prototype.getStateSeverity = function() { - var sev = this.stateSeverity[this.VDiskState]; +VDisk.prototype.getStateSeverity = function() { + var sev = this.stateSeverity[this.VDiskState]; if (sev === undefined) { sev = 0; } return sev; } -VDisk.prototype.isErrorState = function() { - return this.getStateSeverity() === 5; -} - +VDisk.prototype.isErrorState = function() { + return this.getStateSeverity() === 5; +} + VDisk.prototype.getColorSeverity = function(color) { var sev = this.colorSeverity[color]; if (sev === undefined) { @@ -93,7 +93,7 @@ VDisk.prototype.updateVDiskInfo = function(update) { state = '<table class="tooltip-table"><tr><td>VDisk</td><td>' + this.Id + '</td></tr>'; if (this.VDiskState) { state += '<tr><td>State</td><td>' + this.VDiskState + '</td></tr>'; - severity = this.getStateSeverity(); + severity = this.getStateSeverity(); } else { state += '<tr><td>State</td><td>not available</td></tr>'; } @@ -124,11 +124,11 @@ VDisk.prototype.updateVDiskInfo = function(update) { } if (this.DiskSpace && this.DiskSpace !== 'Green') { state += '<tr><td>Space</td><td>' + this.DiskSpace + '</td></tr>'; - severity = Math.max(severity, this.getColorSeverity(this.DiskSpace)); + severity = Math.max(severity, this.getColorSeverity(this.DiskSpace)); } if (this.FrontQueues && this.FrontQueues !== 'Green') { state += 
'<tr><td>FronQueues</td><td>' + this.FrontQueues + '</td></tr>'; - severity = Math.max(severity, Math.min(4, this.getColorSeverity(this.FrontQueues))); + severity = Math.max(severity, Math.min(4, this.getColorSeverity(this.FrontQueues))); } dash = false; if (this.DonorMode === true) { diff --git a/ydb/core/viewer/content/v2/viewer.js b/ydb/core/viewer/content/v2/viewer.js index 6abab437ef..74686cbdbf 100644 --- a/ydb/core/viewer/content/v2/viewer.js +++ b/ydb/core/viewer/content/v2/viewer.js @@ -147,8 +147,8 @@ ViewerNodes.prototype.refreshNodeInfo = function() { ViewerNodes.prototype.refreshOverall = function() { this.refreshSysInfo(); //this.refreshNodeInfo(); - //this.refreshPDiskInfo(); - //this.refreshVDiskInfo(); + //this.refreshPDiskInfo(); + //this.refreshVDiskInfo(); } ViewerNodes.prototype.onNodeGroupChange = function(obj) { diff --git a/ydb/core/ymq/actor/service.cpp b/ydb/core/ymq/actor/service.cpp index b410106196..34c9a4ae46 100644 --- a/ydb/core/ymq/actor/service.cpp +++ b/ydb/core/ymq/actor/service.cpp @@ -1297,12 +1297,12 @@ void TSqsService::MakeAndRegisterYcEventsProcessor() { auto root = YcSearchEventsConfig.TenantMode ? 
TString() : Cfg().GetRoot(); - auto factory = AppData()->SqsEventsWriterFactory; - Y_VERIFY(factory); + auto factory = AppData()->SqsEventsWriterFactory; + Y_VERIFY(factory); Register(new TSearchEventsProcessor( root, YcSearchEventsConfig.ReindexInterval, YcSearchEventsConfig.RescanInterval, MakeSimpleShared<NYdb::NTable::TTableClient>(*YcSearchEventsConfig.Driver), - factory->CreateEventsWriter(Cfg(), GetSqsServiceCounters(AppData()->Counters, "yc_unified_agent")) + factory->CreateEventsWriter(Cfg(), GetSqsServiceCounters(AppData()->Counters, "yc_unified_agent")) )); } // diff --git a/ydb/core/ymq/actor/ya.make b/ydb/core/ymq/actor/ya.make index 4ffc9b3801..070653bb57 100644 --- a/ydb/core/ymq/actor/ya.make +++ b/ydb/core/ymq/actor/ya.make @@ -95,7 +95,7 @@ PEERDIR( YQL_LAST_ABI_VERSION() GENERATE_ENUM_SERIALIZATION(events.h) - + GENERATE_ENUM_SERIALIZATION(metering.h) END() diff --git a/ydb/core/ymq/base/events_writer_iface.h b/ydb/core/ymq/base/events_writer_iface.h index bfc6f0c67c..4fcd6ab50a 100644 --- a/ydb/core/ymq/base/events_writer_iface.h +++ b/ydb/core/ymq/base/events_writer_iface.h @@ -24,10 +24,10 @@ private: }; -class IEventsWriterFactory { +class IEventsWriterFactory { public: virtual IEventsWriterWrapper::TPtr CreateEventsWriter(const NKikimrConfig::TSqsConfig& config, const NMonitoring::TDynamicCounterPtr& counters) const = 0; - virtual ~IEventsWriterFactory() + virtual ~IEventsWriterFactory() {} }; diff --git a/ydb/library/backup/backup.cpp b/ydb/library/backup/backup.cpp index e66633eb06..676cc831ee 100644 --- a/ydb/library/backup/backup.cpp +++ b/ydb/library/backup/backup.cpp @@ -1,385 +1,385 @@ -#include "backup.h" -#include "db_iterator.h" -#include "query_builder.h" -#include "query_uploader.h" -#include "util.h" - +#include "backup.h" +#include "db_iterator.h" +#include "query_builder.h" +#include "query_uploader.h" +#include "util.h" + #include <ydb/public/api/protos/ydb_table.pb.h> #include <ydb/public/lib/yson_value/ydb_yson_value.h> 
#include <ydb/public/sdk/cpp/client/ydb_table/table.h> #include <ydb/public/lib/ydb_cli/dump/util/util.h> - + #include <library/cpp/containers/stack_vector/stack_vec.h> -#include <library/cpp/string_utils/quote/quote.h> - -#include <util/datetime/base.h> -#include <util/folder/dirut.h> -#include <util/folder/path.h> -#include <util/folder/pathsplit.h> -#include <util/generic/ptr.h> -#include <util/generic/yexception.h> -#include <util/stream/format.h> -#include <util/stream/mem.h> -#include <util/stream/null.h> -#include <util/string/builder.h> -#include <util/string/printf.h> -#include <util/system/file.h> -#include <util/system/fs.h> - -namespace NYdb::NBackup { - - -static constexpr const char *SCHEME_FILE_NAME = "scheme.pb"; -static constexpr const char *INCOMPLETE_DATA_FILE_NAME = "incomplete.csv"; -static constexpr const char *INCOMPLETE_FILE_NAME = "incomplete"; +#include <library/cpp/string_utils/quote/quote.h> + +#include <util/datetime/base.h> +#include <util/folder/dirut.h> +#include <util/folder/path.h> +#include <util/folder/pathsplit.h> +#include <util/generic/ptr.h> +#include <util/generic/yexception.h> +#include <util/stream/format.h> +#include <util/stream/mem.h> +#include <util/stream/null.h> +#include <util/string/builder.h> +#include <util/string/printf.h> +#include <util/system/file.h> +#include <util/system/fs.h> + +namespace NYdb::NBackup { + + +static constexpr const char *SCHEME_FILE_NAME = "scheme.pb"; +static constexpr const char *INCOMPLETE_DATA_FILE_NAME = "incomplete.csv"; +static constexpr const char *INCOMPLETE_FILE_NAME = "incomplete"; static constexpr const char *EMPTY_FILE_NAME = "empty_dir"; - -static constexpr size_t IO_BUFFER_SIZE = 2 << 20; // 2 MiB -static constexpr i64 FILE_SPLIT_THRESHOLD = 128 << 20; // 128 MiB -static constexpr i64 READ_TABLE_RETRIES = 100; - - -//////////////////////////////////////////////////////////////////////////////// -// Util 
-//////////////////////////////////////////////////////////////////////////////// - -static void VerifyStatus(TStatus status, TString explain = "") { - if (status.IsSuccess()) { - if (status.GetIssues()) { - LOG_DEBUG(status.GetIssues().ToString()); - } - } else { - throw TYdbErrorException(status) << explain; - } -} - -static TString NameFromDbPath(const TString& path) { - TPathSplitUnix split(path); - return TString{split.back()}; -} - -static TString ParentPathFromDbPath(const TString& path) { - TPathSplitUnix split(path); - split.pop_back(); - return split.Reconstruct(); -} - -static TString JoinDatabasePath(const TString& basePath, const TString& path) { - if (basePath.empty()) { - return path; - } else if (path == "/") { - return basePath; - } else if (basePath == "/") { - return path; - } else { - TPathSplitUnix prefixPathSplit(basePath); - prefixPathSplit.AppendComponent(path); - - return prefixPathSplit.Reconstruct(); - } -} - -static TString CreateDataFileName(ui32 i) { - return Sprintf("data_%02d.csv", i); -} - -static TString CreateTemporalBackupName() { - return "backup_" + TInstant::Now().FormatLocalTime("%Y%m%dT%H%M%S"); // YYYYMMDDThhmmss -} - -//////////////////////////////////////////////////////////////////////////////// -// Backup -//////////////////////////////////////////////////////////////////////////////// - -#define CASE_PRINT_PRIMITIVE_TYPE(out, type) \ -case EPrimitiveType::type: \ - out << parser.Get##type(); \ - break; - -#define CASE_PRINT_PRIMITIVE_STRING_TYPE(out, type) \ -case EPrimitiveType::type: { \ - TString str = parser.Get##type(); \ - CGIEscape(str); \ - out << '"' << str << '"'; \ - } \ - break; - -void PrintPrimitive(IOutputStream& out, const TValueParser& parser) { - switch(parser.GetPrimitiveType()) { - CASE_PRINT_PRIMITIVE_TYPE(out, Bool); - case EPrimitiveType::Int8: - out << (i32)parser.GetInt8(); - break; - case EPrimitiveType::Uint8: - out << (ui32)parser.GetUint8(); - break; - CASE_PRINT_PRIMITIVE_TYPE(out, Int16); 
- CASE_PRINT_PRIMITIVE_TYPE(out, Uint16); - CASE_PRINT_PRIMITIVE_TYPE(out, Int32); - CASE_PRINT_PRIMITIVE_TYPE(out, Uint32); - CASE_PRINT_PRIMITIVE_TYPE(out, Int64); - CASE_PRINT_PRIMITIVE_TYPE(out, Uint64); - CASE_PRINT_PRIMITIVE_TYPE(out, Float); - CASE_PRINT_PRIMITIVE_TYPE(out, Double); + +static constexpr size_t IO_BUFFER_SIZE = 2 << 20; // 2 MiB +static constexpr i64 FILE_SPLIT_THRESHOLD = 128 << 20; // 128 MiB +static constexpr i64 READ_TABLE_RETRIES = 100; + + +//////////////////////////////////////////////////////////////////////////////// +// Util +//////////////////////////////////////////////////////////////////////////////// + +static void VerifyStatus(TStatus status, TString explain = "") { + if (status.IsSuccess()) { + if (status.GetIssues()) { + LOG_DEBUG(status.GetIssues().ToString()); + } + } else { + throw TYdbErrorException(status) << explain; + } +} + +static TString NameFromDbPath(const TString& path) { + TPathSplitUnix split(path); + return TString{split.back()}; +} + +static TString ParentPathFromDbPath(const TString& path) { + TPathSplitUnix split(path); + split.pop_back(); + return split.Reconstruct(); +} + +static TString JoinDatabasePath(const TString& basePath, const TString& path) { + if (basePath.empty()) { + return path; + } else if (path == "/") { + return basePath; + } else if (basePath == "/") { + return path; + } else { + TPathSplitUnix prefixPathSplit(basePath); + prefixPathSplit.AppendComponent(path); + + return prefixPathSplit.Reconstruct(); + } +} + +static TString CreateDataFileName(ui32 i) { + return Sprintf("data_%02d.csv", i); +} + +static TString CreateTemporalBackupName() { + return "backup_" + TInstant::Now().FormatLocalTime("%Y%m%dT%H%M%S"); // YYYYMMDDThhmmss +} + +//////////////////////////////////////////////////////////////////////////////// +// Backup +//////////////////////////////////////////////////////////////////////////////// + +#define CASE_PRINT_PRIMITIVE_TYPE(out, type) \ +case EPrimitiveType::type: \ + 
out << parser.Get##type(); \ + break; + +#define CASE_PRINT_PRIMITIVE_STRING_TYPE(out, type) \ +case EPrimitiveType::type: { \ + TString str = parser.Get##type(); \ + CGIEscape(str); \ + out << '"' << str << '"'; \ + } \ + break; + +void PrintPrimitive(IOutputStream& out, const TValueParser& parser) { + switch(parser.GetPrimitiveType()) { + CASE_PRINT_PRIMITIVE_TYPE(out, Bool); + case EPrimitiveType::Int8: + out << (i32)parser.GetInt8(); + break; + case EPrimitiveType::Uint8: + out << (ui32)parser.GetUint8(); + break; + CASE_PRINT_PRIMITIVE_TYPE(out, Int16); + CASE_PRINT_PRIMITIVE_TYPE(out, Uint16); + CASE_PRINT_PRIMITIVE_TYPE(out, Int32); + CASE_PRINT_PRIMITIVE_TYPE(out, Uint32); + CASE_PRINT_PRIMITIVE_TYPE(out, Int64); + CASE_PRINT_PRIMITIVE_TYPE(out, Uint64); + CASE_PRINT_PRIMITIVE_TYPE(out, Float); + CASE_PRINT_PRIMITIVE_TYPE(out, Double); CASE_PRINT_PRIMITIVE_TYPE(out, DyNumber); - CASE_PRINT_PRIMITIVE_TYPE(out, Date); - CASE_PRINT_PRIMITIVE_TYPE(out, Datetime); - CASE_PRINT_PRIMITIVE_TYPE(out, Timestamp); - CASE_PRINT_PRIMITIVE_TYPE(out, Interval); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzDate); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzDatetime); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzTimestamp); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, String); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Utf8); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Yson); - CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Json); + CASE_PRINT_PRIMITIVE_TYPE(out, Date); + CASE_PRINT_PRIMITIVE_TYPE(out, Datetime); + CASE_PRINT_PRIMITIVE_TYPE(out, Timestamp); + CASE_PRINT_PRIMITIVE_TYPE(out, Interval); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzDate); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzDatetime); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, TzTimestamp); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, String); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Utf8); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Yson); + CASE_PRINT_PRIMITIVE_STRING_TYPE(out, Json); CASE_PRINT_PRIMITIVE_STRING_TYPE(out, JsonDocument); - default: 
- Y_FAIL("Unsupported type"); - } -} -#undef CASE_PRINT_PRIMITIVE_STRING_TYPE -#undef CASE_PRINT_PRIMITIVE_TYPE - -void PrintValue(IOutputStream& out, TValueParser& parser) { - switch (parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: - PrintPrimitive(out, parser); - break; - - case TTypeParser::ETypeKind::Decimal: { - auto decimal = parser.GetDecimal(); - out << decimal.ToString(); - break; - } - - case TTypeParser::ETypeKind::Optional: - parser.OpenOptional(); - if (parser.IsNull()) { - out << "null"; - } else { - PrintValue(out, parser); - } - parser.CloseOptional(); - break; - - case TTypeParser::ETypeKind::Void: + default: + Y_FAIL("Unsupported type"); + } +} +#undef CASE_PRINT_PRIMITIVE_STRING_TYPE +#undef CASE_PRINT_PRIMITIVE_TYPE + +void PrintValue(IOutputStream& out, TValueParser& parser) { + switch (parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: + PrintPrimitive(out, parser); + break; + + case TTypeParser::ETypeKind::Decimal: { + auto decimal = parser.GetDecimal(); + out << decimal.ToString(); + break; + } + + case TTypeParser::ETypeKind::Optional: + parser.OpenOptional(); + if (parser.IsNull()) { + out << "null"; + } else { + PrintValue(out, parser); + } + parser.CloseOptional(); + break; + + case TTypeParser::ETypeKind::Void: out << "Void"sv; - break; - - case TTypeParser::ETypeKind::List: - /* fallthrough */ - case TTypeParser::ETypeKind::Tuple: - /* fallthrough */ - case TTypeParser::ETypeKind::Struct: - /* fallthrough */ - case TTypeParser::ETypeKind::Dict: - /* fallthrough */ - case TTypeParser::ETypeKind::Variant: - Y_ENSURE(false, TStringBuilder() << "This typeKind is not supported for backup now," - " kind: " << parser.GetKind()); - break; - - default: + break; + + case TTypeParser::ETypeKind::List: + /* fallthrough */ + case TTypeParser::ETypeKind::Tuple: + /* fallthrough */ + case TTypeParser::ETypeKind::Struct: + /* fallthrough */ + case TTypeParser::ETypeKind::Dict: + /* fallthrough */ + case 
TTypeParser::ETypeKind::Variant: + Y_ENSURE(false, TStringBuilder() << "This typeKind is not supported for backup now," + " kind: " << parser.GetKind()); + break; + + default: ThrowFatalError(TStringBuilder() - << "Unexpected type kind: " << parser.GetKind()); - } -} - -TMaybe<TValue> ProcessResultSet(TStringStream& ss, - TResultSetParser resultSetParser, TFile* dataFile, const NTable::TTableDescription* desc) { - TMaybe<TValue> lastReadPK; - - TStackVec<TValueParser*, 32> colParsers; - if (desc) { - Y_ENSURE(resultSetParser.ColumnsCount() == desc->GetColumns().size(), "resultSet got from ReadTable has " - "number of columns differing from DescribeTable"); - for (const auto& col : desc->GetColumns()) { - colParsers.push_back(&resultSetParser.ColumnParser(col.Name)); - } - } else { - for (ui32 i = 0; i < resultSetParser.ColumnsCount(); ++i) { - colParsers.push_back(&resultSetParser.ColumnParser(i)); - } - } - while (resultSetParser.TryNextRow()) { - bool needsComma = false; - for (auto* parser : colParsers) { - if (needsComma) { - ss << ","; - } else { - needsComma = true; - } - PrintValue(ss, *parser); - } - ss << "\n"; - if (dataFile && ss.Size() > IO_BUFFER_SIZE / 2) { - dataFile->Write(ss.Data(), ss.Size()); - ss.Clear(); - } - if (desc) { - TValueBuilder value; - value.BeginTuple(); - for (const auto& col : desc->GetPrimaryKeyColumns()) { - value.AddElement(resultSetParser.GetValue(col)); - } - value.EndTuple(); - lastReadPK = value.Build(); - } - } - return lastReadPK; -} - -TMaybe<TValue> TryReadTable(TDriver driver, const NTable::TTableDescription& desc, const TString& fullTablePath, - const TFsPath& folderPath, TMaybe<TValue> lastWrittenPK, ui32 *fileCounter) { - NTable::TTableClient client(driver); - - - - TMaybe<NTable::TTablePartIterator> iter; - auto readTableJob = [fullTablePath, &lastWrittenPK, &iter] - (NTable::TSession session) -> TStatus { - - NTable::TReadTableSettings settings; - if (lastWrittenPK) { - 
settings.From(NTable::TKeyBound::Exclusive(*lastWrittenPK)); - } + << "Unexpected type kind: " << parser.GetKind()); + } +} + +TMaybe<TValue> ProcessResultSet(TStringStream& ss, + TResultSetParser resultSetParser, TFile* dataFile, const NTable::TTableDescription* desc) { + TMaybe<TValue> lastReadPK; + + TStackVec<TValueParser*, 32> colParsers; + if (desc) { + Y_ENSURE(resultSetParser.ColumnsCount() == desc->GetColumns().size(), "resultSet got from ReadTable has " + "number of columns differing from DescribeTable"); + for (const auto& col : desc->GetColumns()) { + colParsers.push_back(&resultSetParser.ColumnParser(col.Name)); + } + } else { + for (ui32 i = 0; i < resultSetParser.ColumnsCount(); ++i) { + colParsers.push_back(&resultSetParser.ColumnParser(i)); + } + } + while (resultSetParser.TryNextRow()) { + bool needsComma = false; + for (auto* parser : colParsers) { + if (needsComma) { + ss << ","; + } else { + needsComma = true; + } + PrintValue(ss, *parser); + } + ss << "\n"; + if (dataFile && ss.Size() > IO_BUFFER_SIZE / 2) { + dataFile->Write(ss.Data(), ss.Size()); + ss.Clear(); + } + if (desc) { + TValueBuilder value; + value.BeginTuple(); + for (const auto& col : desc->GetPrimaryKeyColumns()) { + value.AddElement(resultSetParser.GetValue(col)); + } + value.EndTuple(); + lastReadPK = value.Build(); + } + } + return lastReadPK; +} + +TMaybe<TValue> TryReadTable(TDriver driver, const NTable::TTableDescription& desc, const TString& fullTablePath, + const TFsPath& folderPath, TMaybe<TValue> lastWrittenPK, ui32 *fileCounter) { + NTable::TTableClient client(driver); + + + + TMaybe<NTable::TTablePartIterator> iter; + auto readTableJob = [fullTablePath, &lastWrittenPK, &iter] + (NTable::TSession session) -> TStatus { + + NTable::TReadTableSettings settings; + if (lastWrittenPK) { + settings.From(NTable::TKeyBound::Exclusive(*lastWrittenPK)); + } auto result = session.ReadTable(fullTablePath, settings).ExtractValueSync(); - if (result.IsSuccess()) { - iter = result; - 
} - VerifyStatus(result, TStringBuilder() << "ReadTable result was not successfull," - " path: " << fullTablePath.Quote()); - return result; - - }; - - TStatus status = client.RetryOperationSync(readTableJob, NYdb::NTable::TRetryOperationSettings().MaxRetries(1)); - VerifyStatus(status); - - { - auto resultSetStreamPart = iter->ReadNext().ExtractValueSync(); - if (!resultSetStreamPart.IsSuccess() && resultSetStreamPart.EOS()) { - // Table is empty, so create empty data file - TFile dataFile(folderPath.Child(CreateDataFileName((*fileCounter)++)), CreateAlways | WrOnly); - return {}; - } - VerifyStatus(resultSetStreamPart, TStringBuilder() << "TStreamPart<TResultSet> is not successfull" - " error msg: " << resultSetStreamPart.GetIssues().ToString()); + if (result.IsSuccess()) { + iter = result; + } + VerifyStatus(result, TStringBuilder() << "ReadTable result was not successfull," + " path: " << fullTablePath.Quote()); + return result; + + }; + + TStatus status = client.RetryOperationSync(readTableJob, NYdb::NTable::TRetryOperationSettings().MaxRetries(1)); + VerifyStatus(status); + + { + auto resultSetStreamPart = iter->ReadNext().ExtractValueSync(); + if (!resultSetStreamPart.IsSuccess() && resultSetStreamPart.EOS()) { + // Table is empty, so create empty data file + TFile dataFile(folderPath.Child(CreateDataFileName((*fileCounter)++)), CreateAlways | WrOnly); + return {}; + } + VerifyStatus(resultSetStreamPart, TStringBuilder() << "TStreamPart<TResultSet> is not successfull" + " error msg: " << resultSetStreamPart.GetIssues().ToString()); TResultSet resultSetCurrent = resultSetStreamPart.ExtractPart(); - - TFsPath tmpFileName = folderPath.Child(INCOMPLETE_DATA_FILE_NAME); - TFile tmpFile = TFile(tmpFileName, CreateAlways | WrOnly); - TStringStream ss; - ss.Reserve(IO_BUFFER_SIZE); - - TMaybe<TValue> lastReadPK; - - while (true) { + + TFsPath tmpFileName = folderPath.Child(INCOMPLETE_DATA_FILE_NAME); + TFile tmpFile = TFile(tmpFileName, CreateAlways | WrOnly); + 
TStringStream ss; + ss.Reserve(IO_BUFFER_SIZE); + + TMaybe<TValue> lastReadPK; + + while (true) { NTable::TAsyncSimpleStreamPart<TResultSet> nextResult; - if (resultSetCurrent.Truncated()) { - nextResult = iter->ReadNext(); - } - lastReadPK = ProcessResultSet(ss, resultSetCurrent, &tmpFile, &desc); - - // Next - if (resultSetCurrent.Truncated()) { + if (resultSetCurrent.Truncated()) { + nextResult = iter->ReadNext(); + } + lastReadPK = ProcessResultSet(ss, resultSetCurrent, &tmpFile, &desc); + + // Next + if (resultSetCurrent.Truncated()) { auto resultSetStreamPart = nextResult.ExtractValueSync(); - if (!resultSetStreamPart.IsSuccess()) { - LOG_DEBUG("resultSetStreamPart is not successful, EOS# " - << (resultSetStreamPart.EOS() ? "true" : "false")); - if (resultSetStreamPart.EOS()) { - break; - } else { - if (ss.Data()) { - tmpFile.Write(ss.Data(), ss.Size()); - lastWrittenPK = *lastReadPK; - ss.Clear(); - } - tmpFile.Close(); - LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); - tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); - return lastWrittenPK; - } - } - resultSetCurrent = resultSetStreamPart.ExtractPart(); - } else { - break; - } - if (ss.Size() > IO_BUFFER_SIZE) { - tmpFile.Write(ss.Data(), ss.Size()); - lastWrittenPK = *lastReadPK; - ss.Clear(); - } - if (tmpFile.GetLength() > FILE_SPLIT_THRESHOLD) { - tmpFile.Close(); - LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); - tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); - tmpFileName = folderPath.Child(INCOMPLETE_DATA_FILE_NAME); - tmpFile = TFile(tmpFileName, CreateAlways | WrOnly); - } - } - tmpFile.Write(ss.Data(), ss.Size()); - lastWrittenPK = *lastReadPK; - ss.Clear(); - tmpFile.Close(); - LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); - tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); - 
} - - return {}; -} - -void ReadTable(TDriver driver, const NTable::TTableDescription& desc, const TString& fullTablePath, - const TFsPath& folderPath) { - LOG_DEBUG("Going to ReadTable, fullPath: " << fullTablePath); - - auto timer = GetVerbosity() - ? MakeHolder<TScopedTimer>(TStringBuilder() << "Done read table# " << fullTablePath.Quote() << " took# ") - : nullptr; - - TMaybe<TValue> lastWrittenPK; - - i64 retries = READ_TABLE_RETRIES; - ui32 fileCounter = 0; - do { - lastWrittenPK = TryReadTable(driver, desc, fullTablePath, folderPath, lastWrittenPK, &fileCounter); - if (lastWrittenPK && retries) { - LOG_DEBUG("ReadTable was not successfull, going to retry from lastWrittenPK# " - << FormatValueYson(*lastWrittenPK).Quote()); - } - } while (lastWrittenPK && retries--); - - Y_ENSURE(!lastWrittenPK, "For table " << fullTablePath.Quote() << " ReadTable hasn't finished successfully after " - << READ_TABLE_RETRIES << " retries"); -} - -NTable::TTableDescription DescribeTable(TDriver driver, const TString& fullTablePath) { - TMaybe<NTable::TTableDescription> desc; - - NTable::TTableClient client(driver); - - TStatus status = client.RetryOperationSync([fullTablePath, &desc](NTable::TSession session) { + if (!resultSetStreamPart.IsSuccess()) { + LOG_DEBUG("resultSetStreamPart is not successful, EOS# " + << (resultSetStreamPart.EOS() ? 
"true" : "false")); + if (resultSetStreamPart.EOS()) { + break; + } else { + if (ss.Data()) { + tmpFile.Write(ss.Data(), ss.Size()); + lastWrittenPK = *lastReadPK; + ss.Clear(); + } + tmpFile.Close(); + LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); + tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); + return lastWrittenPK; + } + } + resultSetCurrent = resultSetStreamPart.ExtractPart(); + } else { + break; + } + if (ss.Size() > IO_BUFFER_SIZE) { + tmpFile.Write(ss.Data(), ss.Size()); + lastWrittenPK = *lastReadPK; + ss.Clear(); + } + if (tmpFile.GetLength() > FILE_SPLIT_THRESHOLD) { + tmpFile.Close(); + LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); + tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); + tmpFileName = folderPath.Child(INCOMPLETE_DATA_FILE_NAME); + tmpFile = TFile(tmpFileName, CreateAlways | WrOnly); + } + } + tmpFile.Write(ss.Data(), ss.Size()); + lastWrittenPK = *lastReadPK; + ss.Clear(); + tmpFile.Close(); + LOG_DEBUG("New file with data is created, fileName# " << CreateDataFileName(*fileCounter)); + tmpFileName.RenameTo(folderPath.Child(CreateDataFileName((*fileCounter)++))); + } + + return {}; +} + +void ReadTable(TDriver driver, const NTable::TTableDescription& desc, const TString& fullTablePath, + const TFsPath& folderPath) { + LOG_DEBUG("Going to ReadTable, fullPath: " << fullTablePath); + + auto timer = GetVerbosity() + ? 
MakeHolder<TScopedTimer>(TStringBuilder() << "Done read table# " << fullTablePath.Quote() << " took# ") + : nullptr; + + TMaybe<TValue> lastWrittenPK; + + i64 retries = READ_TABLE_RETRIES; + ui32 fileCounter = 0; + do { + lastWrittenPK = TryReadTable(driver, desc, fullTablePath, folderPath, lastWrittenPK, &fileCounter); + if (lastWrittenPK && retries) { + LOG_DEBUG("ReadTable was not successfull, going to retry from lastWrittenPK# " + << FormatValueYson(*lastWrittenPK).Quote()); + } + } while (lastWrittenPK && retries--); + + Y_ENSURE(!lastWrittenPK, "For table " << fullTablePath.Quote() << " ReadTable hasn't finished successfully after " + << READ_TABLE_RETRIES << " retries"); +} + +NTable::TTableDescription DescribeTable(TDriver driver, const TString& fullTablePath) { + TMaybe<NTable::TTableDescription> desc; + + NTable::TTableClient client(driver); + + TStatus status = client.RetryOperationSync([fullTablePath, &desc](NTable::TSession session) { auto settings = NTable::TDescribeTableSettings().WithKeyShardBoundary(true); auto result = session.DescribeTable(fullTablePath, settings).GetValueSync(); - - VerifyStatus(result); - desc = result.GetTableDescription(); - return result; - }); - VerifyStatus(status); - LOG_DEBUG("Table is described, fullPath: " << fullTablePath); - - for (auto& column : desc->GetColumns()) { - LOG_DEBUG("Column, name: " << column.Name << ", type: " << FormatType(column.Type)); - } - return *desc; -} - + + VerifyStatus(result); + desc = result.GetTableDescription(); + return result; + }); + VerifyStatus(status); + LOG_DEBUG("Table is described, fullPath: " << fullTablePath); + + for (auto& column : desc->GetColumns()) { + LOG_DEBUG("Column, name: " << column.Name << ", type: " << FormatType(column.Type)); + } + return *desc; +} + Ydb::Table::CreateTableRequest ProtoFromTableDescription(const NTable::TTableDescription& desc, bool preservePoolKinds) { - Ydb::Table::CreateTableRequest proto; + Ydb::Table::CreateTableRequest proto; 
desc.SerializeTo(proto); - + if (preservePoolKinds) { return proto; } @@ -414,97 +414,97 @@ Ydb::Table::CreateTableRequest ProtoFromTableDescription(const NTable::TTableDes family.clear_data(); } - return proto; -} - -TAsyncStatus CopyTableAsyncStart(TDriver driver, const TString& src, const TString& dst) { - NTable::TTableClient client(driver); - - return client.RetryOperation([src, dst](NTable::TSession session) { - auto result = session.CopyTable(src, dst); - - return result; - }); -} - -void CopyTableAsyncFinish(const TAsyncStatus& status, const TString& src, const TString& dst) { - VerifyStatus(status.GetValueSync(), TStringBuilder() << "CopyTable, src: " << src.Quote() << " dst: " << dst.Quote()); - LOG_DEBUG("Table is copied, src: " << src.Quote() << " dst: " << dst.Quote()); -} - -void CopyTables(TDriver driver, const TVector<NTable::TCopyItem>& tablesToCopy) { - NTable::TTableClient client(driver); - - TStatus status = client.RetryOperationSync([&tablesToCopy](NTable::TSession session) { - auto result = session.CopyTables(tablesToCopy).GetValueSync(); - - return result; - }); - - // Debug print - TStringStream tablesStr; - bool needsComma = false; - for (const auto& copyItem : tablesToCopy) { - if (needsComma) { - tablesStr << ", "; - } else { - needsComma = true; - } - tablesStr << "{ src# " << copyItem.SourcePath() << ", dst# " << copyItem.DestinationPath().Quote() << "}"; - } - - VerifyStatus(status, TStringBuilder() << "CopyTables error, tables to be copied# " << tablesStr.Str()); - LOG_DEBUG("Tables are copied, " << tablesStr.Str()); -} - -void DropTable(TDriver driver, const TString& path) { - NTable::TTableClient client(driver); - TStatus status = client.RetryOperationSync([path](NTable::TSession session) { - auto result = session.DropTable(path).GetValueSync(); - - return result; - }); - VerifyStatus(status, TStringBuilder() << "DropTable, path" << path.Quote()); - LOG_DEBUG("Table is dropped, path: " << path.Quote()); -} - -void 
BackupTable(TDriver driver, const TString& dbPrefix, const TString& backupPrefix, const TString& path, + return proto; +} + +TAsyncStatus CopyTableAsyncStart(TDriver driver, const TString& src, const TString& dst) { + NTable::TTableClient client(driver); + + return client.RetryOperation([src, dst](NTable::TSession session) { + auto result = session.CopyTable(src, dst); + + return result; + }); +} + +void CopyTableAsyncFinish(const TAsyncStatus& status, const TString& src, const TString& dst) { + VerifyStatus(status.GetValueSync(), TStringBuilder() << "CopyTable, src: " << src.Quote() << " dst: " << dst.Quote()); + LOG_DEBUG("Table is copied, src: " << src.Quote() << " dst: " << dst.Quote()); +} + +void CopyTables(TDriver driver, const TVector<NTable::TCopyItem>& tablesToCopy) { + NTable::TTableClient client(driver); + + TStatus status = client.RetryOperationSync([&tablesToCopy](NTable::TSession session) { + auto result = session.CopyTables(tablesToCopy).GetValueSync(); + + return result; + }); + + // Debug print + TStringStream tablesStr; + bool needsComma = false; + for (const auto& copyItem : tablesToCopy) { + if (needsComma) { + tablesStr << ", "; + } else { + needsComma = true; + } + tablesStr << "{ src# " << copyItem.SourcePath() << ", dst# " << copyItem.DestinationPath().Quote() << "}"; + } + + VerifyStatus(status, TStringBuilder() << "CopyTables error, tables to be copied# " << tablesStr.Str()); + LOG_DEBUG("Tables are copied, " << tablesStr.Str()); +} + +void DropTable(TDriver driver, const TString& path) { + NTable::TTableClient client(driver); + TStatus status = client.RetryOperationSync([path](NTable::TSession session) { + auto result = session.DropTable(path).GetValueSync(); + + return result; + }); + VerifyStatus(status, TStringBuilder() << "DropTable, path" << path.Quote()); + LOG_DEBUG("Table is dropped, path: " << path.Quote()); +} + +void BackupTable(TDriver driver, const TString& dbPrefix, const TString& backupPrefix, const TString& path, const 
TFsPath& folderPath, bool schemaOnly, bool preservePoolKinds) { - Y_ENSURE(!path.empty()); - Y_ENSURE(path.back() != '/', path.Quote() << " path contains / in the end"); - - LOG_DEBUG("Going to backup table, dbPrefix: " << dbPrefix - << " backupPrefix: " << backupPrefix << " path: " << path); - - auto desc = DescribeTable(driver, JoinDatabasePath(schemaOnly ? dbPrefix : backupPrefix, path)); + Y_ENSURE(!path.empty()); + Y_ENSURE(path.back() != '/', path.Quote() << " path contains / in the end"); + + LOG_DEBUG("Going to backup table, dbPrefix: " << dbPrefix + << " backupPrefix: " << backupPrefix << " path: " << path); + + auto desc = DescribeTable(driver, JoinDatabasePath(schemaOnly ? dbPrefix : backupPrefix, path)); auto proto = ProtoFromTableDescription(desc, preservePoolKinds); - - TString schemaStr; - google::protobuf::TextFormat::PrintToString(proto, &schemaStr); - LOG_DEBUG("CreateTableRequest.proto: " << schemaStr); - TFile outFile(folderPath.Child(SCHEME_FILE_NAME), CreateAlways | WrOnly); - outFile.Write(schemaStr.data(), schemaStr.size()); - - if (!schemaOnly) { - const TString pathToTemporal = JoinDatabasePath(backupPrefix, path); - ReadTable(driver, desc, pathToTemporal, folderPath); - } -} - -void CreateClusterDirectory(const TDriver& driver, const TString& path) { - NScheme::TSchemeClient client(driver); - TStatus status = client.MakeDirectory(path).GetValueSync(); - VerifyStatus(status, TStringBuilder() << "MakeDirectory, path: " << path.Quote()); - LOG_DEBUG("Directory is created, path: " << path.Quote()); -} - -void RemoveClusterDirectory(const TDriver& driver, const TString& path) { - NScheme::TSchemeClient client(driver); - TStatus status = client.RemoveDirectory(path).GetValueSync(); - VerifyStatus(status, TStringBuilder() << "RemoveDirectory, path: " << path.Quote()); - LOG_DEBUG("Directory is removed, path: " << path.Quote()); -} - + + TString schemaStr; + google::protobuf::TextFormat::PrintToString(proto, &schemaStr); + 
LOG_DEBUG("CreateTableRequest.proto: " << schemaStr); + TFile outFile(folderPath.Child(SCHEME_FILE_NAME), CreateAlways | WrOnly); + outFile.Write(schemaStr.data(), schemaStr.size()); + + if (!schemaOnly) { + const TString pathToTemporal = JoinDatabasePath(backupPrefix, path); + ReadTable(driver, desc, pathToTemporal, folderPath); + } +} + +void CreateClusterDirectory(const TDriver& driver, const TString& path) { + NScheme::TSchemeClient client(driver); + TStatus status = client.MakeDirectory(path).GetValueSync(); + VerifyStatus(status, TStringBuilder() << "MakeDirectory, path: " << path.Quote()); + LOG_DEBUG("Directory is created, path: " << path.Quote()); +} + +void RemoveClusterDirectory(const TDriver& driver, const TString& path) { + NScheme::TSchemeClient client(driver); + TStatus status = client.RemoveDirectory(path).GetValueSync(); + VerifyStatus(status, TStringBuilder() << "RemoveDirectory, path: " << path.Quote()); + LOG_DEBUG("Directory is removed, path: " << path.Quote()); +} + void RemoveClusterDirectoryRecursive(const TDriver& driver, const TString& path) { NScheme::TSchemeClient schemeClient(driver); NTable::TTableClient tableClient(driver); @@ -523,104 +523,104 @@ static bool IsExcluded(const TString& path, const TVector<TRegExMatch>& exclusio return false; } -void BackupFolderImpl(TDriver driver, const TString& dbPrefix, const TString& backupPrefix, TString path, +void BackupFolderImpl(TDriver driver, const TString& dbPrefix, const TString& backupPrefix, TString path, const TFsPath folderPath, const TVector<TRegExMatch>& exclusionPatterns, bool schemaOnly, bool useConsistentCopyTable, bool avoidCopy, bool preservePoolKinds) { - LOG_DEBUG("Going to backup folder/table, dbPrefix: " << dbPrefix << " path: " << path); - TFile(folderPath.Child(INCOMPLETE_FILE_NAME), CreateAlways); - - TMap<TString, TAsyncStatus> copiedTablesStatuses; - TVector<NTable::TCopyItem> tablesToCopy; - // Copy all tables to temporal folder - { - 
TDbIterator<ETraverseType::Preordering> dbIt(driver, dbPrefix); - while (dbIt) { + LOG_DEBUG("Going to backup folder/table, dbPrefix: " << dbPrefix << " path: " << path); + TFile(folderPath.Child(INCOMPLETE_FILE_NAME), CreateAlways); + + TMap<TString, TAsyncStatus> copiedTablesStatuses; + TVector<NTable::TCopyItem> tablesToCopy; + // Copy all tables to temporal folder + { + TDbIterator<ETraverseType::Preordering> dbIt(driver, dbPrefix); + while (dbIt) { if (IsExcluded(dbIt.GetFullPath(), exclusionPatterns)) { LOG_DEBUG("skip path# " << dbIt.GetFullPath()); dbIt.Next(); continue; } - TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); - LOG_DEBUG("path to backup# " << childFolderPath.GetPath()); - childFolderPath.MkDir(); - TFile(childFolderPath.Child(INCOMPLETE_FILE_NAME), CreateAlways).Close(); - if (schemaOnly) { - if (dbIt.IsTable()) { - BackupTable(driver, dbIt.GetTraverseRoot(), backupPrefix, dbIt.GetRelPath(), + TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); + LOG_DEBUG("path to backup# " << childFolderPath.GetPath()); + childFolderPath.MkDir(); + TFile(childFolderPath.Child(INCOMPLETE_FILE_NAME), CreateAlways).Close(); + if (schemaOnly) { + if (dbIt.IsTable()) { + BackupTable(driver, dbIt.GetTraverseRoot(), backupPrefix, dbIt.GetRelPath(), childFolderPath, schemaOnly, preservePoolKinds); - childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); - } + childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); + } } else if (!avoidCopy) { - if (dbIt.IsTable()) { - const TString tmpTablePath = JoinDatabasePath(backupPrefix, dbIt.GetRelPath()); - if (useConsistentCopyTable) { - tablesToCopy.emplace_back(dbIt.GetFullPath(), tmpTablePath); - } else { - auto status = CopyTableAsyncStart(driver, dbIt.GetFullPath(), tmpTablePath); - copiedTablesStatuses.emplace(dbIt.GetFullPath(), std::move(status)); - } + if (dbIt.IsTable()) { + const TString tmpTablePath = JoinDatabasePath(backupPrefix, dbIt.GetRelPath()); + if 
(useConsistentCopyTable) { + tablesToCopy.emplace_back(dbIt.GetFullPath(), tmpTablePath); + } else { + auto status = CopyTableAsyncStart(driver, dbIt.GetFullPath(), tmpTablePath); + copiedTablesStatuses.emplace(dbIt.GetFullPath(), std::move(status)); + } } else if (dbIt.IsDir()) { - CreateClusterDirectory(driver, JoinDatabasePath(backupPrefix, dbIt.GetRelPath())); - } - } - dbIt.Next(); - } - } - - if (schemaOnly) { - TDbIterator<ETraverseType::Postordering> dbIt(driver, dbPrefix); - while (dbIt) { + CreateClusterDirectory(driver, JoinDatabasePath(backupPrefix, dbIt.GetRelPath())); + } + } + dbIt.Next(); + } + } + + if (schemaOnly) { + TDbIterator<ETraverseType::Postordering> dbIt(driver, dbPrefix); + while (dbIt) { if (IsExcluded(dbIt.GetFullPath(), exclusionPatterns)) { dbIt.Next(); continue; } - TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); - if (dbIt.IsTable()) { - // If table backup was not successful exception should be thrown, - // so control flow can't reach this line. Check it just to be sure - Y_ENSURE(!childFolderPath.Child(INCOMPLETE_FILE_NAME).Exists()); + TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); + if (dbIt.IsTable()) { + // If table backup was not successful exception should be thrown, + // so control flow can't reach this line. 
Check it just to be sure + Y_ENSURE(!childFolderPath.Child(INCOMPLETE_FILE_NAME).Exists()); } else if (dbIt.IsDir()) { - childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); + childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); TVector<TString> children; childFolderPath.ListNames(children); if (children.empty()) { TFile(childFolderPath.Child(EMPTY_FILE_NAME), CreateAlways); } - } + } childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); - dbIt.Next(); - } - folderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); - return; - } - + dbIt.Next(); + } + folderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); + return; + } + if (useConsistentCopyTable && !avoidCopy && tablesToCopy) { - CopyTables(driver, tablesToCopy); - } - // Read all tables from temporal folder and delete them - { - TDbIterator<ETraverseType::Postordering> dbIt(driver, dbPrefix); - while (dbIt) { + CopyTables(driver, tablesToCopy); + } + // Read all tables from temporal folder and delete them + { + TDbIterator<ETraverseType::Postordering> dbIt(driver, dbPrefix); + while (dbIt) { if (IsExcluded(dbIt.GetFullPath(), exclusionPatterns)) { dbIt.Next(); continue; } - TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); - const TString tmpTablePath = JoinDatabasePath(backupPrefix, dbIt.GetRelPath()); + TFsPath childFolderPath = folderPath.Child(dbIt.GetRelPath()); + const TString tmpTablePath = JoinDatabasePath(backupPrefix, dbIt.GetRelPath()); - if (dbIt.IsTable()) { + if (dbIt.IsTable()) { if (!useConsistentCopyTable && !avoidCopy) { - // CopyTableAsyncFinish(const TAsyncStatus& status, const TString& src, const TString& dst); - Y_ENSURE(copiedTablesStatuses.contains(dbIt.GetFullPath()), - "Table was not copied but going to be backuped, path# " << dbIt.GetFullPath().Quote()); - CopyTableAsyncFinish(copiedTablesStatuses[dbIt.GetFullPath()], dbIt.GetFullPath(), tmpTablePath); - copiedTablesStatuses.erase(dbIt.GetFullPath()); - } + // CopyTableAsyncFinish(const 
TAsyncStatus& status, const TString& src, const TString& dst); + Y_ENSURE(copiedTablesStatuses.contains(dbIt.GetFullPath()), + "Table was not copied but going to be backuped, path# " << dbIt.GetFullPath().Quote()); + CopyTableAsyncFinish(copiedTablesStatuses[dbIt.GetFullPath()], dbIt.GetFullPath(), tmpTablePath); + copiedTablesStatuses.erase(dbIt.GetFullPath()); + } BackupTable(driver, dbIt.GetTraverseRoot(), avoidCopy ? dbIt.GetTraverseRoot() : backupPrefix, dbIt.GetRelPath(), childFolderPath, schemaOnly, preservePoolKinds); if (!avoidCopy) { @@ -638,43 +638,43 @@ void BackupFolderImpl(TDriver driver, const TString& dbPrefix, const TString& ba if (!avoidCopy) { RemoveClusterDirectory(driver, tmpTablePath); } - } - - childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); - dbIt.Next(); - } - } - Y_ENSURE(copiedTablesStatuses.empty(), "Some tables was copied but not backuped, example of such table, path# " - << copiedTablesStatuses.begin()->first.Quote()); - folderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); -} - -void CheckedCreateBackupFolder(const TFsPath& folderPath) { - const bool exists = folderPath.Exists(); - if (exists) { - TVector<TString> children; - folderPath.ListNames(children); - Y_ENSURE(children.empty(), "backup folder: " << folderPath.GetPath().Quote() - << " should either not exists or be empty"); - } else { - folderPath.MkDir(); - } - LOG_DEBUG("Going to backup into folder: " << folderPath.RealPath().GetPath().Quote()); -} - -// relDbPath - relative path to directory/table to be backuped -// folderPath - relative path to folder in local filesystem where backup will be stored -void BackupFolder(TDriver driver, const TString& database, const TString& relDbPath, TFsPath folderPath, + } + + childFolderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); + dbIt.Next(); + } + } + Y_ENSURE(copiedTablesStatuses.empty(), "Some tables was copied but not backuped, example of such table, path# " + << copiedTablesStatuses.begin()->first.Quote()); + 
folderPath.Child(INCOMPLETE_FILE_NAME).DeleteIfExists(); +} + +void CheckedCreateBackupFolder(const TFsPath& folderPath) { + const bool exists = folderPath.Exists(); + if (exists) { + TVector<TString> children; + folderPath.ListNames(children); + Y_ENSURE(children.empty(), "backup folder: " << folderPath.GetPath().Quote() + << " should either not exists or be empty"); + } else { + folderPath.MkDir(); + } + LOG_DEBUG("Going to backup into folder: " << folderPath.RealPath().GetPath().Quote()); +} + +// relDbPath - relative path to directory/table to be backuped +// folderPath - relative path to folder in local filesystem where backup will be stored +void BackupFolder(TDriver driver, const TString& database, const TString& relDbPath, TFsPath folderPath, const TVector<TRegExMatch>& exclusionPatterns, bool schemaOnly, bool useConsistentCopyTable, bool avoidCopy, bool savePartialResult, bool preservePoolKinds) { - TString temporalBackupPostfix = CreateTemporalBackupName(); - if (!folderPath) { - folderPath = temporalBackupPostfix; - } - CheckedCreateBackupFolder(folderPath); - - // full path to temporal directory in database - TString tmpDbFolder; + TString temporalBackupPostfix = CreateTemporalBackupName(); + if (!folderPath) { + folderPath = temporalBackupPostfix; + } + CheckedCreateBackupFolder(folderPath); + + // full path to temporal directory in database + TString tmpDbFolder; try { if (!schemaOnly && !avoidCopy) { // Create temporal folder in database's root directory @@ -696,292 +696,292 @@ void BackupFolder(TDriver driver, const TString& database, const TString& relDbP folderPath.ForceDelete(); } throw; - } + } if (!schemaOnly && !avoidCopy) { RemoveClusterDirectoryRecursive(driver, tmpDbFolder); - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Restore -//////////////////////////////////////////////////////////////////////////////// - -TString ProcessColumnType(const TString& name, TTypeParser parser, 
NTable::TTableBuilder *builder) { - TStringStream ss; - ss << "name: " << name << "; "; - if (parser.GetKind() == TTypeParser::ETypeKind::Optional) { - ss << " optional; "; - parser.OpenOptional(); - } - ss << "kind: " << parser.GetKind() << "; "; - switch (parser.GetKind()) { - case TTypeParser::ETypeKind::Primitive: - ss << " type_id: " << parser.GetPrimitive() << "; "; - if (builder) { - builder->AddNullableColumn(name, parser.GetPrimitive()); - } - break; - case TTypeParser::ETypeKind::Decimal: - ss << " decimal_type: {" - << " precision: " << ui32(parser.GetDecimal().Precision) - << " scale: " << ui32(parser.GetDecimal().Scale) - << "}; "; - if (builder) { - builder->AddNullableColumn(name, parser.GetDecimal()); - } - break; - default: - Y_ENSURE(false, "Unexpected type kind# " << parser.GetKind() << " for column name# " << name.Quote()); - } - return ss.Str(); -} - -NTable::TTableDescription TableDescriptionFromProto(const Ydb::Table::CreateTableRequest& proto) { - NTable::TTableBuilder builder; - - for (const auto &col : proto.Getcolumns()) { - LOG_DEBUG("AddNullableColumn: " << ProcessColumnType(col.Getname(), TType(col.Gettype()), &builder)); - } - - for (const auto &primary : proto.Getprimary_key()) { - LOG_DEBUG("SetPrimaryKeyColumn: name: " << primary); - } - builder.SetPrimaryKeyColumns({proto.Getprimary_key().cbegin(), proto.Getprimary_key().cend()}); - - return builder.Build(); -} - -NTable::TTableDescription TableDescriptionFromFile(const TString& filePath) { - TFile file(filePath, OpenExisting | RdOnly); - TString str = TString::Uninitialized(file.GetLength()); - file.Read(str.Detach(), file.GetLength()); - - Ydb::Table::CreateTableRequest proto; - google::protobuf::TextFormat::ParseFromString(str, &proto); - return TableDescriptionFromProto(proto); -} - -TString SerializeColumnsToString(const TVector<TColumn>& columns, TVector<TString> primary) { - Sort(primary); - TStringStream ss; - for (const auto& col : columns) { - ss << " "; - if 
(BinarySearch(primary.cbegin(), primary.cend(), col.Name)) { - ss << "primary; "; - } - ss << ProcessColumnType(col.Name, col.Type, nullptr) << Endl; - } - // Cerr << "Parse column to : " << ss.Str() << Endl; - return ss.Str(); -} - -void CheckTableDescriptionIsSame(const NTable::TTableDescription& backupDesc, - const NTable::TTableDescription& realDesc) { - if (backupDesc.GetColumns() != realDesc.GetColumns() || - backupDesc.GetPrimaryKeyColumns() != realDesc.GetPrimaryKeyColumns()) { - LOG_ERR("Error"); - LOG_ERR("Table scheme from backup:"); - LOG_ERR(SerializeColumnsToString(backupDesc.GetColumns(), backupDesc.GetPrimaryKeyColumns())); - LOG_ERR("Table scheme from database:"); - LOG_ERR(SerializeColumnsToString(realDesc.GetColumns(), realDesc.GetPrimaryKeyColumns())); - } else { - LOG_ERR("Ok"); - } -} - -void UploadDataIntoTable(TDriver driver, const NTable::TTableDescription& tableDesc, const TString& relPath, - const TString& absPath, TFsPath folderPath, const TRestoreFolderParams& params) { - Y_ENSURE(!folderPath.Child(INCOMPLETE_DATA_FILE_NAME).Exists(), - "There is incomplete data file in folder, path# " << TString(folderPath).Quote()); - ui32 fileCounter = 0; - TFsPath dataFileName = folderPath.Child(CreateDataFileName(fileCounter++)); - - if (params.UseBulkUpsert) { - LOG_DEBUG("Going to BulkUpsert into table# " << absPath.Quote()); - } - while (dataFileName.Exists()) { - LOG_DEBUG("Going to read new data file, fileName# " << dataFileName); - - - TUploader::TOptions opts; - if (params.UploadBandwidthBPS) { - opts.Rate = (opts.Interval.Seconds() * params.UploadBandwidthBPS + IO_BUFFER_SIZE - 1) / IO_BUFFER_SIZE; - LOG_DEBUG("Custom bandwidth limit is specified, will use bandwidth# " - << HumanReadableSize(params.UploadBandwidthBPS, SF_BYTES) << "B/s" - << " RPS# " << double(opts.Rate) / opts.Interval.Seconds() << " reqs/s" - << " IO buffer size# " << HumanReadableSize(IO_BUFFER_SIZE, SF_BYTES)); - } - if (params.MaxUploadRps) { - opts.Rate = 
params.MaxUploadRps * opts.Interval.Seconds(); - } - opts.Rate = Max<ui64>(1, opts.Rate); - - TQueryFromFileIterator it(relPath, dataFileName, tableDesc.GetColumns(), IO_BUFFER_SIZE, params.MaxRowsPerQuery, - params.MaxBytesPerQuery); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Restore +//////////////////////////////////////////////////////////////////////////////// + +TString ProcessColumnType(const TString& name, TTypeParser parser, NTable::TTableBuilder *builder) { + TStringStream ss; + ss << "name: " << name << "; "; + if (parser.GetKind() == TTypeParser::ETypeKind::Optional) { + ss << " optional; "; + parser.OpenOptional(); + } + ss << "kind: " << parser.GetKind() << "; "; + switch (parser.GetKind()) { + case TTypeParser::ETypeKind::Primitive: + ss << " type_id: " << parser.GetPrimitive() << "; "; + if (builder) { + builder->AddNullableColumn(name, parser.GetPrimitive()); + } + break; + case TTypeParser::ETypeKind::Decimal: + ss << " decimal_type: {" + << " precision: " << ui32(parser.GetDecimal().Precision) + << " scale: " << ui32(parser.GetDecimal().Scale) + << "}; "; + if (builder) { + builder->AddNullableColumn(name, parser.GetDecimal()); + } + break; + default: + Y_ENSURE(false, "Unexpected type kind# " << parser.GetKind() << " for column name# " << name.Quote()); + } + return ss.Str(); +} + +NTable::TTableDescription TableDescriptionFromProto(const Ydb::Table::CreateTableRequest& proto) { + NTable::TTableBuilder builder; + + for (const auto &col : proto.Getcolumns()) { + LOG_DEBUG("AddNullableColumn: " << ProcessColumnType(col.Getname(), TType(col.Gettype()), &builder)); + } + + for (const auto &primary : proto.Getprimary_key()) { + LOG_DEBUG("SetPrimaryKeyColumn: name: " << primary); + } + builder.SetPrimaryKeyColumns({proto.Getprimary_key().cbegin(), proto.Getprimary_key().cend()}); + + return builder.Build(); +} + +NTable::TTableDescription TableDescriptionFromFile(const TString& filePath) { + TFile 
file(filePath, OpenExisting | RdOnly); + TString str = TString::Uninitialized(file.GetLength()); + file.Read(str.Detach(), file.GetLength()); + + Ydb::Table::CreateTableRequest proto; + google::protobuf::TextFormat::ParseFromString(str, &proto); + return TableDescriptionFromProto(proto); +} + +TString SerializeColumnsToString(const TVector<TColumn>& columns, TVector<TString> primary) { + Sort(primary); + TStringStream ss; + for (const auto& col : columns) { + ss << " "; + if (BinarySearch(primary.cbegin(), primary.cend(), col.Name)) { + ss << "primary; "; + } + ss << ProcessColumnType(col.Name, col.Type, nullptr) << Endl; + } + // Cerr << "Parse column to : " << ss.Str() << Endl; + return ss.Str(); +} + +void CheckTableDescriptionIsSame(const NTable::TTableDescription& backupDesc, + const NTable::TTableDescription& realDesc) { + if (backupDesc.GetColumns() != realDesc.GetColumns() || + backupDesc.GetPrimaryKeyColumns() != realDesc.GetPrimaryKeyColumns()) { + LOG_ERR("Error"); + LOG_ERR("Table scheme from backup:"); + LOG_ERR(SerializeColumnsToString(backupDesc.GetColumns(), backupDesc.GetPrimaryKeyColumns())); + LOG_ERR("Table scheme from database:"); + LOG_ERR(SerializeColumnsToString(realDesc.GetColumns(), realDesc.GetPrimaryKeyColumns())); + } else { + LOG_ERR("Ok"); + } +} + +void UploadDataIntoTable(TDriver driver, const NTable::TTableDescription& tableDesc, const TString& relPath, + const TString& absPath, TFsPath folderPath, const TRestoreFolderParams& params) { + Y_ENSURE(!folderPath.Child(INCOMPLETE_DATA_FILE_NAME).Exists(), + "There is incomplete data file in folder, path# " << TString(folderPath).Quote()); + ui32 fileCounter = 0; + TFsPath dataFileName = folderPath.Child(CreateDataFileName(fileCounter++)); + + if (params.UseBulkUpsert) { + LOG_DEBUG("Going to BulkUpsert into table# " << absPath.Quote()); + } + while (dataFileName.Exists()) { + LOG_DEBUG("Going to read new data file, fileName# " << dataFileName); + + + TUploader::TOptions opts; + if 
(params.UploadBandwidthBPS) { + opts.Rate = (opts.Interval.Seconds() * params.UploadBandwidthBPS + IO_BUFFER_SIZE - 1) / IO_BUFFER_SIZE; + LOG_DEBUG("Custom bandwidth limit is specified, will use bandwidth# " + << HumanReadableSize(params.UploadBandwidthBPS, SF_BYTES) << "B/s" + << " RPS# " << double(opts.Rate) / opts.Interval.Seconds() << " reqs/s" + << " IO buffer size# " << HumanReadableSize(IO_BUFFER_SIZE, SF_BYTES)); + } + if (params.MaxUploadRps) { + opts.Rate = params.MaxUploadRps * opts.Interval.Seconds(); + } + opts.Rate = Max<ui64>(1, opts.Rate); + + TQueryFromFileIterator it(relPath, dataFileName, tableDesc.GetColumns(), IO_BUFFER_SIZE, params.MaxRowsPerQuery, + params.MaxBytesPerQuery); NTable::TTableClient client(driver); TUploader uploader(opts, client, it.GetQueryString()); - if (!params.UseBulkUpsert) { - LOG_DEBUG("Query string:\n" << it.GetQueryString()); - } - - while (!it.Empty()) { - bool ok = false; - if (params.UseBulkUpsert) { - ok = uploader.Push(absPath, it.ReadNextGetValue()); - } else { - ok = uploader.Push(it.ReadNextGetParams()); - } - Y_ENSURE(ok, "Error in uploader.Push()"); - } - uploader.WaitAllJobs(); - dataFileName = folderPath.Child(CreateDataFileName(fileCounter++)); - } -} - -void RestoreTable(TDriver driver, const TString& database, const TString& prefix, TFsPath folderPath, - const TRestoreFolderParams& params) { - Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), - "There is incomplete file in folder, path# " << TString(folderPath).Quote()); - NTable::TTableClient client(driver); - - const TString relPath = JoinDatabasePath(prefix, folderPath.GetName()); - const TString absPath = JoinDatabasePath(database, relPath); - LOG_DEBUG("Restore table from folder: " << folderPath << " in database path# " << absPath.Quote()); - - NTable::TTableDescription tableDesc = TableDescriptionFromFile(folderPath.Child(SCHEME_FILE_NAME)); - - - if (params.OnlyCheck) { - LOG_ERR("Check table: " << absPath.Quote() << "..."); - 
NTable::TTableDescription tableDescReal = DescribeTable(driver, absPath); - CheckTableDescriptionIsSame(tableDesc, tableDescReal); - } else { - auto timer = GetVerbosity() - ? MakeHolder<TScopedTimer>(TStringBuilder() << "Done restore table# " << absPath.Quote() << " took# ") - : nullptr; - // Create Table - TStatus status = client.RetryOperationSync([absPath, &tableDesc](NTable::TSession session) { - auto result = session.CreateTable(absPath, std::move(tableDesc)).GetValueSync(); - return result; - }); - VerifyStatus(status, TStringBuilder() << "CreateTable on path: " << absPath.Quote()); - LOG_DEBUG("Table is created, path: " << absPath.Quote()); - if (!params.SchemaOnly) { - UploadDataIntoTable(driver, tableDesc, relPath, absPath, folderPath, params); - } - } -} - -void RestoreFolderImpl(TDriver driver, const TString& database, const TString& prefix, TFsPath folderPath, - const TRestoreFolderParams& params) { - LOG_DEBUG("Restore folder: " << folderPath); - Y_ENSURE(folderPath, "folderPath cannot be empty on restore, please specify path to folder containing backup"); - Y_ENSURE(folderPath.IsDirectory(), "Specified folderPath " << folderPath.GetPath().Quote() << " must be a folder"); - Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), - "There is incomplete file in folder, path# " << TString(folderPath).Quote()); - - if (prefix != "/" && !params.OnlyCheck) { - LOG_DEBUG("Create prefix folder: " << prefix); - NScheme::TSchemeClient client(driver); - TString path = JoinDatabasePath(database, prefix); - TStatus status = client.MakeDirectory(path).GetValueSync(); - VerifyStatus(status, TStringBuilder() << "MakeDirectory on path: " << path.Quote()); - } - - if (folderPath.Child(SCHEME_FILE_NAME).Exists()) { - RestoreTable(driver, database, prefix, folderPath, params); - } else { - TVector<TFsPath> children; - folderPath.List(children); - for (const auto& child : children) { - Y_ENSURE(folderPath.IsDirectory(), "Non directory and non table folder inside backup 
tree, " - "path: " << child.GetPath().Quote()); - if (child.Child(SCHEME_FILE_NAME).Exists()) { - RestoreTable(driver, database, prefix, child, params); - } else { - RestoreFolderImpl(driver, database, JoinDatabasePath(prefix, child.GetName()), child, params); - } - } - } -} - -static bool IsNamePresentedInDir(NScheme::TListDirectoryResult listResult, const TString& name) { - for (const auto& child : listResult.GetChildren()) { - if (child.Name == name) { - return true; - } - } - return false; -} - -void CheckTablesAbsence(NScheme::TSchemeClient client, const TString& database, const TString& prefix, TFsPath folderPath) { - Y_ENSURE(folderPath, "folderPath cannot be empty on restore, please specify path to folder containing backup"); - Y_ENSURE(folderPath.IsDirectory(), "Specified folderPath " << folderPath.GetPath().Quote() << " must be a folder"); - Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), - "There is incomplete file in folder, path# " << TString(folderPath).Quote()); - - const TString path = JoinDatabasePath(database, prefix); - TString name = folderPath.GetName(); - - NScheme::TListDirectoryResult listResult = client.ListDirectory(path).GetValueSync(); - VerifyStatus(listResult, TStringBuilder() << "ListDirectory, path: " << path.Quote()); - - const bool isTable = folderPath.Child(SCHEME_FILE_NAME).Exists(); - if (isTable) { - Y_ENSURE(!IsNamePresentedInDir(listResult, name), "Table with name# " << name.Quote() - << " is presented in path# " << path.Quote()); - LOG_DEBUG("\tOk! 
Table " << name.Quote() << " is absent in database path# " << path.Quote()); - } else { - TVector<TFsPath> children; - folderPath.List(children); - for (const auto& child : children) { - const bool isChildTable = child.Child(SCHEME_FILE_NAME).Exists(); - const TString childName = child.GetName(); - const bool isChildPresented = IsNamePresentedInDir(listResult, childName); - if (isChildTable) { - Y_ENSURE(!isChildPresented, "Table with name# " << childName.Quote() - << " is presented in path# " << path.Quote()); - LOG_DEBUG("\tOk! Table " << childName.Quote() << " is absent in database path# " - << path.Quote()); - } else { - if (isChildPresented) { - LOG_DEBUG("\tOk! Directory " << childName.Quote() << " is presented in database path# " - << path.Quote() << ", so check tables in that dir"); - CheckTablesAbsence(client, database, JoinDatabasePath(prefix, child.GetName()), child); - } else { - LOG_DEBUG("\tOk! Directory " << childName.Quote() << " is absent in database path# " - << path.Quote()); - } - } - } - } -} - -void RestoreFolder(TDriver driver, const TString& database, const TString& prefix, const TFsPath folderPath, - const TRestoreFolderParams& params) { - NScheme::TSchemeClient client(driver); - Y_ENSURE(prefix, "restore prefix cannot be empty, database# " << database.Quote() << " prefix# " << prefix.Quote()); - - if (params.CheckTablesAbsence && !params.OnlyCheck) { - LOG_DEBUG("Check absence of tables to be restored"); - if (prefix != "/") { - TString path = JoinDatabasePath(database, prefix); - TString parent = ParentPathFromDbPath(path); - TString name = NameFromDbPath(path); - LOG_DEBUG("Going to list parent# " << parent.Quote() << " for path path# " << path.Quote()); - NScheme::TListDirectoryResult listResult = client.ListDirectory(parent).GetValueSync(); - VerifyStatus(listResult, TStringBuilder() << "ListDirectory, path# " << parent.Quote()); - if (IsNamePresentedInDir(listResult, name)) { - CheckTablesAbsence(client, database, prefix, folderPath); 
- } else { - LOG_DEBUG("\tOk! restore directory# " << path.Quote() << " is absent in database"); - } - } else { - CheckTablesAbsence(client, database, prefix, folderPath); - } - LOG_DEBUG("Check done, everything is Ok"); - } - RestoreFolderImpl(driver, database, prefix, folderPath, params); -} - -} // NYdb::NBackup + if (!params.UseBulkUpsert) { + LOG_DEBUG("Query string:\n" << it.GetQueryString()); + } + + while (!it.Empty()) { + bool ok = false; + if (params.UseBulkUpsert) { + ok = uploader.Push(absPath, it.ReadNextGetValue()); + } else { + ok = uploader.Push(it.ReadNextGetParams()); + } + Y_ENSURE(ok, "Error in uploader.Push()"); + } + uploader.WaitAllJobs(); + dataFileName = folderPath.Child(CreateDataFileName(fileCounter++)); + } +} + +void RestoreTable(TDriver driver, const TString& database, const TString& prefix, TFsPath folderPath, + const TRestoreFolderParams& params) { + Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), + "There is incomplete file in folder, path# " << TString(folderPath).Quote()); + NTable::TTableClient client(driver); + + const TString relPath = JoinDatabasePath(prefix, folderPath.GetName()); + const TString absPath = JoinDatabasePath(database, relPath); + LOG_DEBUG("Restore table from folder: " << folderPath << " in database path# " << absPath.Quote()); + + NTable::TTableDescription tableDesc = TableDescriptionFromFile(folderPath.Child(SCHEME_FILE_NAME)); + + + if (params.OnlyCheck) { + LOG_ERR("Check table: " << absPath.Quote() << "..."); + NTable::TTableDescription tableDescReal = DescribeTable(driver, absPath); + CheckTableDescriptionIsSame(tableDesc, tableDescReal); + } else { + auto timer = GetVerbosity() + ? 
MakeHolder<TScopedTimer>(TStringBuilder() << "Done restore table# " << absPath.Quote() << " took# ") + : nullptr; + // Create Table + TStatus status = client.RetryOperationSync([absPath, &tableDesc](NTable::TSession session) { + auto result = session.CreateTable(absPath, std::move(tableDesc)).GetValueSync(); + return result; + }); + VerifyStatus(status, TStringBuilder() << "CreateTable on path: " << absPath.Quote()); + LOG_DEBUG("Table is created, path: " << absPath.Quote()); + if (!params.SchemaOnly) { + UploadDataIntoTable(driver, tableDesc, relPath, absPath, folderPath, params); + } + } +} + +void RestoreFolderImpl(TDriver driver, const TString& database, const TString& prefix, TFsPath folderPath, + const TRestoreFolderParams& params) { + LOG_DEBUG("Restore folder: " << folderPath); + Y_ENSURE(folderPath, "folderPath cannot be empty on restore, please specify path to folder containing backup"); + Y_ENSURE(folderPath.IsDirectory(), "Specified folderPath " << folderPath.GetPath().Quote() << " must be a folder"); + Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), + "There is incomplete file in folder, path# " << TString(folderPath).Quote()); + + if (prefix != "/" && !params.OnlyCheck) { + LOG_DEBUG("Create prefix folder: " << prefix); + NScheme::TSchemeClient client(driver); + TString path = JoinDatabasePath(database, prefix); + TStatus status = client.MakeDirectory(path).GetValueSync(); + VerifyStatus(status, TStringBuilder() << "MakeDirectory on path: " << path.Quote()); + } + + if (folderPath.Child(SCHEME_FILE_NAME).Exists()) { + RestoreTable(driver, database, prefix, folderPath, params); + } else { + TVector<TFsPath> children; + folderPath.List(children); + for (const auto& child : children) { + Y_ENSURE(folderPath.IsDirectory(), "Non directory and non table folder inside backup tree, " + "path: " << child.GetPath().Quote()); + if (child.Child(SCHEME_FILE_NAME).Exists()) { + RestoreTable(driver, database, prefix, child, params); + } else { + 
RestoreFolderImpl(driver, database, JoinDatabasePath(prefix, child.GetName()), child, params); + } + } + } +} + +static bool IsNamePresentedInDir(NScheme::TListDirectoryResult listResult, const TString& name) { + for (const auto& child : listResult.GetChildren()) { + if (child.Name == name) { + return true; + } + } + return false; +} + +void CheckTablesAbsence(NScheme::TSchemeClient client, const TString& database, const TString& prefix, TFsPath folderPath) { + Y_ENSURE(folderPath, "folderPath cannot be empty on restore, please specify path to folder containing backup"); + Y_ENSURE(folderPath.IsDirectory(), "Specified folderPath " << folderPath.GetPath().Quote() << " must be a folder"); + Y_ENSURE(!folderPath.Child(INCOMPLETE_FILE_NAME).Exists(), + "There is incomplete file in folder, path# " << TString(folderPath).Quote()); + + const TString path = JoinDatabasePath(database, prefix); + TString name = folderPath.GetName(); + + NScheme::TListDirectoryResult listResult = client.ListDirectory(path).GetValueSync(); + VerifyStatus(listResult, TStringBuilder() << "ListDirectory, path: " << path.Quote()); + + const bool isTable = folderPath.Child(SCHEME_FILE_NAME).Exists(); + if (isTable) { + Y_ENSURE(!IsNamePresentedInDir(listResult, name), "Table with name# " << name.Quote() + << " is presented in path# " << path.Quote()); + LOG_DEBUG("\tOk! Table " << name.Quote() << " is absent in database path# " << path.Quote()); + } else { + TVector<TFsPath> children; + folderPath.List(children); + for (const auto& child : children) { + const bool isChildTable = child.Child(SCHEME_FILE_NAME).Exists(); + const TString childName = child.GetName(); + const bool isChildPresented = IsNamePresentedInDir(listResult, childName); + if (isChildTable) { + Y_ENSURE(!isChildPresented, "Table with name# " << childName.Quote() + << " is presented in path# " << path.Quote()); + LOG_DEBUG("\tOk! 
Table " << childName.Quote() << " is absent in database path# " + << path.Quote()); + } else { + if (isChildPresented) { + LOG_DEBUG("\tOk! Directory " << childName.Quote() << " is presented in database path# " + << path.Quote() << ", so check tables in that dir"); + CheckTablesAbsence(client, database, JoinDatabasePath(prefix, child.GetName()), child); + } else { + LOG_DEBUG("\tOk! Directory " << childName.Quote() << " is absent in database path# " + << path.Quote()); + } + } + } + } +} + +void RestoreFolder(TDriver driver, const TString& database, const TString& prefix, const TFsPath folderPath, + const TRestoreFolderParams& params) { + NScheme::TSchemeClient client(driver); + Y_ENSURE(prefix, "restore prefix cannot be empty, database# " << database.Quote() << " prefix# " << prefix.Quote()); + + if (params.CheckTablesAbsence && !params.OnlyCheck) { + LOG_DEBUG("Check absence of tables to be restored"); + if (prefix != "/") { + TString path = JoinDatabasePath(database, prefix); + TString parent = ParentPathFromDbPath(path); + TString name = NameFromDbPath(path); + LOG_DEBUG("Going to list parent# " << parent.Quote() << " for path path# " << path.Quote()); + NScheme::TListDirectoryResult listResult = client.ListDirectory(parent).GetValueSync(); + VerifyStatus(listResult, TStringBuilder() << "ListDirectory, path# " << parent.Quote()); + if (IsNamePresentedInDir(listResult, name)) { + CheckTablesAbsence(client, database, prefix, folderPath); + } else { + LOG_DEBUG("\tOk! 
restore directory# " << path.Quote() << " is absent in database"); + } + } else { + CheckTablesAbsence(client, database, prefix, folderPath); + } + LOG_DEBUG("Check done, everything is Ok"); + } + RestoreFolderImpl(driver, database, prefix, folderPath, params); +} + +} // NYdb::NBackup diff --git a/ydb/library/backup/backup.h b/ydb/library/backup/backup.h index a0e7eb21c4..269f96d9b9 100644 --- a/ydb/library/backup/backup.h +++ b/ydb/library/backup/backup.h @@ -1,70 +1,70 @@ -#pragma once - +#pragma once + #include <ydb/public/sdk/cpp/client/ydb_driver/driver.h> #include <ydb/public/sdk/cpp/client/ydb_table/table.h> #include <ydb/public/sdk/cpp/client/ydb_value/value.h> - + #include <library/cpp/regex/pcre/regexp.h> -#include <util/folder/path.h> -#include <util/generic/singleton.h> -#include <util/stream/output.h> -#include <util/system/file.h> - -#include "util.h" - -namespace NYdb { -namespace NBackup { - -class TYdbErrorException : public yexception { -public: - TStatus Status; - - TYdbErrorException(const TStatus& status) - : Status(status) {} - - void LogToStderr() const { - LOG_ERR("Ydb error, status# " << Status.GetStatus()); - if (what()) { - LOG_ERR("\t" << "What# " << what()); - } - LOG_ERR("\t" << Status.GetIssues().ToString()); - } -}; - -void BackupFolder(TDriver driver, const TString& database, const TString& relDbPath, TFsPath folderPath, +#include <util/folder/path.h> +#include <util/generic/singleton.h> +#include <util/stream/output.h> +#include <util/system/file.h> + +#include "util.h" + +namespace NYdb { +namespace NBackup { + +class TYdbErrorException : public yexception { +public: + TStatus Status; + + TYdbErrorException(const TStatus& status) + : Status(status) {} + + void LogToStderr() const { + LOG_ERR("Ydb error, status# " << Status.GetStatus()); + if (what()) { + LOG_ERR("\t" << "What# " << what()); + } + LOG_ERR("\t" << Status.GetIssues().ToString()); + } +}; + +void BackupFolder(TDriver driver, const TString& database, const TString& 
relDbPath, TFsPath folderPath, const TVector<TRegExMatch>& exclusionPatterns, bool schemaOnly, bool useConsistentCopyTable, bool avoidCopy = false, bool savePartialResult = false, bool preservePoolKinds = false); - -struct TRestoreFolderParams { - bool OnlyCheck = false; - bool SchemaOnly = false; - bool CheckTablesAbsence = true; - //////////////////////////////////////// - // Only one parameters set can be used. Either - ui64 UploadBandwidthBPS = 0; - // or - ui64 MaxRowsPerQuery = 0; - ui64 MaxBytesPerQuery = 0; - ui64 MaxUploadRps = 0; - //////////////////////////////////////// - bool UseBulkUpsert = false; - - bool CheckRps() const { - bool oldBPSLimit = UploadBandwidthBPS > 0; - bool newRpsLimit = MaxRowsPerQuery > 0 || MaxBytesPerQuery > 0 || MaxUploadRps > 0; - return !oldBPSLimit || !newRpsLimit; - } -}; - -void RestoreFolder(TDriver driver, const TString& database, const TString& prefix, const TFsPath folderPath, - const TRestoreFolderParams& params); - -// For unit-tests only -TMaybe<TValue> ProcessResultSet(TStringStream& ss, TResultSetParser resultSetParser, - TFile* dataFile = nullptr, const NTable::TTableDescription* desc = nullptr); -void PrintValue(IOutputStream& out, TValueParser& parser); - -} // NBackup -} // NYdb + +struct TRestoreFolderParams { + bool OnlyCheck = false; + bool SchemaOnly = false; + bool CheckTablesAbsence = true; + //////////////////////////////////////// + // Only one parameters set can be used. 
Either + ui64 UploadBandwidthBPS = 0; + // or + ui64 MaxRowsPerQuery = 0; + ui64 MaxBytesPerQuery = 0; + ui64 MaxUploadRps = 0; + //////////////////////////////////////// + bool UseBulkUpsert = false; + + bool CheckRps() const { + bool oldBPSLimit = UploadBandwidthBPS > 0; + bool newRpsLimit = MaxRowsPerQuery > 0 || MaxBytesPerQuery > 0 || MaxUploadRps > 0; + return !oldBPSLimit || !newRpsLimit; + } +}; + +void RestoreFolder(TDriver driver, const TString& database, const TString& prefix, const TFsPath folderPath, + const TRestoreFolderParams& params); + +// For unit-tests only +TMaybe<TValue> ProcessResultSet(TStringStream& ss, TResultSetParser resultSetParser, + TFile* dataFile = nullptr, const NTable::TTableDescription* desc = nullptr); +void PrintValue(IOutputStream& out, TValueParser& parser); + +} // NBackup +} // NYdb diff --git a/ydb/library/backup/db_iterator.h b/ydb/library/backup/db_iterator.h index 121385fa77..3abc8ecb99 100644 --- a/ydb/library/backup/db_iterator.h +++ b/ydb/library/backup/db_iterator.h @@ -1,200 +1,200 @@ -#pragma once - +#pragma once + #include <ydb/public/sdk/cpp/client/ydb_table/table.h> #include <ydb/public/sdk/cpp/client/ydb_scheme/scheme.h> - -#include <util/folder/path.h> + +#include <util/folder/path.h> #include <util/generic/deque.h> - -namespace NYdb { - -//////////////////////////////////////////////////////////////////////////////// -// Traverse a directory in a database in DepthFirst order -//////////////////////////////////////////////////////////////////////////////// - -struct TSchemeEntryWithPath { - NScheme::TSchemeEntry Entry; - // Path relative to TraverseRoot - TString RelParentPath; - - bool IsListed; - - TSchemeEntryWithPath(const NScheme::TSchemeEntry& entry, const TString& relParentPath, bool isListed) - : Entry(entry) - , RelParentPath(relParentPath) - , IsListed(isListed) - {} -}; - -enum class ETraverseType { - Preordering, - Postordering, -}; - -template<ETraverseType Ordering> -class TDbIterator { 
-private: - NScheme::TSchemeClient Client; - - TString TraverseRoot; - TDeque<TSchemeEntryWithPath> NextNodes; - -public: - TDbIterator(TDriver driver, const TString& fullPath) - : Client(driver) - { - NScheme::TListDirectoryResult listResult = Client.ListDirectory(fullPath).GetValueSync(); - Y_ENSURE(listResult.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " - << fullPath.Quote()); - - if (listResult.GetEntry().Type == NScheme::ESchemeEntryType::Table) { - TPathSplitUnix parentPath(fullPath); - parentPath.pop_back(); - TraverseRoot = parentPath.Reconstruct(); - NextNodes.emplace_front(listResult.GetEntry(), "", true); - } else { - TraverseRoot = fullPath; - for (const auto& x : listResult.GetChildren()) { - NextNodes.emplace_front(x, "", false); - } - - while (NextNodes && IsSkipped()) { - NextNodes.pop_front(); - } - - switch (Ordering) { - case ETraverseType::Preordering: { - break; - } - case ETraverseType::Postordering: { + +namespace NYdb { + +//////////////////////////////////////////////////////////////////////////////// +// Traverse a directory in a database in DepthFirst order +//////////////////////////////////////////////////////////////////////////////// + +struct TSchemeEntryWithPath { + NScheme::TSchemeEntry Entry; + // Path relative to TraverseRoot + TString RelParentPath; + + bool IsListed; + + TSchemeEntryWithPath(const NScheme::TSchemeEntry& entry, const TString& relParentPath, bool isListed) + : Entry(entry) + , RelParentPath(relParentPath) + , IsListed(isListed) + {} +}; + +enum class ETraverseType { + Preordering, + Postordering, +}; + +template<ETraverseType Ordering> +class TDbIterator { +private: + NScheme::TSchemeClient Client; + + TString TraverseRoot; + TDeque<TSchemeEntryWithPath> NextNodes; + +public: + TDbIterator(TDriver driver, const TString& fullPath) + : Client(driver) + { + NScheme::TListDirectoryResult listResult = Client.ListDirectory(fullPath).GetValueSync(); + Y_ENSURE(listResult.IsSuccess(), "Can't list 
directory, maybe it doesn't exist, dbPath# " + << fullPath.Quote()); + + if (listResult.GetEntry().Type == NScheme::ESchemeEntryType::Table) { + TPathSplitUnix parentPath(fullPath); + parentPath.pop_back(); + TraverseRoot = parentPath.Reconstruct(); + NextNodes.emplace_front(listResult.GetEntry(), "", true); + } else { + TraverseRoot = fullPath; + for (const auto& x : listResult.GetChildren()) { + NextNodes.emplace_front(x, "", false); + } + + while (NextNodes && IsSkipped()) { + NextNodes.pop_front(); + } + + switch (Ordering) { + case ETraverseType::Preordering: { + break; + } + case ETraverseType::Postordering: { while (NextNodes && IsDir() && !IsListed()) { - const TString& fullPath = GetFullPath(); - NScheme::TListDirectoryResult childList = Client.ListDirectory(fullPath).GetValueSync(); - Y_ENSURE(childList.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " - << fullPath.Quote()); - NextNodes.front().IsListed = true; - - const auto& children = childList.GetChildren(); - if (!children) { - break; - } - const auto& currRelPath = GetRelPath(); - for (const auto& x : children) { - NextNodes.emplace_front(x, currRelPath, false); - } - - while (NextNodes && IsSkipped()) { - NextNodes.pop_front(); - } - } - break; - } - default: - Y_FAIL(); - } - } - } - - const NScheme::TSchemeEntry *GetCurrentNode() const { - Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); - return &NextNodes.front().Entry; - } - - TString GetFullPath() const { - Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); - TPathSplitUnix path(TraverseRoot); - path.AppendComponent(NextNodes.front().RelParentPath); - path.AppendComponent(NextNodes.front().Entry.Name); - return path.Reconstruct(); - } - - TString GetTraverseRoot() const { - return TraverseRoot; - } - - TString GetRelParentPath() const { - Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); - return NextNodes.front().RelParentPath; - } - - TString GetRelPath() const { - Y_ENSURE(NextNodes, "Empty TDbIterator 
dereference"); - TPathSplitUnix path(NextNodes.front().RelParentPath); - path.AppendComponent(NextNodes.front().Entry.Name); - return path.Reconstruct(); - } - - bool IsTable() const { - return GetCurrentNode()->Type == NScheme::ESchemeEntryType::Table; - } - - bool IsDir() const { - return GetCurrentNode()->Type == NScheme::ESchemeEntryType::Directory; - } - - bool IsListed() const { - return NextNodes.front().IsListed; - } - - explicit operator bool() const { - return bool{NextNodes}; - } - - bool IsSkipped() const { - return IsDir() && GetCurrentNode()->Name.StartsWith("~") || GetCurrentNode()->Name.StartsWith(".sys"); - } - - void Next() { - switch (Ordering) { - case ETraverseType::Preordering: { - if (IsDir()) { - NScheme::TListDirectoryResult listResult = Client.ListDirectory(GetFullPath()).GetValueSync(); - Y_ENSURE(listResult.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " - << GetFullPath().Quote()); - - for (const auto& x : listResult.GetChildren()) { - NextNodes.emplace_back(x, GetRelPath(), false); - } - } - NextNodes.pop_front(); - - while (NextNodes && IsSkipped()) { - NextNodes.pop_front(); - } - break; - } - case ETraverseType::Postordering: { - if (!IsDir() || IsListed()) { - NextNodes.pop_front(); - } - while (NextNodes && IsSkipped()) { - NextNodes.pop_front(); - } - if (!NextNodes) { - return; - } - - while (IsDir() && !IsListed()) { - const TString& fullPath = GetFullPath(); - NScheme::TListDirectoryResult listResult = Client.ListDirectory(fullPath).GetValueSync(); - Y_ENSURE(listResult.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " - << fullPath.Quote()); - const auto& currRelPath = GetRelPath(); - NextNodes.front().IsListed = true; - for (const auto& x : listResult.GetChildren()) { - NextNodes.emplace_front(x, currRelPath, false); - } - - while (NextNodes && IsSkipped()) { - NextNodes.pop_front(); - } - } - break; - } - default: - Y_FAIL(); - } - } -}; - -} + const TString& fullPath = 
GetFullPath(); + NScheme::TListDirectoryResult childList = Client.ListDirectory(fullPath).GetValueSync(); + Y_ENSURE(childList.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " + << fullPath.Quote()); + NextNodes.front().IsListed = true; + + const auto& children = childList.GetChildren(); + if (!children) { + break; + } + const auto& currRelPath = GetRelPath(); + for (const auto& x : children) { + NextNodes.emplace_front(x, currRelPath, false); + } + + while (NextNodes && IsSkipped()) { + NextNodes.pop_front(); + } + } + break; + } + default: + Y_FAIL(); + } + } + } + + const NScheme::TSchemeEntry *GetCurrentNode() const { + Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); + return &NextNodes.front().Entry; + } + + TString GetFullPath() const { + Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); + TPathSplitUnix path(TraverseRoot); + path.AppendComponent(NextNodes.front().RelParentPath); + path.AppendComponent(NextNodes.front().Entry.Name); + return path.Reconstruct(); + } + + TString GetTraverseRoot() const { + return TraverseRoot; + } + + TString GetRelParentPath() const { + Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); + return NextNodes.front().RelParentPath; + } + + TString GetRelPath() const { + Y_ENSURE(NextNodes, "Empty TDbIterator dereference"); + TPathSplitUnix path(NextNodes.front().RelParentPath); + path.AppendComponent(NextNodes.front().Entry.Name); + return path.Reconstruct(); + } + + bool IsTable() const { + return GetCurrentNode()->Type == NScheme::ESchemeEntryType::Table; + } + + bool IsDir() const { + return GetCurrentNode()->Type == NScheme::ESchemeEntryType::Directory; + } + + bool IsListed() const { + return NextNodes.front().IsListed; + } + + explicit operator bool() const { + return bool{NextNodes}; + } + + bool IsSkipped() const { + return IsDir() && GetCurrentNode()->Name.StartsWith("~") || GetCurrentNode()->Name.StartsWith(".sys"); + } + + void Next() { + switch (Ordering) { + case 
ETraverseType::Preordering: { + if (IsDir()) { + NScheme::TListDirectoryResult listResult = Client.ListDirectory(GetFullPath()).GetValueSync(); + Y_ENSURE(listResult.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " + << GetFullPath().Quote()); + + for (const auto& x : listResult.GetChildren()) { + NextNodes.emplace_back(x, GetRelPath(), false); + } + } + NextNodes.pop_front(); + + while (NextNodes && IsSkipped()) { + NextNodes.pop_front(); + } + break; + } + case ETraverseType::Postordering: { + if (!IsDir() || IsListed()) { + NextNodes.pop_front(); + } + while (NextNodes && IsSkipped()) { + NextNodes.pop_front(); + } + if (!NextNodes) { + return; + } + + while (IsDir() && !IsListed()) { + const TString& fullPath = GetFullPath(); + NScheme::TListDirectoryResult listResult = Client.ListDirectory(fullPath).GetValueSync(); + Y_ENSURE(listResult.IsSuccess(), "Can't list directory, maybe it doesn't exist, dbPath# " + << fullPath.Quote()); + const auto& currRelPath = GetRelPath(); + NextNodes.front().IsListed = true; + for (const auto& x : listResult.GetChildren()) { + NextNodes.emplace_front(x, currRelPath, false); + } + + while (NextNodes && IsSkipped()) { + NextNodes.pop_front(); + } + } + break; + } + default: + Y_FAIL(); + } + } +}; + +} diff --git a/ydb/library/backup/query_builder.cpp b/ydb/library/backup/query_builder.cpp index c7249a898d..dd931c82bb 100644 --- a/ydb/library/backup/query_builder.cpp +++ b/ydb/library/backup/query_builder.cpp @@ -1,184 +1,184 @@ -#include "query_builder.h" - -#include "backup.h" - +#include "query_builder.h" + +#include "backup.h" + #include <ydb/library/dynumber/dynumber.h> #include <ydb/public/api/protos/ydb_value.pb.h> #include <ydb/public/sdk/cpp/client/ydb_proto/accessor.h> - -#include <util/string/builder.h> + +#include <util/string/builder.h> #include <library/cpp/string_utils/quote/quote.h> - -namespace NYdb::NBackup { - -static constexpr i64 METERING_ROW_PRECISION = 1024; - 
-//////////////////////////////////////////////////////////////////////////////// -// TQueryBuilder -//////////////////////////////////////////////////////////////////////////////// - -TString TQueryBuilder::BuildQuery(const TString &path) { - TStringStream query; - query.Reserve(1024); - query << "--!syntax_v1\n"; - query << "DECLARE $items AS List<Struct<"; - - bool needsComma = false; - for (auto& col : Columns) { - if (needsComma) { - query << ", "; - } - query << "'" << col.Name << "'" << ": " << col.Type; - needsComma = true; - } - query << ">>; "; - query << "REPLACE INTO `" << path << "` " - << "SELECT * FROM AS_TABLE($items);"; - - return query.Str(); -} - -template<typename T> -TMaybe<T> TryParse(const TStringBuf& buf) { - if (buf == "null") { - return {}; - } - - T tmp; - TMemoryInput stream(buf); - stream >> tmp; - return tmp; -} - -template<> -TMaybe<TString> TryParse(const TStringBuf& buf) { - if (buf == "null") { - return {}; - } - - Y_ENSURE(buf.Size() >= 1 && buf.front() == '"' && buf.back() == '"', + +namespace NYdb::NBackup { + +static constexpr i64 METERING_ROW_PRECISION = 1024; + +//////////////////////////////////////////////////////////////////////////////// +// TQueryBuilder +//////////////////////////////////////////////////////////////////////////////// + +TString TQueryBuilder::BuildQuery(const TString &path) { + TStringStream query; + query.Reserve(1024); + query << "--!syntax_v1\n"; + query << "DECLARE $items AS List<Struct<"; + + bool needsComma = false; + for (auto& col : Columns) { + if (needsComma) { + query << ", "; + } + query << "'" << col.Name << "'" << ": " << col.Type; + needsComma = true; + } + query << ">>; "; + query << "REPLACE INTO `" << path << "` " + << "SELECT * FROM AS_TABLE($items);"; + + return query.Str(); +} + +template<typename T> +TMaybe<T> TryParse(const TStringBuf& buf) { + if (buf == "null") { + return {}; + } + + T tmp; + TMemoryInput stream(buf); + stream >> tmp; + return tmp; +} + +template<> 
+TMaybe<TString> TryParse(const TStringBuf& buf) { + if (buf == "null") { + return {}; + } + + Y_ENSURE(buf.Size() >= 1 && buf.front() == '"' && buf.back() == '"', "Source string neither surrounded by quotes nor equals to null, string# " << TString{buf}.Quote()); - TString tmp; - TMemoryInput stream(buf.Data() + 1, buf.Size() - 2); - stream >> tmp; - CGIUnescape(tmp); - return tmp; -} - -template<> -TMaybe<TInstant> TryParse(const TStringBuf& buf) { - if (buf == "null") { - return {}; - } - - return TInstant::ParseIso8601(buf); -} - -template<> -TMaybe<bool> TryParse(const TStringBuf& buf) { - auto tmp = TryParse<ui32>(buf); - if (tmp) { - return *tmp ? true : false; - } else { - return {}; - } -} - -void TQueryBuilder::AddPrimitiveMember(EPrimitiveType type, TStringBuf buf) { - switch (type) { - - case EPrimitiveType::Bool: - Value.OptionalBool(TryParse<bool>(buf)); - break; - - case EPrimitiveType::Int8: - Value.OptionalInt8(TryParse<i32>(buf)); - break; - - case EPrimitiveType::Uint8: - Value.OptionalUint8(TryParse<ui32>(buf)); - break; - - case EPrimitiveType::Int16: - Value.OptionalInt16(TryParse<i32>(buf)); - break; - - case EPrimitiveType::Uint16: - Value.OptionalUint16(TryParse<ui32>(buf)); - break; - - case EPrimitiveType::Int32: - Value.OptionalInt32(TryParse<i32>(buf)); - break; - - case EPrimitiveType::Uint32: - Value.OptionalUint32(TryParse<ui32>(buf)); - break; - - case EPrimitiveType::Int64: - Value.OptionalInt64(TryParse<i64>(buf)); - break; - - case EPrimitiveType::Uint64: - Value.OptionalUint64(TryParse<ui64>(buf)); - break; - - case EPrimitiveType::Float: - Value.OptionalFloat(TryParse<float>(buf)); - break; - - case EPrimitiveType::Double: - Value.OptionalDouble(TryParse<double>(buf)); - break; - - case EPrimitiveType::Date: - Value.OptionalDate(TryParse<TInstant>(buf)); - break; - - case EPrimitiveType::Datetime: - Value.OptionalDatetime(TryParse<TInstant>(buf)); - break; - - case EPrimitiveType::Timestamp: - 
Value.OptionalTimestamp(TryParse<TInstant>(buf)); - break; - - case EPrimitiveType::Interval: - Value.OptionalInterval(TryParse<i64>(buf)); - break; - - case EPrimitiveType::TzDate: - Value.OptionalTzDate(TryParse<TString>(buf)); - break; - - case EPrimitiveType::TzDatetime: - Value.OptionalTzDatetime(TryParse<TString>(buf)); - break; - - case EPrimitiveType::TzTimestamp: - Value.OptionalTzTimestamp(TryParse<TString>(buf)); - break; - - case EPrimitiveType::String: - Value.OptionalString(TryParse<TString>(buf)); - break; - - case EPrimitiveType::Utf8: - Value.OptionalUtf8(TryParse<TString>(buf)); - break; - - case EPrimitiveType::Yson: - Value.OptionalYson(TryParse<TString>(buf)); - break; - - case EPrimitiveType::Json: - Value.OptionalJson(TryParse<TString>(buf)); - break; - + TString tmp; + TMemoryInput stream(buf.Data() + 1, buf.Size() - 2); + stream >> tmp; + CGIUnescape(tmp); + return tmp; +} + +template<> +TMaybe<TInstant> TryParse(const TStringBuf& buf) { + if (buf == "null") { + return {}; + } + + return TInstant::ParseIso8601(buf); +} + +template<> +TMaybe<bool> TryParse(const TStringBuf& buf) { + auto tmp = TryParse<ui32>(buf); + if (tmp) { + return *tmp ? 
true : false; + } else { + return {}; + } +} + +void TQueryBuilder::AddPrimitiveMember(EPrimitiveType type, TStringBuf buf) { + switch (type) { + + case EPrimitiveType::Bool: + Value.OptionalBool(TryParse<bool>(buf)); + break; + + case EPrimitiveType::Int8: + Value.OptionalInt8(TryParse<i32>(buf)); + break; + + case EPrimitiveType::Uint8: + Value.OptionalUint8(TryParse<ui32>(buf)); + break; + + case EPrimitiveType::Int16: + Value.OptionalInt16(TryParse<i32>(buf)); + break; + + case EPrimitiveType::Uint16: + Value.OptionalUint16(TryParse<ui32>(buf)); + break; + + case EPrimitiveType::Int32: + Value.OptionalInt32(TryParse<i32>(buf)); + break; + + case EPrimitiveType::Uint32: + Value.OptionalUint32(TryParse<ui32>(buf)); + break; + + case EPrimitiveType::Int64: + Value.OptionalInt64(TryParse<i64>(buf)); + break; + + case EPrimitiveType::Uint64: + Value.OptionalUint64(TryParse<ui64>(buf)); + break; + + case EPrimitiveType::Float: + Value.OptionalFloat(TryParse<float>(buf)); + break; + + case EPrimitiveType::Double: + Value.OptionalDouble(TryParse<double>(buf)); + break; + + case EPrimitiveType::Date: + Value.OptionalDate(TryParse<TInstant>(buf)); + break; + + case EPrimitiveType::Datetime: + Value.OptionalDatetime(TryParse<TInstant>(buf)); + break; + + case EPrimitiveType::Timestamp: + Value.OptionalTimestamp(TryParse<TInstant>(buf)); + break; + + case EPrimitiveType::Interval: + Value.OptionalInterval(TryParse<i64>(buf)); + break; + + case EPrimitiveType::TzDate: + Value.OptionalTzDate(TryParse<TString>(buf)); + break; + + case EPrimitiveType::TzDatetime: + Value.OptionalTzDatetime(TryParse<TString>(buf)); + break; + + case EPrimitiveType::TzTimestamp: + Value.OptionalTzTimestamp(TryParse<TString>(buf)); + break; + + case EPrimitiveType::String: + Value.OptionalString(TryParse<TString>(buf)); + break; + + case EPrimitiveType::Utf8: + Value.OptionalUtf8(TryParse<TString>(buf)); + break; + + case EPrimitiveType::Yson: + Value.OptionalYson(TryParse<TString>(buf)); + 
break; + + case EPrimitiveType::Json: + Value.OptionalJson(TryParse<TString>(buf)); + break; + case EPrimitiveType::JsonDocument: Value.OptionalJsonDocument(TryParse<TString>(buf)); break; - + case EPrimitiveType::DyNumber: if (buf == "null") { Value.OptionalDyNumber(Nothing()); @@ -188,133 +188,133 @@ void TQueryBuilder::AddPrimitiveMember(EPrimitiveType type, TStringBuf buf) { } break; - case EPrimitiveType::Uuid: - Y_ENSURE(false, TStringBuilder() << "Unexpected Primitive kind while parsing line: " << type); - break; - - } -} - -void TQueryBuilder::AddMemberFromString(const TColumn &col, TStringBuf buf) { - TTypeParser type(col.Type); - Y_ENSURE(type.GetKind() == TTypeParser::ETypeKind::Optional); - type.OpenOptional(); - - Value.AddMember(col.Name); - switch (type.GetKind()) { - case TTypeParser::ETypeKind::Primitive: - AddPrimitiveMember(type.GetPrimitive(), buf); - break; - case TTypeParser::ETypeKind::Decimal: - if (buf == "null") { - Value.EmptyOptional(); - } else { - Value.BeginOptional(); - Value.Decimal(TDecimalValue(TString(buf), type.GetDecimal().Precision, type.GetDecimal().Scale)); - Value.EndOptional(); - } - break; - default: - Y_FAIL(""); - } -} - -void TQueryBuilder::Begin() { - Value.BeginList(); -} - -void TQueryBuilder::AddLine(TStringBuf line) { - Value.AddListItem(); - Value.BeginStruct(); - for (const auto& col : Columns) { - TStringBuf tok = line.NextTok(','); - Y_ENSURE(tok, "Empty token on line"); - AddMemberFromString(col, tok); - } - Value.EndStruct(); -} - -TValue TQueryBuilder::EndAndGetResultingValue() { - Value.EndList(); - return Value.Build(); -} - -TParams TQueryBuilder::EndAndGetResultingParams() { - // TODO: Use that size in bandwidth limit calculation - // const auto& valueProto = TProtoAccessor::GetProto(buildedValue); - // Cout << "Size of valueProto# " << valueProto.ByteSizeLong() << Endl; - TParamsBuilder paramsBuilder; - paramsBuilder.AddParam("$items", EndAndGetResultingValue()); - return paramsBuilder.Build(); -} - 
-TString TQueryBuilder::GetQueryString() const { - return Query; -} - -//////////////////////////////////////////////////////////////////////////////// -// TQueryFromFileIterator -//////////////////////////////////////////////////////////////////////////////// - -void TQueryFromFileIterator::TryReadNextLines() { - if (!LinesBunch.Empty() || BytesRemaining == 0) { - return; - } - - const auto bytesToRead = Min<i64>(BufferMaxSize, BytesRemaining); - Y_ENSURE(bytesToRead > 0, "There is no more bytes to read!" << - " BufferMaxSize# " << BufferMaxSize << - " BytesRemaining# " << BytesRemaining << - " CurrentOffset# " << CurrentOffset << - " this->Empty()# " << this->Empty()); - IoBuff.resize(bytesToRead); - i64 bytesRead = DataFile.Pread(IoBuff.Detach(), bytesToRead, CurrentOffset); - IoBuff.resize(bytesRead); - size_t newSize = IoBuff.rfind("\n"); - Y_ENSURE(newSize != TString::npos, "Can't find new line symbol in buffer read from file," - " bytesRead# " << bytesRead); - // +1 for newline symbol - newSize += 1; - IoBuff.resize(newSize); - BytesRemaining -= newSize; - CurrentOffset += newSize; - LinesBunch = IoBuff; -} - -template<bool GetValue> -std::conditional_t<GetValue, TValue, TParams> TQueryFromFileIterator::ReadNext() { - TryReadNextLines(); - - TStringBuf line = LinesBunch.NextTok('\n'); - Query.Begin(); - i64 querySizeRows = 0; - i64 querySizeBytes = 0; - while (line || LinesBunch) { + case EPrimitiveType::Uuid: + Y_ENSURE(false, TStringBuilder() << "Unexpected Primitive kind while parsing line: " << type); + break; + + } +} + +void TQueryBuilder::AddMemberFromString(const TColumn &col, TStringBuf buf) { + TTypeParser type(col.Type); + Y_ENSURE(type.GetKind() == TTypeParser::ETypeKind::Optional); + type.OpenOptional(); + + Value.AddMember(col.Name); + switch (type.GetKind()) { + case TTypeParser::ETypeKind::Primitive: + AddPrimitiveMember(type.GetPrimitive(), buf); + break; + case TTypeParser::ETypeKind::Decimal: + if (buf == "null") { + Value.EmptyOptional(); 
+ } else { + Value.BeginOptional(); + Value.Decimal(TDecimalValue(TString(buf), type.GetDecimal().Precision, type.GetDecimal().Scale)); + Value.EndOptional(); + } + break; + default: + Y_FAIL(""); + } +} + +void TQueryBuilder::Begin() { + Value.BeginList(); +} + +void TQueryBuilder::AddLine(TStringBuf line) { + Value.AddListItem(); + Value.BeginStruct(); + for (const auto& col : Columns) { + TStringBuf tok = line.NextTok(','); + Y_ENSURE(tok, "Empty token on line"); + AddMemberFromString(col, tok); + } + Value.EndStruct(); +} + +TValue TQueryBuilder::EndAndGetResultingValue() { + Value.EndList(); + return Value.Build(); +} + +TParams TQueryBuilder::EndAndGetResultingParams() { + // TODO: Use that size in bandwidth limit calculation + // const auto& valueProto = TProtoAccessor::GetProto(buildedValue); + // Cout << "Size of valueProto# " << valueProto.ByteSizeLong() << Endl; + TParamsBuilder paramsBuilder; + paramsBuilder.AddParam("$items", EndAndGetResultingValue()); + return paramsBuilder.Build(); +} + +TString TQueryBuilder::GetQueryString() const { + return Query; +} + +//////////////////////////////////////////////////////////////////////////////// +// TQueryFromFileIterator +//////////////////////////////////////////////////////////////////////////////// + +void TQueryFromFileIterator::TryReadNextLines() { + if (!LinesBunch.Empty() || BytesRemaining == 0) { + return; + } + + const auto bytesToRead = Min<i64>(BufferMaxSize, BytesRemaining); + Y_ENSURE(bytesToRead > 0, "There is no more bytes to read!" 
<< + " BufferMaxSize# " << BufferMaxSize << + " BytesRemaining# " << BytesRemaining << + " CurrentOffset# " << CurrentOffset << + " this->Empty()# " << this->Empty()); + IoBuff.resize(bytesToRead); + i64 bytesRead = DataFile.Pread(IoBuff.Detach(), bytesToRead, CurrentOffset); + IoBuff.resize(bytesRead); + size_t newSize = IoBuff.rfind("\n"); + Y_ENSURE(newSize != TString::npos, "Can't find new line symbol in buffer read from file," + " bytesRead# " << bytesRead); + // +1 for newline symbol + newSize += 1; + IoBuff.resize(newSize); + BytesRemaining -= newSize; + CurrentOffset += newSize; + LinesBunch = IoBuff; +} + +template<bool GetValue> +std::conditional_t<GetValue, TValue, TParams> TQueryFromFileIterator::ReadNext() { + TryReadNextLines(); + + TStringBuf line = LinesBunch.NextTok('\n'); + Query.Begin(); + i64 querySizeRows = 0; + i64 querySizeBytes = 0; + while (line || LinesBunch) { if (line.empty()) { - continue; - } - Query.AddLine(line); - ++querySizeRows; - querySizeBytes += AlignUp<i64>(line.Size(), METERING_ROW_PRECISION); - if (MaxRowsPerQuery > 0 && querySizeRows >= MaxRowsPerQuery - || MaxBytesPerQuery > 0 && querySizeBytes >= MaxBytesPerQuery) { - break; - } - line = LinesBunch.NextTok('\n'); - } - Y_ENSURE(querySizeRows > 0, "No new lines is read from file. 
Maybe buffer size is less then size of single row"); - if constexpr (GetValue) { - return Query.EndAndGetResultingValue(); - } else { - return Query.EndAndGetResultingParams(); - } -} - -template std::conditional_t<true, TValue, TParams> TQueryFromFileIterator::ReadNext<true>(); -template std::conditional_t<false, TValue, TParams> TQueryFromFileIterator::ReadNext<false>(); - -TString TQueryFromFileIterator::GetQueryString() const { - return Query.GetQueryString(); -} - -} // NYdb::NBackup + continue; + } + Query.AddLine(line); + ++querySizeRows; + querySizeBytes += AlignUp<i64>(line.Size(), METERING_ROW_PRECISION); + if (MaxRowsPerQuery > 0 && querySizeRows >= MaxRowsPerQuery + || MaxBytesPerQuery > 0 && querySizeBytes >= MaxBytesPerQuery) { + break; + } + line = LinesBunch.NextTok('\n'); + } + Y_ENSURE(querySizeRows > 0, "No new lines is read from file. Maybe buffer size is less then size of single row"); + if constexpr (GetValue) { + return Query.EndAndGetResultingValue(); + } else { + return Query.EndAndGetResultingParams(); + } +} + +template std::conditional_t<true, TValue, TParams> TQueryFromFileIterator::ReadNext<true>(); +template std::conditional_t<false, TValue, TParams> TQueryFromFileIterator::ReadNext<false>(); + +TString TQueryFromFileIterator::GetQueryString() const { + return Query.GetQueryString(); +} + +} // NYdb::NBackup diff --git a/ydb/library/backup/query_builder.h b/ydb/library/backup/query_builder.h index 0cf4f2d267..559f66ed99 100644 --- a/ydb/library/backup/query_builder.h +++ b/ydb/library/backup/query_builder.h @@ -1,87 +1,87 @@ -#pragma once - +#pragma once + #include <ydb/public/sdk/cpp/client/ydb_params/params.h> #include <ydb/public/sdk/cpp/client/ydb_result/result.h> #include <ydb/public/sdk/cpp/client/ydb_value/value.h> #include <ydb/public/sdk/cpp/client/ydb_table/table.h> - -#include <util/generic/string.h> -#include <util/generic/vector.h> -#include <util/stream/mem.h> -#include <util/system/file.h> - -namespace NYdb::NBackup { - 
-class TQueryBuilder { - TVector<TColumn> Columns; - const TString Query; - TValueBuilder Value; - - TString BuildQuery(const TString& path); - void AddMemberFromString(const TColumn& col, TStringBuf ss); - void AddPrimitiveMember(EPrimitiveType type, TStringBuf buf); - -public: - TQueryBuilder(const TString& path, TVector<TColumn> columns) - : Columns(std::move(columns)) - , Query(BuildQuery(path)) - {} - - void Begin(); - void AddLine(TStringBuf line); - TValue GetLinesAsValue(); - TValue EndAndGetResultingValue(); - TParams EndAndGetResultingParams(); - TString GetQueryString() const; -}; - - -class TQueryFromFileIterator { - TFile DataFile; - TQueryBuilder Query; - - const i64 BufferMaxSize; - TString IoBuff; - i64 CurrentOffset; - i64 BytesRemaining; - const i64 MaxRowsPerQuery; // 0 for inf - const i64 MaxBytesPerQuery; // 0 for inf - - TStringBuf LinesBunch; - - void TryReadNextLines(); - - template<bool GetValue> - std::conditional_t<GetValue, TValue, TParams> ReadNext(); - -public: - TQueryFromFileIterator(const TString& path, const TString& dataFileName, TVector<TColumn> columns, i64 buffSize, - i64 maxRowsPerQuery, i64 maxBytesPerQuery) - : DataFile(dataFileName, OpenExisting | RdOnly) - , Query(path, std::move(columns)) - , BufferMaxSize(buffSize) - , IoBuff(TString::Uninitialized(BufferMaxSize)) - , CurrentOffset(0) - , BytesRemaining(DataFile.GetLength()) - , MaxRowsPerQuery(maxRowsPerQuery) - // If MaxBytesPerQuery is not specified use 2MiB as default value. Since size of each row is rounded up - // to nearest multiple of 1024 this effectively limits number of rows in query in case of small rows - , MaxBytesPerQuery(maxBytesPerQuery ? 
maxBytesPerQuery : BufferMaxSize) - {} - - bool Empty() const { + +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/stream/mem.h> +#include <util/system/file.h> + +namespace NYdb::NBackup { + +class TQueryBuilder { + TVector<TColumn> Columns; + const TString Query; + TValueBuilder Value; + + TString BuildQuery(const TString& path); + void AddMemberFromString(const TColumn& col, TStringBuf ss); + void AddPrimitiveMember(EPrimitiveType type, TStringBuf buf); + +public: + TQueryBuilder(const TString& path, TVector<TColumn> columns) + : Columns(std::move(columns)) + , Query(BuildQuery(path)) + {} + + void Begin(); + void AddLine(TStringBuf line); + TValue GetLinesAsValue(); + TValue EndAndGetResultingValue(); + TParams EndAndGetResultingParams(); + TString GetQueryString() const; +}; + + +class TQueryFromFileIterator { + TFile DataFile; + TQueryBuilder Query; + + const i64 BufferMaxSize; + TString IoBuff; + i64 CurrentOffset; + i64 BytesRemaining; + const i64 MaxRowsPerQuery; // 0 for inf + const i64 MaxBytesPerQuery; // 0 for inf + + TStringBuf LinesBunch; + + void TryReadNextLines(); + + template<bool GetValue> + std::conditional_t<GetValue, TValue, TParams> ReadNext(); + +public: + TQueryFromFileIterator(const TString& path, const TString& dataFileName, TVector<TColumn> columns, i64 buffSize, + i64 maxRowsPerQuery, i64 maxBytesPerQuery) + : DataFile(dataFileName, OpenExisting | RdOnly) + , Query(path, std::move(columns)) + , BufferMaxSize(buffSize) + , IoBuff(TString::Uninitialized(BufferMaxSize)) + , CurrentOffset(0) + , BytesRemaining(DataFile.GetLength()) + , MaxRowsPerQuery(maxRowsPerQuery) + // If MaxBytesPerQuery is not specified use 2MiB as default value. Since size of each row is rounded up + // to nearest multiple of 1024 this effectively limits number of rows in query in case of small rows + , MaxBytesPerQuery(maxBytesPerQuery ? 
maxBytesPerQuery : BufferMaxSize) + {} + + bool Empty() const { return BytesRemaining == 0 && LinesBunch.empty(); - } - - TParams ReadNextGetParams() { - return ReadNext<false>(); - } - - TValue ReadNextGetValue() { - return ReadNext<true>(); - } - - TString GetQueryString() const; -}; - -} // NYdb::NBackup + } + + TParams ReadNextGetParams() { + return ReadNext<false>(); + } + + TValue ReadNextGetValue() { + return ReadNext<true>(); + } + + TString GetQueryString() const; +}; + +} // NYdb::NBackup diff --git a/ydb/library/backup/query_uploader.cpp b/ydb/library/backup/query_uploader.cpp index 0e98882745..c9689c3b2e 100644 --- a/ydb/library/backup/query_uploader.cpp +++ b/ydb/library/backup/query_uploader.cpp @@ -1,157 +1,157 @@ -#include <util/datetime/cputimer.h> - -#include "query_uploader.h" -#include "util.h" - -namespace NYdb::NBackup { - +#include <util/datetime/cputimer.h> + +#include "query_uploader.h" +#include "util.h" + +namespace NYdb::NBackup { + static const char DOC_API_REQUEST_TYPE[] = "_document_api_request"; -//////////////////////////////////////////////////////////////////////////////// -// TUploader -//////////////////////////////////////////////////////////////////////////////// - +//////////////////////////////////////////////////////////////////////////////// +// TUploader +//////////////////////////////////////////////////////////////////////////////// + ui32 TUploader::TOptions::GetRps() const { return Rate * TDuration::Seconds(1).MilliSeconds() / Interval.MilliSeconds(); } TUploader::TUploader(const TUploader::TOptions &opts, NYdb::NTable::TTableClient& client, const TString &query) - : Opts(opts) - , Query(query) - , ShouldStop(0) + : Opts(opts) + , Query(query) + , ShouldStop(0) , RequestLimiter(opts.GetRps(), opts.GetRps()) , Client(client) -{ +{ TasksQueue = MakeSimpleShared<TThreadPool>(TThreadPool::TParams().SetBlocking(true).SetCatching(true)); - TasksQueue->Start(opts.InFly, opts.InFly + 1); -} - -bool TUploader::Push(const 
TString& path, TValue&& value) { - if (IsStopped()) { - return false; - } - - auto task = [this, taskValue = std::move(value), &path, retrySleep = BulkUpsertRetryDuration] () mutable { - ui32 retry = 0; - while (true) { + TasksQueue->Start(opts.InFly, opts.InFly + 1); +} + +bool TUploader::Push(const TString& path, TValue&& value) { + if (IsStopped()) { + return false; + } + + auto task = [this, taskValue = std::move(value), &path, retrySleep = BulkUpsertRetryDuration] () mutable { + ui32 retry = 0; + while (true) { while (!RequestLimiter.IsAvail()) { Sleep(Min(TDuration::MicroSeconds(RequestLimiter.GetWaitTime()), Opts.ReactionTime)); - if (IsStopped()) { - return; - } - } - - if (IsStopped()) { - return; - } - + if (IsStopped()) { + return; + } + } + + if (IsStopped()) { + return; + } + RequestLimiter.Use(1); - auto upsert = [&] (NYdb::NTable::TSession) -> TStatus { - auto settings = NTable::TBulkUpsertSettings() + auto upsert = [&] (NYdb::NTable::TSession) -> TStatus { + auto settings = NTable::TBulkUpsertSettings() .RequestType(DOC_API_REQUEST_TYPE) - .OperationTimeout(TDuration::Seconds(30)) - .ClientTimeout(TDuration::Seconds(35)); - - // Make copy of taskValue to save initial data for case of error - return Client.BulkUpsert(path, TValue(taskValue), settings).GetValueSync(); - }; - auto settings = NYdb::NTable::TRetryOperationSettings() - .MaxRetries(Opts.RetryOperaionMaxRetries); - auto status = Client.RetryOperationSync(upsert, settings); - - if (status.IsSuccess()) { - if (status.GetIssues()) { - LOG_ERR("BulkUpsert has finished successfull, but has issues# {" - << status.GetIssues().ToString() << "}"); - } - return; - // Since upsert of data is an idempotent operation it is possible to retry transport errors - } else if (status.IsTransportError() && retry < Opts.TransportErrorsMaxRetries) { - LOG_DEBUG("Notice: transport error in BulkUpsert, issues# {" << status.GetIssues().ToString() << "}" - << " current Retry is " << retry - << " < MaxRetries# " << 
Opts.TransportErrorsMaxRetries - << ", so sleep for " << retrySleep.Seconds() << "s" - << " and try again"); - ++retry; - TInstant deadline = retrySleep.ToDeadLine(); - while (TInstant::Now() < deadline) { - if (IsStopped()) { - return; - } - Sleep(TDuration::Seconds(1)); - } - retrySleep *= 2; - continue; - } else { - LOG_ERR("Error in BulkUpsert, so stop working. Issues# {" << status.GetIssues().ToString() << "}" - << " IsTransportError# " << (status.IsTransportError() ? "true" : "false") - << " retries done# " << retry); - PleaseStop(); - return; - } - } - }; - - return TasksQueue->AddFunc(task); -} - -bool TUploader::Push(TParams params) { - if (IsStopped()) { - return false; - } - - auto upload = [this, params] (NYdb::NTable::TSession session) -> NYdb::TStatus { + .OperationTimeout(TDuration::Seconds(30)) + .ClientTimeout(TDuration::Seconds(35)); + + // Make copy of taskValue to save initial data for case of error + return Client.BulkUpsert(path, TValue(taskValue), settings).GetValueSync(); + }; + auto settings = NYdb::NTable::TRetryOperationSettings() + .MaxRetries(Opts.RetryOperaionMaxRetries); + auto status = Client.RetryOperationSync(upsert, settings); + + if (status.IsSuccess()) { + if (status.GetIssues()) { + LOG_ERR("BulkUpsert has finished successfull, but has issues# {" + << status.GetIssues().ToString() << "}"); + } + return; + // Since upsert of data is an idempotent operation it is possible to retry transport errors + } else if (status.IsTransportError() && retry < Opts.TransportErrorsMaxRetries) { + LOG_DEBUG("Notice: transport error in BulkUpsert, issues# {" << status.GetIssues().ToString() << "}" + << " current Retry is " << retry + << " < MaxRetries# " << Opts.TransportErrorsMaxRetries + << ", so sleep for " << retrySleep.Seconds() << "s" + << " and try again"); + ++retry; + TInstant deadline = retrySleep.ToDeadLine(); + while (TInstant::Now() < deadline) { + if (IsStopped()) { + return; + } + Sleep(TDuration::Seconds(1)); + } + retrySleep *= 
2; + continue; + } else { + LOG_ERR("Error in BulkUpsert, so stop working. Issues# {" << status.GetIssues().ToString() << "}" + << " IsTransportError# " << (status.IsTransportError() ? "true" : "false") + << " retries done# " << retry); + PleaseStop(); + return; + } + } + }; + + return TasksQueue->AddFunc(task); +} + +bool TUploader::Push(TParams params) { + if (IsStopped()) { + return false; + } + + auto upload = [this, params] (NYdb::NTable::TSession session) -> NYdb::TStatus { auto prepareSettings = NTable::TPrepareDataQuerySettings() .RequestType(DOC_API_REQUEST_TYPE); auto prepareResult = session.PrepareDataQuery(Query, prepareSettings).GetValueSync(); - if (!prepareResult.IsSuccess()) { - return prepareResult; - } - - auto dataQuery = prepareResult.GetQuery(); - auto transaction = NYdb::NTable::TTxControl::BeginTx(NYdb::NTable::TTxSettings::SerializableRW()).CommitTx(); + if (!prepareResult.IsSuccess()) { + return prepareResult; + } + + auto dataQuery = prepareResult.GetQuery(); + auto transaction = NYdb::NTable::TTxControl::BeginTx(NYdb::NTable::TTxSettings::SerializableRW()).CommitTx(); auto settings = NTable::TExecDataQuerySettings() .RequestType(DOC_API_REQUEST_TYPE) .OperationTimeout(TDuration::Seconds(30)) .ClientTimeout(TDuration::Seconds(35)); return dataQuery.Execute(transaction, std::move(params), settings).GetValueSync(); - }; - - auto task = [this, upload] () { + }; + + auto task = [this, upload] () { while (!RequestLimiter.IsAvail()) { Sleep(Min(TDuration::MilliSeconds(RequestLimiter.GetWaitTime()), Opts.ReactionTime)); - if (IsStopped()) { - return; - } - } - - if (IsStopped()) { - return; - } - + if (IsStopped()) { + return; + } + } + + if (IsStopped()) { + return; + } + RequestLimiter.Use(1); - auto settings = NYdb::NTable::TRetryOperationSettings() - .MaxRetries(Opts.RetryOperaionMaxRetries) - .Idempotent(true); - - auto status = Client.RetryOperationSync(upload, settings); - - - if (status.IsSuccess()) { - if (status.GetIssues()) { - 
LOG_ERR("Upload tx has finished successfull, but has issues# {" - << status.GetIssues().ToString() << "}"); - } - } else { - LOG_ERR("Error in upload tx, issues# {" << status.GetIssues().ToString() << "}"); - PleaseStop(); - return; - } - }; - - return TasksQueue->AddFunc(task); -} - -} // NYdb::NBackup + auto settings = NYdb::NTable::TRetryOperationSettings() + .MaxRetries(Opts.RetryOperaionMaxRetries) + .Idempotent(true); + + auto status = Client.RetryOperationSync(upload, settings); + + + if (status.IsSuccess()) { + if (status.GetIssues()) { + LOG_ERR("Upload tx has finished successfull, but has issues# {" + << status.GetIssues().ToString() << "}"); + } + } else { + LOG_ERR("Error in upload tx, issues# {" << status.GetIssues().ToString() << "}"); + PleaseStop(); + return; + } + }; + + return TasksQueue->AddFunc(task); +} + +} // NYdb::NBackup diff --git a/ydb/library/backup/query_uploader.h b/ydb/library/backup/query_uploader.h index c98be50c21..b72a31cea2 100644 --- a/ydb/library/backup/query_uploader.h +++ b/ydb/library/backup/query_uploader.h @@ -1,58 +1,58 @@ -#pragma once - +#pragma once + #include <ydb/public/sdk/cpp/client/ydb_table/table.h> - + #include <library/cpp/bucket_quoter/bucket_quoter.h> - -#include <util/thread/pool.h> -#include <util/generic/string.h> -#include <util/generic/map.h> - -namespace NYdb::NBackup { - -class TUploader { -public: - struct TOptions { - ui32 Rate = 20; // requests per Interval - TDuration Interval = TDuration::Seconds(1); - ui32 InFly = 10; - ui32 RetryOperaionMaxRetries = 30; - ui32 TransportErrorsMaxRetries = 9; - TDuration ReactionTime = TDuration::MilliSeconds(50); + +#include <util/thread/pool.h> +#include <util/generic/string.h> +#include <util/generic/map.h> + +namespace NYdb::NBackup { + +class TUploader { +public: + struct TOptions { + ui32 Rate = 20; // requests per Interval + TDuration Interval = TDuration::Seconds(1); + ui32 InFly = 10; + ui32 RetryOperaionMaxRetries = 30; + ui32 TransportErrorsMaxRetries = 
9; + TDuration ReactionTime = TDuration::MilliSeconds(50); ui32 GetRps() const; - }; - -private: - const TOptions Opts; - const TString Query; - - TAtomic ShouldStop; - TSimpleSharedPtr<IThreadPool> TasksQueue; - // Total wait is 1 * (2 ** TransportErrorsMaxRetries - 1), for TransportErrorsMaxRetries == 9 it gives ~8.5 minutes - TDuration BulkUpsertRetryDuration = TDuration::Seconds(1); - + }; + +private: + const TOptions Opts; + const TString Query; + + TAtomic ShouldStop; + TSimpleSharedPtr<IThreadPool> TasksQueue; + // Total wait is 1 * (2 ** TransportErrorsMaxRetries - 1), for TransportErrorsMaxRetries == 9 it gives ~8.5 minutes + TDuration BulkUpsertRetryDuration = TDuration::Seconds(1); + using TRpsLimiter = TBucketQuoter<ui64>; - TRpsLimiter RequestLimiter; + TRpsLimiter RequestLimiter; NYdb::NTable::TTableClient& Client; - -public: + +public: TUploader(const TOptions& opts, NYdb::NTable::TTableClient& client, const TString& query); - - bool Push(TParams params); - bool Push(const TString& path, TValue&& value); - - void WaitAllJobs() { - TasksQueue->Stop(); - } - - void PleaseStop() { - AtomicSet(ShouldStop, 1); - } - - bool IsStopped() const { - return AtomicGet(ShouldStop) == 1; - } -}; - -} // NYdb::Backup + + bool Push(TParams params); + bool Push(const TString& path, TValue&& value); + + void WaitAllJobs() { + TasksQueue->Stop(); + } + + void PleaseStop() { + AtomicSet(ShouldStop, 1); + } + + bool IsStopped() const { + return AtomicGet(ShouldStop) == 1; + } +}; + +} // NYdb::Backup diff --git a/ydb/library/backup/ut/ut.cpp b/ydb/library/backup/ut/ut.cpp index 7f4c23f741..5967e9dfa3 100644 --- a/ydb/library/backup/ut/ut.cpp +++ b/ydb/library/backup/ut/ut.cpp @@ -1,490 +1,490 @@ #include <ydb/library/backup/backup.h> #include <ydb/library/backup/query_builder.h> #include <ydb/library/backup/util.h> - + #include <ydb/public/api/protos/ydb_table.pb.h> #include <ydb/public/sdk/cpp/client/ydb_table/table.h> - + #include 
<library/cpp/testing/unittest/registar.h> - -#include <util/folder/tempdir.h> -#include <util/generic/strbuf.h> + +#include <util/folder/tempdir.h> +#include <util/generic/strbuf.h> #include <library/cpp/string_utils/quote/quote.h> - -namespace NYdb { - -Y_UNIT_TEST_SUITE(BackupToolValuePrintParse) { - -void TestResultSetParsedOk(const TString& protoStr, const TString& expect) { + +namespace NYdb { + +Y_UNIT_TEST_SUITE(BackupToolValuePrintParse) { + +void TestResultSetParsedOk(const TString& protoStr, const TString& expect) { Ydb::ResultSet proto; - google::protobuf::TextFormat::ParseFromString(protoStr, &proto); - + google::protobuf::TextFormat::ParseFromString(protoStr, &proto); + TResultSet result(proto); - - TStringStream got; - got.Reserve(1 << 10); - NBackup::ProcessResultSet(got, result); - UNIT_ASSERT(got.Size()); - UNIT_ASSERT_NO_DIFF(got.Str(), expect); -} - -Y_UNIT_TEST(ParseValuesFromString) { - constexpr ui32 ColSize = 3; - constexpr ui32 RowSize = 2; - const EPrimitiveType colType[ColSize] = {EPrimitiveType::Uint32, EPrimitiveType::String, EPrimitiveType::Int64}; - - auto tableDesc = NTable::TTableBuilder() - .AddNullableColumn("ColUint", colType[0]) - .AddNullableColumn("ColStr", colType[1]) - .AddNullableColumn("ColInt", colType[2]) - .Build(); - - NBackup::TQueryBuilder qb("path/to/table", tableDesc.GetColumns()); - qb.Begin(); - qb.AddLine("123,\"qwe\",-6454"); - qb.AddLine("984213,\"bwijertqw\",512993"); - TParams params = qb.EndAndGetResultingParams(); - - auto value = params.GetValue("$items"); - UNIT_ASSERT(value); - - TValueParser parser(*value); - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::List); - - parser.OpenList(); - for (ui32 row = 0; row < RowSize; ++row) { - const bool nextItemOk = parser.TryNextListItem(); - UNIT_ASSERT(nextItemOk); - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Struct); - parser.OpenStruct(); - for (ui32 col = 0; col < ColSize; ++col) { - const bool nextMemberOk = parser.TryNextMember(); - 
UNIT_ASSERT(nextMemberOk); - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Optional); - parser.OpenOptional(); - UNIT_ASSERT(parser.GetPrimitiveType() == colType[col]); - parser.CloseOptional(); - } - parser.CloseStruct(); - } - parser.CloseList(); -} - -Y_UNIT_TEST(ParseValuesFromFile) { - constexpr ui32 ColSize = 3; - constexpr ui32 RowSize = 2000; - const EPrimitiveType colType[ColSize] = {EPrimitiveType::Uint32, EPrimitiveType::String, EPrimitiveType::Int64}; - - TTempDir tempDir; - const TString dataFileName = tempDir.Name() + Sprintf("data_%02d.csv", 0); - { - TFile dataFile(dataFileName, CreateAlways | WrOnly); - TStringStream ss; - for (ui32 row = 0; row < RowSize; ++row) { - if (row % 2 == 0) { - TString col2str = TStringBuilder() << "TestString" << 2 * row << "with number"; - CGIEscape(col2str); - ss << "null" << ",\"" << col2str << "\"," << row*row << Endl; - } else { - ss << row << ",null," << row*row << Endl; - } - } - TString str = ss.Str(); - dataFile.Write(str.Detach(), str.Size()); - } - - auto tableDesc = NTable::TTableBuilder() - .AddNullableColumn("ColUint", colType[0]) - .AddNullableColumn("ColStr", colType[1]) - .AddNullableColumn("ColInt", colType[2]) - .Build(); - - NBackup::TQueryFromFileIterator it("table_path", dataFileName, tableDesc.GetColumns(), 4096, 0, 0); - - ui32 rowsRead = 0; - while (!it.Empty()) { - auto params = it.ReadNextGetParams(); - - auto value = params.GetValue("$items"); - UNIT_ASSERT(value); - TValueParser parser(*value); - - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::List); - parser.OpenList(); - while (parser.TryNextListItem()) { - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Struct); - parser.OpenStruct(); - for (ui32 col = 0; col < ColSize; ++col) { - const bool nextMemberOk = parser.TryNextMember(); - UNIT_ASSERT(nextMemberOk); - UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Optional); - parser.OpenOptional(); - UNIT_ASSERT(parser.GetPrimitiveType() == colType[col]); - 
switch (col) { - case 0: { - UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::Uint32); - parser.CloseOptional(); - const TMaybe<ui32> val = parser.GetOptionalUint32(); - if (rowsRead % 2 == 0) { - UNIT_ASSERT(!val); - } else { - UNIT_ASSERT(val); - UNIT_ASSERT(*val == rowsRead); - } - break; - } - case 1: { - UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::String); - parser.CloseOptional(); - TString col2str = TStringBuilder() << "TestString" << 2 * rowsRead << "with number"; - const TMaybe<TString> val = parser.GetOptionalString(); - if (rowsRead % 2 == 0) { - UNIT_ASSERT(val); - UNIT_ASSERT_STRINGS_EQUAL(*val, col2str); - } else { - UNIT_ASSERT(!val); - } - break; - } - case 2: { - UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::Int64); - parser.CloseOptional(); - const TMaybe<i64> val = parser.GetOptionalInt64(); - UNIT_ASSERT(val); - UNIT_ASSERT(*val == rowsRead*rowsRead); - break; - } - default: - UNIT_FAIL("Unexpected columt number"); - } - } - const bool nextMemberOk = parser.TryNextMember(); - UNIT_ASSERT(!nextMemberOk); - parser.CloseStruct(); - ++rowsRead; - } - parser.CloseList(); - } - UNIT_ASSERT(rowsRead == RowSize); -} - -Y_UNIT_TEST(ResultSetBoolPrintTest) { - TString resultSetStr = R"_( + + TStringStream got; + got.Reserve(1 << 10); + NBackup::ProcessResultSet(got, result); + UNIT_ASSERT(got.Size()); + UNIT_ASSERT_NO_DIFF(got.Str(), expect); +} + +Y_UNIT_TEST(ParseValuesFromString) { + constexpr ui32 ColSize = 3; + constexpr ui32 RowSize = 2; + const EPrimitiveType colType[ColSize] = {EPrimitiveType::Uint32, EPrimitiveType::String, EPrimitiveType::Int64}; + + auto tableDesc = NTable::TTableBuilder() + .AddNullableColumn("ColUint", colType[0]) + .AddNullableColumn("ColStr", colType[1]) + .AddNullableColumn("ColInt", colType[2]) + .Build(); + + NBackup::TQueryBuilder qb("path/to/table", tableDesc.GetColumns()); + qb.Begin(); + qb.AddLine("123,\"qwe\",-6454"); + qb.AddLine("984213,\"bwijertqw\",512993"); + TParams params = 
qb.EndAndGetResultingParams(); + + auto value = params.GetValue("$items"); + UNIT_ASSERT(value); + + TValueParser parser(*value); + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::List); + + parser.OpenList(); + for (ui32 row = 0; row < RowSize; ++row) { + const bool nextItemOk = parser.TryNextListItem(); + UNIT_ASSERT(nextItemOk); + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Struct); + parser.OpenStruct(); + for (ui32 col = 0; col < ColSize; ++col) { + const bool nextMemberOk = parser.TryNextMember(); + UNIT_ASSERT(nextMemberOk); + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Optional); + parser.OpenOptional(); + UNIT_ASSERT(parser.GetPrimitiveType() == colType[col]); + parser.CloseOptional(); + } + parser.CloseStruct(); + } + parser.CloseList(); +} + +Y_UNIT_TEST(ParseValuesFromFile) { + constexpr ui32 ColSize = 3; + constexpr ui32 RowSize = 2000; + const EPrimitiveType colType[ColSize] = {EPrimitiveType::Uint32, EPrimitiveType::String, EPrimitiveType::Int64}; + + TTempDir tempDir; + const TString dataFileName = tempDir.Name() + Sprintf("data_%02d.csv", 0); + { + TFile dataFile(dataFileName, CreateAlways | WrOnly); + TStringStream ss; + for (ui32 row = 0; row < RowSize; ++row) { + if (row % 2 == 0) { + TString col2str = TStringBuilder() << "TestString" << 2 * row << "with number"; + CGIEscape(col2str); + ss << "null" << ",\"" << col2str << "\"," << row*row << Endl; + } else { + ss << row << ",null," << row*row << Endl; + } + } + TString str = ss.Str(); + dataFile.Write(str.Detach(), str.Size()); + } + + auto tableDesc = NTable::TTableBuilder() + .AddNullableColumn("ColUint", colType[0]) + .AddNullableColumn("ColStr", colType[1]) + .AddNullableColumn("ColInt", colType[2]) + .Build(); + + NBackup::TQueryFromFileIterator it("table_path", dataFileName, tableDesc.GetColumns(), 4096, 0, 0); + + ui32 rowsRead = 0; + while (!it.Empty()) { + auto params = it.ReadNextGetParams(); + + auto value = params.GetValue("$items"); + 
UNIT_ASSERT(value); + TValueParser parser(*value); + + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::List); + parser.OpenList(); + while (parser.TryNextListItem()) { + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Struct); + parser.OpenStruct(); + for (ui32 col = 0; col < ColSize; ++col) { + const bool nextMemberOk = parser.TryNextMember(); + UNIT_ASSERT(nextMemberOk); + UNIT_ASSERT(parser.GetKind() == TTypeParser::ETypeKind::Optional); + parser.OpenOptional(); + UNIT_ASSERT(parser.GetPrimitiveType() == colType[col]); + switch (col) { + case 0: { + UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::Uint32); + parser.CloseOptional(); + const TMaybe<ui32> val = parser.GetOptionalUint32(); + if (rowsRead % 2 == 0) { + UNIT_ASSERT(!val); + } else { + UNIT_ASSERT(val); + UNIT_ASSERT(*val == rowsRead); + } + break; + } + case 1: { + UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::String); + parser.CloseOptional(); + TString col2str = TStringBuilder() << "TestString" << 2 * rowsRead << "with number"; + const TMaybe<TString> val = parser.GetOptionalString(); + if (rowsRead % 2 == 0) { + UNIT_ASSERT(val); + UNIT_ASSERT_STRINGS_EQUAL(*val, col2str); + } else { + UNIT_ASSERT(!val); + } + break; + } + case 2: { + UNIT_ASSERT(parser.GetPrimitiveType() == EPrimitiveType::Int64); + parser.CloseOptional(); + const TMaybe<i64> val = parser.GetOptionalInt64(); + UNIT_ASSERT(val); + UNIT_ASSERT(*val == rowsRead*rowsRead); + break; + } + default: + UNIT_FAIL("Unexpected columt number"); + } + } + const bool nextMemberOk = parser.TryNextMember(); + UNIT_ASSERT(!nextMemberOk); + parser.CloseStruct(); + ++rowsRead; + } + parser.CloseList(); + } + UNIT_ASSERT(rowsRead == RowSize); +} + +Y_UNIT_TEST(ResultSetBoolPrintTest) { + TString resultSetStr = R"_( + columns:{ + name: "Col1" + type: { type_id: BOOL } + } + rows:{ items:{ bool_value: true } } + rows:{ items:{ bool_value: false } } + truncated: false + )_"; + + const TString expect = TStringBuilder() 
+ << "1" << Endl + << "0" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetInt8PrintTest) { + TString resultSetStr = R"_( + columns:{ + name: "Col1" + type: { type_id: INT8 } + } + columns:{ + name: "Col2" + type: { type_id: UINT8 } + } + rows:{ + items:{ int32_value: 5 } + items:{ uint32_value: 230 } + } + rows:{ + items:{ int32_value: -66 } + items:{ uint32_value: 152 } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "5,230" << Endl + << "-66,152" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetInt16PrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col1" - type: { type_id: BOOL } - } - rows:{ items:{ bool_value: true } } - rows:{ items:{ bool_value: false } } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "1" << Endl - << "0" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetInt8PrintTest) { - TString resultSetStr = R"_( + name: "Col1" + type: { type_id: INT16 } + } columns:{ - name: "Col1" - type: { type_id: INT8 } - } + name: "Col2" + type: { type_id: UINT16 } + } + rows:{ + items:{ int32_value: 6141 } + items:{ uint32_value: 60192 } + } + + rows:{ + items:{ int32_value: -6491 } + items:{ uint32_value: 10612 } + } + + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "6141,60192" << Endl + << "-6491,10612" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetInt32PrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col2" - type: { type_id: UINT8 } - } - rows:{ - items:{ int32_value: 5 } - items:{ uint32_value: 230 } - } - rows:{ - items:{ int32_value: -66 } - items:{ uint32_value: 152 } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "5,230" << Endl - << "-66,152" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetInt16PrintTest) { - TString resultSetStr = R"_( + name: 
"Col6" + type: { type_id: INT32 } + } columns:{ - name: "Col1" - type: { type_id: INT16 } - } + name: "Col7" + type: { type_id: UINT32 } + } + rows:{ + items:{ int32_value: 15120 } + items:{ uint32_value: 5219612 } + } + rows:{ + items:{ int32_value: -5052 } + items:{ uint32_value: 14245121 } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "15120,5219612" << Endl + << "-5052,14245121" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetInt64PrintTest) { + TString resultSetStr = R"_( + columns:{ + name: "Col8" + type: { type_id: INT64 } + } + columns:{ + name: "Col9" + type: { type_id: UINT64 } + } + rows:{ + items:{ int64_value: 612421 } + items:{ uint64_value: 6512460 } + } + rows:{ + items:{ int64_value: -6124211241 } + items:{ uint64_value: 961223124 } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "612421,6512460" << Endl + << "-6124211241,961223124" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetFloatPrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col2" - type: { type_id: UINT16 } - } - rows:{ - items:{ int32_value: 6141 } - items:{ uint32_value: 60192 } - } - - rows:{ - items:{ int32_value: -6491 } - items:{ uint32_value: 10612 } - } - - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "6141,60192" << Endl - << "-6491,10612" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetInt32PrintTest) { - TString resultSetStr = R"_( + name: "Col1" + type: { type_id: FLOAT } + } columns:{ - name: "Col6" - type: { type_id: INT32 } - } + name: "Col2" + type: { type_id: DOUBLE } + } + rows:{ + items:{ float_value: .23 } + items:{ double_value: 5125.123155 } + } + rows:{ + items:{ float_value: -6531.124 } + items:{ double_value: -5012.23123155 } + } + rows:{ + items:{ float_value: -inf } + items:{ double_value: inf } + } + rows:{ + items:{ float_value: nan } + items:{ 
double_value: -nan } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "0.23,5125.123155" << Endl + << "-6531.12,-5012.231232" << Endl + << "-inf,inf" << Endl + << "nan,nan" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + + +Y_UNIT_TEST(ResultSetIntarvalsPrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col7" - type: { type_id: UINT32 } - } - rows:{ - items:{ int32_value: 15120 } - items:{ uint32_value: 5219612 } - } - rows:{ - items:{ int32_value: -5052 } - items:{ uint32_value: 14245121 } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "15120,5219612" << Endl - << "-5052,14245121" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetInt64PrintTest) { - TString resultSetStr = R"_( + name: "Col1" + type: { type_id: DATE } + } columns:{ - name: "Col8" - type: { type_id: INT64 } - } + name: "Col2" + type: { type_id: DATETIME } + } columns:{ - name: "Col9" - type: { type_id: UINT64 } - } - rows:{ - items:{ int64_value: 612421 } - items:{ uint64_value: 6512460 } - } - rows:{ - items:{ int64_value: -6124211241 } - items:{ uint64_value: 961223124 } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "612421,6512460" << Endl - << "-6124211241,961223124" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetFloatPrintTest) { - TString resultSetStr = R"_( + name: "Col3" + type: { type_id: TIMESTAMP } + } + rows:{ + items:{ uint32_value: 17966 } + items:{ uint32_value: 1552321844 } + items:{ uint64_value: 1552321844314382 } + } + rows:{ + items:{ uint32_value: 8905 } + items:{ uint32_value: 769417971 } + items:{ uint64_value: 769417971968123 } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "2019-03-11T00:00:00.000000Z,2019-03-11T16:30:44.000000Z,2019-03-11T16:30:44.314382Z" << Endl + << "1994-05-20T00:00:00.000000Z,1994-05-20T07:12:51.000000Z,1994-05-20T07:12:51.968123Z" << 
Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetStringPrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col1" - type: { type_id: FLOAT } - } + name: "Col1" + type: { type_id: STRING } + } columns:{ - name: "Col2" - type: { type_id: DOUBLE } - } - rows:{ - items:{ float_value: .23 } - items:{ double_value: 5125.123155 } - } - rows:{ - items:{ float_value: -6531.124 } - items:{ double_value: -5012.23123155 } - } - rows:{ - items:{ float_value: -inf } - items:{ double_value: inf } - } - rows:{ - items:{ float_value: nan } - items:{ double_value: -nan } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "0.23,5125.123155" << Endl - << "-6531.12,-5012.231232" << Endl - << "-inf,inf" << Endl - << "nan,nan" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - - -Y_UNIT_TEST(ResultSetIntarvalsPrintTest) { - TString resultSetStr = R"_( + name: "Col2" + type: { type_id: STRING } + } columns:{ - name: "Col1" - type: { type_id: DATE } - } - columns:{ - name: "Col2" - type: { type_id: DATETIME } - } - columns:{ - name: "Col3" - type: { type_id: TIMESTAMP } - } - rows:{ - items:{ uint32_value: 17966 } - items:{ uint32_value: 1552321844 } - items:{ uint64_value: 1552321844314382 } - } - rows:{ - items:{ uint32_value: 8905 } - items:{ uint32_value: 769417971 } - items:{ uint64_value: 769417971968123 } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "2019-03-11T00:00:00.000000Z,2019-03-11T16:30:44.000000Z,2019-03-11T16:30:44.314382Z" << Endl - << "1994-05-20T00:00:00.000000Z,1994-05-20T07:12:51.000000Z,1994-05-20T07:12:51.968123Z" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetStringPrintTest) { - TString resultSetStr = R"_( - columns:{ - name: "Col1" - type: { type_id: STRING } - } + name: "Col3" + type: { type_id: STRING } + } + rows:{ + items:{ bytes_value: "simplestring" } + items:{ bytes_value: "Space_And_Underscore Containing 
String" } + items:{ bytes_value: "String\"with\"quote\"marks" } + } + rows:{ + items:{ bytes_value: "~Allowed.symbols_string;!*@$^/" } + items:{ bytes_value: "NotAllowed\":\n#%&(),\\|" } + items:{ bytes_value: "String,with,commas.and.dots" } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "\"simplestring\"," + "\"Space_And_Underscore+Containing+String\"," + "\"String%22with%22quote%22marks\"" << Endl + << "\"~Allowed.symbols_string;!*@$^/\"," + "\"NotAllowed%22%3A%0A%23%25%26%28%29%2C%5C%7C\"," + "\"String%2Cwith%2Ccommas.and.dots\"" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetUtf8PrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col2" - type: { type_id: STRING } - } + name: "Col1" + type: { type_id: UTF8 } + } columns:{ - name: "Col3" - type: { type_id: STRING } - } - rows:{ - items:{ bytes_value: "simplestring" } - items:{ bytes_value: "Space_And_Underscore Containing String" } - items:{ bytes_value: "String\"with\"quote\"marks" } - } - rows:{ - items:{ bytes_value: "~Allowed.symbols_string;!*@$^/" } - items:{ bytes_value: "NotAllowed\":\n#%&(),\\|" } - items:{ bytes_value: "String,with,commas.and.dots" } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "\"simplestring\"," - "\"Space_And_Underscore+Containing+String\"," - "\"String%22with%22quote%22marks\"" << Endl - << "\"~Allowed.symbols_string;!*@$^/\"," - "\"NotAllowed%22%3A%0A%23%25%26%28%29%2C%5C%7C\"," - "\"String%2Cwith%2Ccommas.and.dots\"" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetUtf8PrintTest) { - TString resultSetStr = R"_( + name: "Col12" + type: { type_id: UTF8 } + } + rows:{ + items:{ text_value: "Текст на русском" } + items:{ text_value: "Русские.,/mixed?with_ASCII" } + } + rows:{ + items:{ text_value: "Just-utf–8—text" } + items:{ text_value: "Another┼text" } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << 
"\"%D0%A2%D0%B5%D0%BA%D1%81%D1%82+%D0%BD%D0%B0+%D1%80%D1%83%D1%81%D1%81%D0%BA%D0%BE%D0%BC\"," + "\"%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B5.%2C/mixed%3Fwith_ASCII\"" << Endl + << "\"Just-utf%E2%80%938%E2%80%94text\",\"Another%E2%94%BCtext\"" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetVoidPrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col1" - type: { type_id: UTF8 } - } + name: "Col1" + type:{ optional_type:{ item:{ type_id: INT32 } } } + } columns:{ - name: "Col12" - type: { type_id: UTF8 } - } - rows:{ - items:{ text_value: "Текст на русском" } - items:{ text_value: "Русские.,/mixed?with_ASCII" } - } - rows:{ - items:{ text_value: "Just-utf–8—text" } - items:{ text_value: "Another┼text" } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "\"%D0%A2%D0%B5%D0%BA%D1%81%D1%82+%D0%BD%D0%B0+%D1%80%D1%83%D1%81%D1%81%D0%BA%D0%BE%D0%BC\"," - "\"%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B5.%2C/mixed%3Fwith_ASCII\"" << Endl - << "\"Just-utf%E2%80%938%E2%80%94text\",\"Another%E2%94%BCtext\"" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetVoidPrintTest) { - TString resultSetStr = R"_( + name: "Col12" + type:{ optional_type:{ item:{ type_id: STRING } } } + } + rows:{ + items:{ null_flag_value: NULL_VALUE } + items:{ nested_value:{ bytes_value: "Not_null_string" } } + } + rows:{ + items:{ nested_value:{ int32_value: -752192 } } + items:{ null_flag_value: NULL_VALUE } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "null,\"Not_null_string\"" << Endl + << "-752192,null" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + +Y_UNIT_TEST(ResultSetDecimalPrintTest) { + TString resultSetStr = R"_( columns:{ - name: "Col1" - type:{ optional_type:{ item:{ type_id: INT32 } } } - } + name: "Col1" + type:{ optional_type:{ item:{ type_id: INT32 } } } + } columns:{ - name: "Col12" - type:{ optional_type:{ item:{ type_id: STRING } } } - } 
- rows:{ - items:{ null_flag_value: NULL_VALUE } - items:{ nested_value:{ bytes_value: "Not_null_string" } } - } - rows:{ - items:{ nested_value:{ int32_value: -752192 } } - items:{ null_flag_value: NULL_VALUE } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "null,\"Not_null_string\"" << Endl - << "-752192,null" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - -Y_UNIT_TEST(ResultSetDecimalPrintTest) { - TString resultSetStr = R"_( - columns:{ - name: "Col1" - type:{ optional_type:{ item:{ type_id: INT32 } } } - } - columns:{ - name: "Col12" - type:{ decimal_type: { precision: 23 scale: 8 } } - } - rows:{ - items:{ nested_value:{ int32_value: 1 } } - items:{ high_128: 123 low_128: 456 } - } - rows:{ - items:{ nested_value:{ int32_value: 2 } } - items:{ high_128: 93 low_128: 72 } - } - truncated: false - )_"; - - const TString expect = TStringBuilder() - << "1,22689495210662.74849224" << Endl - << "2,17155471988549.8830036" << Endl; - TestResultSetParsedOk(resultSetStr, expect); -} - + name: "Col12" + type:{ decimal_type: { precision: 23 scale: 8 } } + } + rows:{ + items:{ nested_value:{ int32_value: 1 } } + items:{ high_128: 123 low_128: 456 } + } + rows:{ + items:{ nested_value:{ int32_value: 2 } } + items:{ high_128: 93 low_128: 72 } + } + truncated: false + )_"; + + const TString expect = TStringBuilder() + << "1,22689495210662.74849224" << Endl + << "2,17155471988549.8830036" << Endl; + TestResultSetParsedOk(resultSetStr, expect); +} + Y_UNIT_TEST(ResultSetDyNumberPrintTest) { TString resultSetStr = R"_( columns:{ @@ -516,8 +516,8 @@ Y_UNIT_TEST(ResultSetDyNumberPrintTest) { << "-1E-130" << Endl << "-9.9999999999999999999999999999999999999E+125" << Endl; TestResultSetParsedOk(resultSetStr, expect); -} - +} + Y_UNIT_TEST(ResultSetJsonDocumentPrintTest) { TString resultSetStr = R"_( columns:{ @@ -545,64 +545,64 @@ Y_UNIT_TEST(ResultSetJsonDocumentPrintTest) { } -Y_UNIT_TEST_SUITE(UtilTest) { - 
-Y_UNIT_TEST(SizeFromStringParsing) { - UNIT_ASSERT_EQUAL(SizeFromString("1"), 1); - UNIT_ASSERT_EQUAL(SizeFromString("582"), 582); - UNIT_ASSERT_EQUAL(SizeFromString("852421"), 852421); -} - -Y_UNIT_TEST(SizeFromStringParsingWithDecimalPrefix) { - UNIT_ASSERT_EQUAL(SizeFromString("1K"), 1000); - UNIT_ASSERT_EQUAL(SizeFromString("9238M"), ui64{9238}*1000*1000); - UNIT_ASSERT_EQUAL(SizeFromString("12315G"), ui64{12315}*1000*1000*1000); - UNIT_ASSERT_EQUAL(SizeFromString("642T"), ui64{642}*1000*1000*1000*1000); -} - -Y_UNIT_TEST(SizeFromStringParsingWithBinaryPrefix) { - UNIT_ASSERT_EQUAL(SizeFromString("1Ki"), 1024); - UNIT_ASSERT_EQUAL(SizeFromString("692Mi"), ui64{692}*1024*1024); - UNIT_ASSERT_EQUAL(SizeFromString("42851Gi"), ui64{42851}*1024*1024*1024); - UNIT_ASSERT_EQUAL(SizeFromString("8321Ti"), ui64{8321}*1024*1024*1024*1024); -} - -Y_UNIT_TEST(SizeFromStringParsingErrors) { - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString(""), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("123i321"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12KK"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12k"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12UI"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("NR"), yexception); -} - -Y_UNIT_TEST(PathParseTest) { - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db/", "/my_db/"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db/", "/my_db"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db", "/my_db/"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db", "/my_db"), "/"); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "my_db"), yexception); - - 
UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/my_folder"), "my_folder"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/my_folder/"), "my_folder/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/my_folder"), "my_folder"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/my_folder/"), "my_folder/"); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "my_db/my_folder"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "/other_db/my_folder"), yexception); - - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db/my_folder"), "my_folder"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db/"), "/"); - UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db"), "/"); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/ru/my_"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/ru"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/"), yexception); - UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", ""), yexception); -} - -} - -} // NYdb +Y_UNIT_TEST_SUITE(UtilTest) { + +Y_UNIT_TEST(SizeFromStringParsing) { + UNIT_ASSERT_EQUAL(SizeFromString("1"), 1); + UNIT_ASSERT_EQUAL(SizeFromString("582"), 582); + UNIT_ASSERT_EQUAL(SizeFromString("852421"), 852421); +} + +Y_UNIT_TEST(SizeFromStringParsingWithDecimalPrefix) { + UNIT_ASSERT_EQUAL(SizeFromString("1K"), 1000); + UNIT_ASSERT_EQUAL(SizeFromString("9238M"), ui64{9238}*1000*1000); + UNIT_ASSERT_EQUAL(SizeFromString("12315G"), ui64{12315}*1000*1000*1000); + UNIT_ASSERT_EQUAL(SizeFromString("642T"), ui64{642}*1000*1000*1000*1000); +} + +Y_UNIT_TEST(SizeFromStringParsingWithBinaryPrefix) { + UNIT_ASSERT_EQUAL(SizeFromString("1Ki"), 1024); + UNIT_ASSERT_EQUAL(SizeFromString("692Mi"), ui64{692}*1024*1024); + UNIT_ASSERT_EQUAL(SizeFromString("42851Gi"), ui64{42851}*1024*1024*1024); + UNIT_ASSERT_EQUAL(SizeFromString("8321Ti"), 
ui64{8321}*1024*1024*1024*1024); +} + +Y_UNIT_TEST(SizeFromStringParsingErrors) { + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString(""), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("123i321"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12KK"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12k"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("12UI"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(SizeFromString("NR"), yexception); +} + +Y_UNIT_TEST(PathParseTest) { + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db/", "/my_db/"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db/", "/my_db"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db", "/my_db/"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("my_db", "/my_db"), "/"); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "my_db"), yexception); + + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/my_folder"), "my_folder"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db/", "/my_db/my_folder/"), "my_folder/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/my_folder"), "my_folder"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/my_db", "/my_db/my_folder/"), "my_folder/"); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "my_db/my_folder"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/my_db", "/other_db/my_folder"), yexception); + + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db/my_folder"), "my_folder"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db/"), "/"); + UNIT_ASSERT_EQUAL(RelPathFromAbsolute("/ru/my_db/", "/ru/my_db"), "/"); + 
UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/ru/my_"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/ru"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", "/"), yexception); + UNIT_CHECK_GENERATED_EXCEPTION(RelPathFromAbsolute("/ru/my_db", ""), yexception); +} + +} + +} // NYdb diff --git a/ydb/library/backup/ut/ya.make b/ydb/library/backup/ut/ya.make index a20001d58c..2819acff50 100644 --- a/ydb/library/backup/ut/ya.make +++ b/ydb/library/backup/ut/ya.make @@ -2,18 +2,18 @@ OWNER( va-kuznecov g:kikimr ) - + UNITTEST_FOR(ydb/library/backup) - -SIZE(SMALL) -TIMEOUT(60) - +SIZE(SMALL) + +TIMEOUT(60) + SRC(ut.cpp) - + PEERDIR( library/cpp/string_utils/quote ydb/library/backup ) -END() +END() diff --git a/ydb/library/backup/util.cpp b/ydb/library/backup/util.cpp index 4b4f97e83f..332d29ac8e 100644 --- a/ydb/library/backup/util.cpp +++ b/ydb/library/backup/util.cpp @@ -1,97 +1,97 @@ -#include "util.h" - -#include <util/generic/yexception.h> -#include <util/string/builder.h> -#include <util/string/cast.h> -#include <util/generic/map.h> -#include <util/generic/ymath.h> - -#include <ctype.h> - -namespace NYdb { - -TString RelPathFromAbsolute(TString db, TString path) { - if (!db.StartsWith('/')) { - db.prepend('/'); - } - - if (db.EndsWith('/')) { - db.pop_back(); - } - - TString info = TStringBuilder() << "db# " << db.Quote() << " path# " << path.Quote(); - - if (!path.StartsWith("/")) { - throw yexception() << "path should be absolute, " << info; - } - - if (!path.StartsWith(db)) { - throw yexception() << "the path should starts with a name of the database, " << info; - } - - db.push_back('/'); - path = path.erase(0, Min(path.Size(), db.Size())); - return path ? 
path : "/"; -} - -namespace { - -template<typename T> -static constexpr T pow(T x, size_t p) { - T res = 1; - for (size_t i = 0; i < p; ++i) { - res *= x; - } - return res; -} - -TMap<TStringBuf, ui64> SizeSuffix { - {"", 1}, - {"K", Power<ui64>(1000, 1)}, - {"M", Power<ui64>(1000, 2)}, - {"G", Power<ui64>(1000, 3)}, - {"T", Power<ui64>(1000, 4)}, - {"Ki", Power<ui64>(1024, 1)}, +#include "util.h" + +#include <util/generic/yexception.h> +#include <util/string/builder.h> +#include <util/string/cast.h> +#include <util/generic/map.h> +#include <util/generic/ymath.h> + +#include <ctype.h> + +namespace NYdb { + +TString RelPathFromAbsolute(TString db, TString path) { + if (!db.StartsWith('/')) { + db.prepend('/'); + } + + if (db.EndsWith('/')) { + db.pop_back(); + } + + TString info = TStringBuilder() << "db# " << db.Quote() << " path# " << path.Quote(); + + if (!path.StartsWith("/")) { + throw yexception() << "path should be absolute, " << info; + } + + if (!path.StartsWith(db)) { + throw yexception() << "the path should starts with a name of the database, " << info; + } + + db.push_back('/'); + path = path.erase(0, Min(path.Size(), db.Size())); + return path ? 
path : "/"; +} + +namespace { + +template<typename T> +static constexpr T pow(T x, size_t p) { + T res = 1; + for (size_t i = 0; i < p; ++i) { + res *= x; + } + return res; +} + +TMap<TStringBuf, ui64> SizeSuffix { + {"", 1}, + {"K", Power<ui64>(1000, 1)}, + {"M", Power<ui64>(1000, 2)}, + {"G", Power<ui64>(1000, 3)}, + {"T", Power<ui64>(1000, 4)}, + {"Ki", Power<ui64>(1024, 1)}, {"KiB", Power<ui64>(1024, 1)}, - {"Mi", Power<ui64>(1024, 2)}, + {"Mi", Power<ui64>(1024, 2)}, {"MiB", Power<ui64>(1024, 2)}, - {"Gi", Power<ui64>(1024, 3)}, + {"Gi", Power<ui64>(1024, 3)}, {"GiB", Power<ui64>(1024, 3)}, - {"Ti", Power<ui64>(1024, 4)}, + {"Ti", Power<ui64>(1024, 4)}, {"TiB", Power<ui64>(1024, 4)}, -}; - -} - -ui64 SizeFromString(TStringBuf s) { - size_t pos = s.Size(); +}; + +} + +ui64 SizeFromString(TStringBuf s) { + size_t pos = s.Size(); while (pos > 0 && !isdigit(s[pos - 1])) { - --pos; - } - - TStringBuf suffix; - TStringBuf number; - s.SplitAt(pos, number, suffix); - auto it = SizeSuffix.find(suffix); + --pos; + } + + TStringBuf suffix; + TStringBuf number; + s.SplitAt(pos, number, suffix); + auto it = SizeSuffix.find(suffix); Y_ENSURE(it != SizeSuffix.end(), "Cannot parse string, unknown suffix# " << TString{suffix}.Quote()); - return FromString<ui64>(number) * it->second; -} - -namespace { - -struct TIsVerbosePrint { - bool IsVerbose = false; -}; - -} - -void SetVerbosity(bool isVerbose) { - Singleton<TIsVerbosePrint>()->IsVerbose = isVerbose; -} - -bool GetVerbosity() { - return Singleton<TIsVerbosePrint>()->IsVerbose; -} - - -} + return FromString<ui64>(number) * it->second; +} + +namespace { + +struct TIsVerbosePrint { + bool IsVerbose = false; +}; + +} + +void SetVerbosity(bool isVerbose) { + Singleton<TIsVerbosePrint>()->IsVerbose = isVerbose; +} + +bool GetVerbosity() { + return Singleton<TIsVerbosePrint>()->IsVerbose; +} + + +} diff --git a/ydb/library/backup/util.h b/ydb/library/backup/util.h index fd05125a08..607e84d2bc 100644 --- 
a/ydb/library/backup/util.h +++ b/ydb/library/backup/util.h @@ -1,46 +1,46 @@ -#pragma once - -#include <util/datetime/base.h> -#include <util/generic/string.h> -#include <util/stream/output.h> -#include <util/stream/null.h> - -namespace NYdb { - -void SetVerbosity(bool isVerbose); -bool GetVerbosity(); - -#define LOG_NULL(s) Cnull << s -#define EXTEND_MSG(s) TInstant::Now().ToIsoStringLocal() << ": " << s << Endl - -#define LOG_INFO(s) Cout << EXTEND_MSG(s) -#define LOG_ERR(s) Cerr << EXTEND_MSG(s) -#define LOG_DEBUG(s) (NYdb::GetVerbosity() ? LOG_INFO(s) : LOG_NULL(s)) - - - -// Retrive path relative to database root from absolute -TString RelPathFromAbsolute(TString db, TString path); - -// Parses strings from human readable format to ui64 -// Suppores decimal prefixes such as K(1000), M, G, T -// Suppores binary prefixes such as Ki(1024), Mi, Gi, Ti -// Example: "2Ki" -> 2048 -ui64 SizeFromString(TStringBuf s); - -class TScopedTimer { - TInstant Start; - TString Msg; - -public: - TScopedTimer(const TString& msg) - : Start(TInstant::Now()) - , Msg(msg) - {} - - ~TScopedTimer() { - LOG_INFO(Msg << (TInstant::Now() - Start).SecondsFloat() << "s"); - } -}; - -} +#pragma once + +#include <util/datetime/base.h> +#include <util/generic/string.h> +#include <util/stream/output.h> +#include <util/stream/null.h> + +namespace NYdb { + +void SetVerbosity(bool isVerbose); +bool GetVerbosity(); + +#define LOG_NULL(s) Cnull << s +#define EXTEND_MSG(s) TInstant::Now().ToIsoStringLocal() << ": " << s << Endl + +#define LOG_INFO(s) Cout << EXTEND_MSG(s) +#define LOG_ERR(s) Cerr << EXTEND_MSG(s) +#define LOG_DEBUG(s) (NYdb::GetVerbosity() ? 
LOG_INFO(s) : LOG_NULL(s)) + + + +// Retrive path relative to database root from absolute +TString RelPathFromAbsolute(TString db, TString path); + +// Parses strings from human readable format to ui64 +// Suppores decimal prefixes such as K(1000), M, G, T +// Suppores binary prefixes such as Ki(1024), Mi, Gi, Ti +// Example: "2Ki" -> 2048 +ui64 SizeFromString(TStringBuf s); + +class TScopedTimer { + TInstant Start; + TString Msg; + +public: + TScopedTimer(const TString& msg) + : Start(TInstant::Now()) + , Msg(msg) + {} + + ~TScopedTimer() { + LOG_INFO(Msg << (TInstant::Now() - Start).SecondsFloat() << "s"); + } +}; + +} diff --git a/ydb/library/backup/ya.make b/ydb/library/backup/ya.make index 7f94e738a2..9ea7b93961 100644 --- a/ydb/library/backup/ya.make +++ b/ydb/library/backup/ya.make @@ -1,15 +1,15 @@ -LIBRARY(kikimr_backup) - +LIBRARY(kikimr_backup) + OWNER( va-kuznecov g:kikimr ) - -PEERDIR( + +PEERDIR( library/cpp/bucket_quoter library/cpp/regex/pcre library/cpp/string_utils/quote - util + util ydb/library/dynumber ydb/public/api/grpc ydb/public/api/protos @@ -18,17 +18,17 @@ PEERDIR( ydb/public/sdk/cpp/client/ydb_proto ydb/public/sdk/cpp/client/ydb_scheme ydb/public/sdk/cpp/client/ydb_table -) - -SRCS( - backup.cpp - query_builder.cpp - query_uploader.cpp - util.cpp -) - -END() - -RECURSE_FOR_TESTS( - ut -) +) + +SRCS( + backup.cpp + query_builder.cpp + query_uploader.cpp + util.cpp +) + +END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/pdisk_io/aio.cpp b/ydb/library/pdisk_io/aio.cpp index 7ad813fb20..e877d5fe17 100644 --- a/ydb/library/pdisk_io/aio.cpp +++ b/ydb/library/pdisk_io/aio.cpp @@ -1,26 +1,26 @@ -#include "aio.h" -#include "file_params.h" - -namespace NKikimr { -namespace NPDisk { - -std::unique_ptr<IAsyncIoContext> TIoContextFactoryOSS::CreateAsyncIoContext(const TString &path, ui32 pDiskId, - TDeviceMode::TFlags flags, TIntrusivePtr<TSectorMap> sectorMap) const { - if (sectorMap) { - return CreateAsyncIoContextMap(path, pDiskId, 
sectorMap); - } else { - return CreateAsyncIoContextReal(path, pDiskId, flags); - } -}; - -ISpdkState *TIoContextFactoryOSS::CreateSpdkState() const { - return Singleton<TSpdkStateOSS>(); -} - -void TIoContextFactoryOSS::DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const { - ::NKikimr::DetectFileParameters(path, outDiskSizeBytes, outIsBlockDevice); -} - - -} // NPDisk -} // NKikimr +#include "aio.h" +#include "file_params.h" + +namespace NKikimr { +namespace NPDisk { + +std::unique_ptr<IAsyncIoContext> TIoContextFactoryOSS::CreateAsyncIoContext(const TString &path, ui32 pDiskId, + TDeviceMode::TFlags flags, TIntrusivePtr<TSectorMap> sectorMap) const { + if (sectorMap) { + return CreateAsyncIoContextMap(path, pDiskId, sectorMap); + } else { + return CreateAsyncIoContextReal(path, pDiskId, flags); + } +}; + +ISpdkState *TIoContextFactoryOSS::CreateSpdkState() const { + return Singleton<TSpdkStateOSS>(); +} + +void TIoContextFactoryOSS::DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const { + ::NKikimr::DetectFileParameters(path, outDiskSizeBytes, outIsBlockDevice); +} + + +} // NPDisk +} // NKikimr diff --git a/ydb/library/pdisk_io/aio.h b/ydb/library/pdisk_io/aio.h index 00a6b9050c..61d0ee20e6 100644 --- a/ydb/library/pdisk_io/aio.h +++ b/ydb/library/pdisk_io/aio.h @@ -1,155 +1,155 @@ -#pragma once - -#include "buffer_pool.h" -#include "sector_map.h" -#include "spdk_state.h" - +#pragma once + +#include "buffer_pool.h" +#include "sector_map.h" +#include "spdk_state.h" + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_request_id.h> #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_devicemode.h> - + #include <ydb/library/wilson/wilson_event.h> - -#include <util/system/file.h> -#include <util/generic/string.h> -#include <util/string/printf.h> - -namespace NKikimr { - -struct TPDiskMon; - -namespace NPDisk { - -using namespace NActors; - -struct 
TAsyncIoOperationResult; - -struct ICallback { - virtual void Exec(TAsyncIoOperationResult *) = 0; - - virtual ~ICallback() = default; -}; - -struct IAsyncIoOperation { - enum class EType { - PRead, - PWrite, - PTrim - }; - - virtual ~IAsyncIoOperation() { - }; - - virtual void* GetCookie() = 0; - virtual NWilson::TTraceId *GetTraceIdPtr() = 0; - virtual void* GetData() = 0; - virtual ui64 GetOffset() = 0; - virtual ui64 GetSize() = 0; - virtual EType GetType() = 0; - virtual TReqId GetReqId() = 0; - - virtual void SetCallback(ICallback *) = 0; - virtual void ExecCallback(TAsyncIoOperationResult *) = 0; - - virtual TString Str() { - TStringStream str; - str << "LibaioAsyncOperation { Data# " << (ui64)GetData() << " Offset# " << GetOffset() << " Size# " << - GetSize() << " ReqId# " << GetReqId() << " Type# " << GetType(); - return str.Str(); - } -}; - -struct TPDiskDebugInfo { - TString Path; - TString Info; - ui32 PDiskId; - - TPDiskDebugInfo(const TString &path, ui32 pDiskId, const TString &ioLibrary) - : Path(path) - , Info(TStringBuilder() << "PDiskId# " << pDiskId << " path# \"" << path << "\"" << " IoLibrary# " << ioLibrary) - , PDiskId(pDiskId) - {} - - const char *c_str() const { - return Info.c_str(); - } - - TString Str() const { - return Info; - } -}; - -inline IOutputStream &operator<<(IOutputStream &out, const TPDiskDebugInfo& info) { - out << info.Str(); - return out; -} - -enum class EIoResult : i64 { - Unknown = 1, // - Ok = 2, // - BadFileNumber = 3, // aka EBADF: Submit - TryAgain = 4, // aka EAGAIN: Setup Submit - BadAddress = 5, // aka EFAULT: Setup Submit GetEvents Destroy - InvalidArgument = 6, // aka EINVAL: Setup Submit GetEvents Destroy - FunctionNotImplemented = 7, // aka ENOSYS: Setup Submit GetEvents Destroy - InterruptedSystemCall = 8, // aka EINTR: GetEvents - OutOfMemory = 9, // aka ENOMEM: Setup Result - IOError = 10, // aka EIO: Result - FileOpenError = 11, // Setup - FileLockError = 12, // Setup - FakeError = 13, // Setup - - // 
From kernel maillist: "this error is not fatal. One can fix it easily by rewriting affected sector" - InvalidSequence = 14, // aka EILSEQ: GetEvents - // for broken disk's error-log: "READ_ERROR: The read data could not be recovered from the media" - NoData = 15 // aka ENODATA: GetEvents -}; - -struct TAsyncIoOperationResult { - IAsyncIoOperation *Operation = nullptr; - EIoResult Result = EIoResult::Unknown; -}; - -class IAsyncIoContext { -public: - virtual ~IAsyncIoContext() { - } - - virtual IAsyncIoOperation* CreateAsyncIoOperation(void *cookie, TReqId reqId, NWilson::TTraceId *traceId) = 0; - virtual void DestroyAsyncIoOperation(IAsyncIoOperation *operation) = 0; - virtual EIoResult Destroy() = 0; - // Returns -EIoResult in case of error - virtual i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) = 0; - virtual void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) = 0; - virtual void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) = 0; - virtual void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) = 0; - virtual bool DoTrim(IAsyncIoOperation *op) = 0; - virtual EIoResult Setup(ui64 maxevents, bool doLock) = 0; - virtual void InitializeMonitoring(TPDiskMon &mon) = 0; - virtual EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) = 0; - virtual void SetActorSystem(TActorSystem *actorSystem) = 0; - virtual int GetLastErrno() = 0; - virtual TString GetPDiskInfo() = 0; - virtual TFileHandle *GetFileHandle() = 0; -}; - -std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags); -std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextMap(const TString &path, ui32 pDiskId, TIntrusivePtr<TSectorMap> sectorMap); - -struct IIoContextFactory { - virtual std::unique_ptr<IAsyncIoContext> CreateAsyncIoContext(const TString &path, ui32 pDiskId, - TDeviceMode::TFlags flags, 
TIntrusivePtr<TSectorMap> sectorMap) const = 0; - virtual void DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const = 0; - virtual ISpdkState *CreateSpdkState() const = 0; - virtual ~IIoContextFactory() {} -}; - -struct TIoContextFactoryOSS : IIoContextFactory { - std::unique_ptr<IAsyncIoContext> CreateAsyncIoContext(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags, - TIntrusivePtr<TSectorMap> sectorMap) const override; - - ISpdkState *CreateSpdkState() const override; - void DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const override; -}; - -} // NPDisk -} // NKikimr + +#include <util/system/file.h> +#include <util/generic/string.h> +#include <util/string/printf.h> + +namespace NKikimr { + +struct TPDiskMon; + +namespace NPDisk { + +using namespace NActors; + +struct TAsyncIoOperationResult; + +struct ICallback { + virtual void Exec(TAsyncIoOperationResult *) = 0; + + virtual ~ICallback() = default; +}; + +struct IAsyncIoOperation { + enum class EType { + PRead, + PWrite, + PTrim + }; + + virtual ~IAsyncIoOperation() { + }; + + virtual void* GetCookie() = 0; + virtual NWilson::TTraceId *GetTraceIdPtr() = 0; + virtual void* GetData() = 0; + virtual ui64 GetOffset() = 0; + virtual ui64 GetSize() = 0; + virtual EType GetType() = 0; + virtual TReqId GetReqId() = 0; + + virtual void SetCallback(ICallback *) = 0; + virtual void ExecCallback(TAsyncIoOperationResult *) = 0; + + virtual TString Str() { + TStringStream str; + str << "LibaioAsyncOperation { Data# " << (ui64)GetData() << " Offset# " << GetOffset() << " Size# " << + GetSize() << " ReqId# " << GetReqId() << " Type# " << GetType(); + return str.Str(); + } +}; + +struct TPDiskDebugInfo { + TString Path; + TString Info; + ui32 PDiskId; + + TPDiskDebugInfo(const TString &path, ui32 pDiskId, const TString &ioLibrary) + : Path(path) + , Info(TStringBuilder() << "PDiskId# " << pDiskId << " path# \"" << path << "\"" << " 
IoLibrary# " << ioLibrary) + , PDiskId(pDiskId) + {} + + const char *c_str() const { + return Info.c_str(); + } + + TString Str() const { + return Info; + } +}; + +inline IOutputStream &operator<<(IOutputStream &out, const TPDiskDebugInfo& info) { + out << info.Str(); + return out; +} + +enum class EIoResult : i64 { + Unknown = 1, // + Ok = 2, // + BadFileNumber = 3, // aka EBADF: Submit + TryAgain = 4, // aka EAGAIN: Setup Submit + BadAddress = 5, // aka EFAULT: Setup Submit GetEvents Destroy + InvalidArgument = 6, // aka EINVAL: Setup Submit GetEvents Destroy + FunctionNotImplemented = 7, // aka ENOSYS: Setup Submit GetEvents Destroy + InterruptedSystemCall = 8, // aka EINTR: GetEvents + OutOfMemory = 9, // aka ENOMEM: Setup Result + IOError = 10, // aka EIO: Result + FileOpenError = 11, // Setup + FileLockError = 12, // Setup + FakeError = 13, // Setup + + // From kernel maillist: "this error is not fatal. One can fix it easily by rewriting affected sector" + InvalidSequence = 14, // aka EILSEQ: GetEvents + // for broken disk's error-log: "READ_ERROR: The read data could not be recovered from the media" + NoData = 15 // aka ENODATA: GetEvents +}; + +struct TAsyncIoOperationResult { + IAsyncIoOperation *Operation = nullptr; + EIoResult Result = EIoResult::Unknown; +}; + +class IAsyncIoContext { +public: + virtual ~IAsyncIoContext() { + } + + virtual IAsyncIoOperation* CreateAsyncIoOperation(void *cookie, TReqId reqId, NWilson::TTraceId *traceId) = 0; + virtual void DestroyAsyncIoOperation(IAsyncIoOperation *operation) = 0; + virtual EIoResult Destroy() = 0; + // Returns -EIoResult in case of error + virtual i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) = 0; + virtual void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) = 0; + virtual void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) = 0; + virtual void PreparePTrim(IAsyncIoOperation *op, 
size_t size, size_t offset) = 0; + virtual bool DoTrim(IAsyncIoOperation *op) = 0; + virtual EIoResult Setup(ui64 maxevents, bool doLock) = 0; + virtual void InitializeMonitoring(TPDiskMon &mon) = 0; + virtual EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) = 0; + virtual void SetActorSystem(TActorSystem *actorSystem) = 0; + virtual int GetLastErrno() = 0; + virtual TString GetPDiskInfo() = 0; + virtual TFileHandle *GetFileHandle() = 0; +}; + +std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags); +std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextMap(const TString &path, ui32 pDiskId, TIntrusivePtr<TSectorMap> sectorMap); + +struct IIoContextFactory { + virtual std::unique_ptr<IAsyncIoContext> CreateAsyncIoContext(const TString &path, ui32 pDiskId, + TDeviceMode::TFlags flags, TIntrusivePtr<TSectorMap> sectorMap) const = 0; + virtual void DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const = 0; + virtual ISpdkState *CreateSpdkState() const = 0; + virtual ~IIoContextFactory() {} +}; + +struct TIoContextFactoryOSS : IIoContextFactory { + std::unique_ptr<IAsyncIoContext> CreateAsyncIoContext(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags, + TIntrusivePtr<TSectorMap> sectorMap) const override; + + ISpdkState *CreateSpdkState() const override; + void DetectFileParameters(const TString &path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) const override; +}; + +} // NPDisk +} // NKikimr diff --git a/ydb/library/pdisk_io/aio_linux.cpp b/ydb/library/pdisk_io/aio_linux.cpp index 254cec9f82..d3dc74d6d8 100644 --- a/ydb/library/pdisk_io/aio_linux.cpp +++ b/ydb/library/pdisk_io/aio_linux.cpp @@ -1,372 +1,372 @@ -#include "aio.h" -#include "buffers.h" - +#include "aio.h" +#include "buffers.h" + //#include <ydb/core/blobstorage/base/wilson_events.h> #include <ydb/core/debug/valgrind_check.h> #include <ydb/core/util/yverify_stream.h> - + 
#include <ydb/library/pdisk_io/spdk_state.h> -#include <library/cpp/actors/util/intrinsics.h> -#include <library/cpp/containers/stack_vector/stack_vec.h> -#include <util/system/file.h> -#include <util/stream/format.h> -#include <contrib/libs/libaio/libaio.h> - -#include <linux/fs.h> -#include <sys/ioctl.h> - -namespace NKikimr { -namespace NPDisk { - -class TCallbackContext; - -struct TAsyncIoOperation : iocb, IAsyncIoOperation { - void* Cookie; - ICallback *Callback; - TReqId ReqId; - NWilson::TTraceId TraceId; - bool IsTrim; // Trim is special case of IO_CMD_PWRITE operation - - TAsyncIoOperation() = default; - - TAsyncIoOperation(void *cookie, TReqId reqId, NWilson::TTraceId *traceId) - : Cookie(cookie) - , Callback(nullptr) - , ReqId(reqId) - , TraceId(traceId ? std::move(*traceId) : NWilson::TTraceId()) - , IsTrim(false) - {} - - ~TAsyncIoOperation() override { - } - - void* GetCookie() override { - return Cookie; - } - - NWilson::TTraceId *GetTraceIdPtr() override { - return &TraceId; - } - - void* GetData() override { - return u.c.buf; - } - - ui64 GetOffset() override { - return u.c.offset; - }; - - ui64 GetSize() override { - return u.c.nbytes; - }; - - TReqId GetReqId() override { - return ReqId; - } - - EType GetType() override { - switch (aio_lio_opcode) { - case IO_CMD_PWRITE: return IsTrim ? 
EType::PTrim : EType::PWrite; - case IO_CMD_PREAD: return EType::PRead; - default: - Y_FAIL_S("Libaio TAsyncIoOperation::GetType(), unknown type# " << Hex(aio_lio_opcode)); - } - }; - - void SetCallback(ICallback *callback) override { - Callback = callback; - } - - void ExecCallback(TAsyncIoOperationResult *result) override { - Callback->Exec(result); - } -}; - -class TAsyncIoContextLibaio : public IAsyncIoContext { - io_context_t IoContext; - TActorSystem *ActorSystem; - TPool<TAsyncIoOperation, 1024> Pool; - THolder<TFileHandle> File; - int LastErrno = 0; - - TPDiskDebugInfo PDiskInfo; -public: - - TAsyncIoContextLibaio(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags) - : IoContext(nullptr) - , ActorSystem(nullptr) - , PDiskInfo(path, pDiskId, "libaio") - { - Y_UNUSED(flags); - } - - ~TAsyncIoContextLibaio() { - } - - void InitializeMonitoring(TPDiskMon &/*mon*/) override { - //Pool.InitializeMonitoring(mon); - } - - IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, TReqId reqId, NWilson::TTraceId *traceId) override { - void *p = Pool.Pop(); - IAsyncIoOperation *operation = new (p) TAsyncIoOperation(cookie, reqId, traceId); - return operation; - } - - void DestroyAsyncIoOperation(IAsyncIoOperation* operation) override { - Pool.Push(static_cast<TAsyncIoOperation*>(operation)); - } - - EIoResult Destroy() override { - int ret = io_destroy(IoContext); - if (ret < 0) { - switch (-ret) { - case EFAULT: return EIoResult::BadAddress; - case EINVAL: return EIoResult::InvalidArgument; - case ENOSYS: return EIoResult::FunctionNotImplemented; - default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret - << " strerror# " << strerror(-ret)); - } - } - if (File) { - ret = File->Flock(LOCK_UN); - Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno)); - bool isOk = File->Close(); - Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno)); 
- } - return EIoResult::Ok; - } - - i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { - TStackVec<io_event, 64> ioEvents; - ioEvents.resize(maxEvents); - timespec ioTimeout = { (time_t)timeout.Seconds(), timeout.NanoSecondsOfSecond() }; - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&IoContext, sizeof(IoContext)); - int ret = io_getevents(IoContext, minEvents, maxEvents, &ioEvents[0], &ioTimeout); - if (ret < 0) { - return -static_cast<i64>(RetErrnoToContextError(ret, "io_getevents")); - } - for (int i = 0; i < ret; ++i) { - IAsyncIoOperation *op = static_cast<TAsyncIoOperation*>(ioEvents[i].obj); - -#if defined(__has_feature) -# if __has_feature(thread_sanitizer) - // - // Thread Sanitizer does not consider io_submit / io_getevents synchronization. - // - AtomicLoad((char*)op); -# endif -#endif - - events[i].Operation = op; - - events[i].Result = RetErrnoToContextError(ioEvents[i].res, "ioEvents[].res"); - - events[i].Operation->ExecCallback(&events[i]); - - //if (ActorSystem) { - // WILSON_TRACE(*ActorSystem, op->GetTraceIdPtr(), AsyncIoFinished); - //} - } - return ret; - } - - EIoResult RetErrnoToContextError(i64 ret, const char *info) { - if (ret < 0) { - switch(-ret) { - case EAGAIN: return EIoResult::TryAgain; - case EBADF: return EIoResult::BadFileNumber; - case EFAULT: return EIoResult::BadAddress; - case EINTR: return EIoResult::InterruptedSystemCall; - case EINVAL: return EIoResult::InvalidArgument; - case EIO: return EIoResult::IOError; - case ENOMEM: return EIoResult::OutOfMemory; - case ENOSYS: return EIoResult::FunctionNotImplemented; - case EILSEQ: return EIoResult::InvalidSequence; - case ENODATA: return EIoResult::NoData; - default: Y_FAIL_S(PDiskInfo << " unexpected error in " << info << ", error# " << -ret - << " strerror# " << strerror(-ret)); - } - } else { - return EIoResult::Ok; - } - } - - void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { - 
Y_VERIFY_DEBUG(File); - iocb* cb = static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)); - io_prep_pread(cb, static_cast<FHANDLE>(*File), destination, size, offset); - } - - void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) override { - Y_VERIFY_DEBUG(File); - iocb* cb = static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)); - io_prep_pwrite(cb, static_cast<FHANDLE>(*File), const_cast<void*>(source), size, offset); - } - - void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { - PreparePWrite(op, nullptr, size, offset); - static_cast<TAsyncIoOperation*>(op)->IsTrim = true; - } - - bool DoTrim(IAsyncIoOperation *op) override { - TAsyncIoOperation *trim = static_cast<TAsyncIoOperation*>(op); - Y_VERIFY(trim->IsTrim); - - ui64 range[2] = {trim->GetOffset(), trim->GetSize()}; - bool tryAgain = true; - TStringStream str; - str << "BLKDISCARD " << PDiskInfo; - errno = 0; - if (ioctl((FHANDLE)*File.Get(), BLKDISCARD, &range) == -1) { - int errorId = errno; - if (errorId == EOPNOTSUPP) { - str << " failed, operation not supported, trimming will be disabled for the device"; - tryAgain = false; - } else if (errorId == ENOTTY) { - str << " failed, device is not a typewriter! 
Trimming will be disabled for the device"; - tryAgain = false; - } else { - str << " failed, errno# " << errorId << " strerror# " << strerror(errorId); - tryAgain = true; - } - - if (ActorSystem) { - //LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_DEVICE, str.Str()); - } else { - Cerr << str.Str() << Endl; - } - } else { - if (ActorSystem) { - //LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_DEVICE, str.Str() << " trimmed# " << range[1] - // << " size# " << trim->GetSize() << " from# " << range[0] << " offset# " << trim->GetOffset()); - } - tryAgain = true; - } - return tryAgain; - } - - int LockFile() { - int ret = -1; - errno = EWOULDBLOCK; - int retry = 2; - while (ret == -1 && errno == EWOULDBLOCK && retry > 0) { - errno = 0; - ret = File->Flock(LOCK_EX | LOCK_NB); - if (ret == 0) { - break; - } else { - LastErrno = errno; - if (ActorSystem){ - //LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_DEVICE, PDiskInfo - // << " error on file locking, strerror# " << strerror(errno)); - } - if (retry > 1) { - Sleep(TDuration::Seconds(1)); - } - } - --retry; - } - return ret; - } - - EIoResult Setup(ui64 maxEvents, bool doLock) override { - File = MakeHolder<TFileHandle>(PDiskInfo.Path.c_str(), - OpenExisting | RdWr | DirectAligned | Sync); - bool isFileOpened = File->IsOpen(); - if (isFileOpened) { - if (doLock) { - int ret = LockFile(); - if (ret == -1) { - return EIoResult::FileLockError; - } - } - } else { - int fd = open(PDiskInfo.Path.c_str(), O_RDWR); - if (fd < 0) { - LastErrno = errno; - return EIoResult::FileOpenError; - } else { - close(fd); - return EIoResult::TryAgain; - } - } - int ret = io_setup(maxEvents, &IoContext); - if (ret < 0) { - LastErrno = -ret; - } - return RetErrnoToContextError(ret, "io_setup"); - } - - EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { - op->SetCallback(callback); - iocb* ios[1] = { static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)) }; - //if (ActorSystem) { - // WILSON_TRACE(*ActorSystem, 
op->GetTraceIdPtr(), AsyncIoInQueue); - //} - - if (op->GetType() == IAsyncIoOperation::EType::PWrite) { - //PDISK_FAIL_INJECTION(1); - } - -#if defined(__has_feature) -# if __has_feature(thread_sanitizer) - // - // Thread Sanitizer does not consider io_submit / io_getevents synchronization. - // - AtomicStore((char*)op, *(char*)op); -# endif -#endif - - int ret = io_submit(IoContext, 1, ios); - if (ret < 0) { - LastErrno = -ret; - } else if (ret == 0) { - return EIoResult::TryAgain; - } - return RetErrnoToContextError(ret, "io_setup"); - } - - void SetActorSystem(TActorSystem *actorSystem) override { - ActorSystem = actorSystem; - } - - TString GetPDiskInfo() override { - return PDiskInfo.Str(); - } - - int GetLastErrno() override { - return LastErrno; - } - - TFileHandle *GetFileHandle() override { - return File.Get(); - } -}; - - -// -// TBufferPoolHugePages -// -TBufferPoolHugePages::TBufferPoolHugePages(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params) - : TBufferPool(bufferSize, bufferCount, params) -{ - TBufferPool::UseHugePages = true; - constexpr ui32 alignment = 512; - auto spdkState = Singleton<TSpdkStateOSS>(); - AlignedBuffer = spdkState->Malloc(AlignUp(ui32(bufferSize), ui32(alignment)) * bufferCount, alignment); - Y_VERIFY((ui64)AlignedBuffer % alignment == 0); - MarkUpPool(AlignedBuffer); -} - -TBufferPoolHugePages::~TBufferPoolHugePages() { - auto spdkState = Singleton<TSpdkStateOSS>(); - spdkState->Free(AlignedBuffer); -} - -std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags) { - return std::make_unique<TAsyncIoContextLibaio>(path, pDiskId, flags); -} - -} // NPDisk -} // NKikimr +#include <library/cpp/actors/util/intrinsics.h> +#include <library/cpp/containers/stack_vector/stack_vec.h> +#include <util/system/file.h> +#include <util/stream/format.h> +#include <contrib/libs/libaio/libaio.h> + +#include <linux/fs.h> +#include <sys/ioctl.h> + +namespace NKikimr { 
+namespace NPDisk { + +class TCallbackContext; + +struct TAsyncIoOperation : iocb, IAsyncIoOperation { + void* Cookie; + ICallback *Callback; + TReqId ReqId; + NWilson::TTraceId TraceId; + bool IsTrim; // Trim is special case of IO_CMD_PWRITE operation + + TAsyncIoOperation() = default; + + TAsyncIoOperation(void *cookie, TReqId reqId, NWilson::TTraceId *traceId) + : Cookie(cookie) + , Callback(nullptr) + , ReqId(reqId) + , TraceId(traceId ? std::move(*traceId) : NWilson::TTraceId()) + , IsTrim(false) + {} + + ~TAsyncIoOperation() override { + } + + void* GetCookie() override { + return Cookie; + } + + NWilson::TTraceId *GetTraceIdPtr() override { + return &TraceId; + } + + void* GetData() override { + return u.c.buf; + } + + ui64 GetOffset() override { + return u.c.offset; + }; + + ui64 GetSize() override { + return u.c.nbytes; + }; + + TReqId GetReqId() override { + return ReqId; + } + + EType GetType() override { + switch (aio_lio_opcode) { + case IO_CMD_PWRITE: return IsTrim ? EType::PTrim : EType::PWrite; + case IO_CMD_PREAD: return EType::PRead; + default: + Y_FAIL_S("Libaio TAsyncIoOperation::GetType(), unknown type# " << Hex(aio_lio_opcode)); + } + }; + + void SetCallback(ICallback *callback) override { + Callback = callback; + } + + void ExecCallback(TAsyncIoOperationResult *result) override { + Callback->Exec(result); + } +}; + +class TAsyncIoContextLibaio : public IAsyncIoContext { + io_context_t IoContext; + TActorSystem *ActorSystem; + TPool<TAsyncIoOperation, 1024> Pool; + THolder<TFileHandle> File; + int LastErrno = 0; + + TPDiskDebugInfo PDiskInfo; +public: + + TAsyncIoContextLibaio(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags) + : IoContext(nullptr) + , ActorSystem(nullptr) + , PDiskInfo(path, pDiskId, "libaio") + { + Y_UNUSED(flags); + } + + ~TAsyncIoContextLibaio() { + } + + void InitializeMonitoring(TPDiskMon &/*mon*/) override { + //Pool.InitializeMonitoring(mon); + } + + IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, 
TReqId reqId, NWilson::TTraceId *traceId) override { + void *p = Pool.Pop(); + IAsyncIoOperation *operation = new (p) TAsyncIoOperation(cookie, reqId, traceId); + return operation; + } + + void DestroyAsyncIoOperation(IAsyncIoOperation* operation) override { + Pool.Push(static_cast<TAsyncIoOperation*>(operation)); + } + + EIoResult Destroy() override { + int ret = io_destroy(IoContext); + if (ret < 0) { + switch (-ret) { + case EFAULT: return EIoResult::BadAddress; + case EINVAL: return EIoResult::InvalidArgument; + case ENOSYS: return EIoResult::FunctionNotImplemented; + default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret + << " strerror# " << strerror(-ret)); + } + } + if (File) { + ret = File->Flock(LOCK_UN); + Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno)); + bool isOk = File->Close(); + Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno)); + } + return EIoResult::Ok; + } + + i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { + TStackVec<io_event, 64> ioEvents; + ioEvents.resize(maxEvents); + timespec ioTimeout = { (time_t)timeout.Seconds(), timeout.NanoSecondsOfSecond() }; + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&IoContext, sizeof(IoContext)); + int ret = io_getevents(IoContext, minEvents, maxEvents, &ioEvents[0], &ioTimeout); + if (ret < 0) { + return -static_cast<i64>(RetErrnoToContextError(ret, "io_getevents")); + } + for (int i = 0; i < ret; ++i) { + IAsyncIoOperation *op = static_cast<TAsyncIoOperation*>(ioEvents[i].obj); + +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) + // + // Thread Sanitizer does not consider io_submit / io_getevents synchronization. 
+ // + AtomicLoad((char*)op); +# endif +#endif + + events[i].Operation = op; + + events[i].Result = RetErrnoToContextError(ioEvents[i].res, "ioEvents[].res"); + + events[i].Operation->ExecCallback(&events[i]); + + //if (ActorSystem) { + // WILSON_TRACE(*ActorSystem, op->GetTraceIdPtr(), AsyncIoFinished); + //} + } + return ret; + } + + EIoResult RetErrnoToContextError(i64 ret, const char *info) { + if (ret < 0) { + switch(-ret) { + case EAGAIN: return EIoResult::TryAgain; + case EBADF: return EIoResult::BadFileNumber; + case EFAULT: return EIoResult::BadAddress; + case EINTR: return EIoResult::InterruptedSystemCall; + case EINVAL: return EIoResult::InvalidArgument; + case EIO: return EIoResult::IOError; + case ENOMEM: return EIoResult::OutOfMemory; + case ENOSYS: return EIoResult::FunctionNotImplemented; + case EILSEQ: return EIoResult::InvalidSequence; + case ENODATA: return EIoResult::NoData; + default: Y_FAIL_S(PDiskInfo << " unexpected error in " << info << ", error# " << -ret + << " strerror# " << strerror(-ret)); + } + } else { + return EIoResult::Ok; + } + } + + void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { + Y_VERIFY_DEBUG(File); + iocb* cb = static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)); + io_prep_pread(cb, static_cast<FHANDLE>(*File), destination, size, offset); + } + + void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) override { + Y_VERIFY_DEBUG(File); + iocb* cb = static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)); + io_prep_pwrite(cb, static_cast<FHANDLE>(*File), const_cast<void*>(source), size, offset); + } + + void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { + PreparePWrite(op, nullptr, size, offset); + static_cast<TAsyncIoOperation*>(op)->IsTrim = true; + } + + bool DoTrim(IAsyncIoOperation *op) override { + TAsyncIoOperation *trim = static_cast<TAsyncIoOperation*>(op); + Y_VERIFY(trim->IsTrim); + + ui64 range[2] 
= {trim->GetOffset(), trim->GetSize()}; + bool tryAgain = true; + TStringStream str; + str << "BLKDISCARD " << PDiskInfo; + errno = 0; + if (ioctl((FHANDLE)*File.Get(), BLKDISCARD, &range) == -1) { + int errorId = errno; + if (errorId == EOPNOTSUPP) { + str << " failed, operation not supported, trimming will be disabled for the device"; + tryAgain = false; + } else if (errorId == ENOTTY) { + str << " failed, device is not a typewriter! Trimming will be disabled for the device"; + tryAgain = false; + } else { + str << " failed, errno# " << errorId << " strerror# " << strerror(errorId); + tryAgain = true; + } + + if (ActorSystem) { + //LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_DEVICE, str.Str()); + } else { + Cerr << str.Str() << Endl; + } + } else { + if (ActorSystem) { + //LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_DEVICE, str.Str() << " trimmed# " << range[1] + // << " size# " << trim->GetSize() << " from# " << range[0] << " offset# " << trim->GetOffset()); + } + tryAgain = true; + } + return tryAgain; + } + + int LockFile() { + int ret = -1; + errno = EWOULDBLOCK; + int retry = 2; + while (ret == -1 && errno == EWOULDBLOCK && retry > 0) { + errno = 0; + ret = File->Flock(LOCK_EX | LOCK_NB); + if (ret == 0) { + break; + } else { + LastErrno = errno; + if (ActorSystem){ + //LOG_ERROR_S(*ActorSystem, NKikimrServices::BS_DEVICE, PDiskInfo + // << " error on file locking, strerror# " << strerror(errno)); + } + if (retry > 1) { + Sleep(TDuration::Seconds(1)); + } + } + --retry; + } + return ret; + } + + EIoResult Setup(ui64 maxEvents, bool doLock) override { + File = MakeHolder<TFileHandle>(PDiskInfo.Path.c_str(), + OpenExisting | RdWr | DirectAligned | Sync); + bool isFileOpened = File->IsOpen(); + if (isFileOpened) { + if (doLock) { + int ret = LockFile(); + if (ret == -1) { + return EIoResult::FileLockError; + } + } + } else { + int fd = open(PDiskInfo.Path.c_str(), O_RDWR); + if (fd < 0) { + LastErrno = errno; + return EIoResult::FileOpenError; + } else { + 
close(fd); + return EIoResult::TryAgain; + } + } + int ret = io_setup(maxEvents, &IoContext); + if (ret < 0) { + LastErrno = -ret; + } + return RetErrnoToContextError(ret, "io_setup"); + } + + EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { + op->SetCallback(callback); + iocb* ios[1] = { static_cast<iocb*>(static_cast<TAsyncIoOperation*>(op)) }; + //if (ActorSystem) { + // WILSON_TRACE(*ActorSystem, op->GetTraceIdPtr(), AsyncIoInQueue); + //} + + if (op->GetType() == IAsyncIoOperation::EType::PWrite) { + //PDISK_FAIL_INJECTION(1); + } + +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) + // + // Thread Sanitizer does not consider io_submit / io_getevents synchronization. + // + AtomicStore((char*)op, *(char*)op); +# endif +#endif + + int ret = io_submit(IoContext, 1, ios); + if (ret < 0) { + LastErrno = -ret; + } else if (ret == 0) { + return EIoResult::TryAgain; + } + return RetErrnoToContextError(ret, "io_setup"); + } + + void SetActorSystem(TActorSystem *actorSystem) override { + ActorSystem = actorSystem; + } + + TString GetPDiskInfo() override { + return PDiskInfo.Str(); + } + + int GetLastErrno() override { + return LastErrno; + } + + TFileHandle *GetFileHandle() override { + return File.Get(); + } +}; + + +// +// TBufferPoolHugePages +// +TBufferPoolHugePages::TBufferPoolHugePages(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params) + : TBufferPool(bufferSize, bufferCount, params) +{ + TBufferPool::UseHugePages = true; + constexpr ui32 alignment = 512; + auto spdkState = Singleton<TSpdkStateOSS>(); + AlignedBuffer = spdkState->Malloc(AlignUp(ui32(bufferSize), ui32(alignment)) * bufferCount, alignment); + Y_VERIFY((ui64)AlignedBuffer % alignment == 0); + MarkUpPool(AlignedBuffer); +} + +TBufferPoolHugePages::~TBufferPoolHugePages() { + auto spdkState = Singleton<TSpdkStateOSS>(); + spdkState->Free(AlignedBuffer); +} + +std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 
pDiskId, TDeviceMode::TFlags flags) { + return std::make_unique<TAsyncIoContextLibaio>(path, pDiskId, flags); +} + +} // NPDisk +} // NKikimr diff --git a/ydb/library/pdisk_io/aio_map.cpp b/ydb/library/pdisk_io/aio_map.cpp index 689d5bf8e5..19062a7264 100644 --- a/ydb/library/pdisk_io/aio_map.cpp +++ b/ydb/library/pdisk_io/aio_map.cpp @@ -1,359 +1,359 @@ -#include "aio.h" - +#include "aio.h" + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h> - -#include <util/random/random.h> -#include <util/thread/pool.h> - -namespace NKikimr { -namespace NPDisk { - -struct TAsyncIoOperationMap : IObjectInQueue, IAsyncIoOperation { - TSectorMap &SectorMap; - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &CompleteQueue; - void *Cookie; - void *Data = nullptr; - ui64 Offset = 0; - ui64 Size = 0; - EType Type = IAsyncIoOperation::EType::PRead; - TReqId ReqId; - ICallback *Callback = nullptr; - NWilson::TTraceId TraceId; - - TInstant Deadline; - - TAsyncIoOperationMap(TSectorMap &sectorMap, - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &completeQueue, - void *cookie, TReqId reqId, NWilson::TTraceId *traceId) - : SectorMap(sectorMap) - , CompleteQueue(completeQueue) - , Cookie(cookie) - , ReqId(reqId) - , TraceId(traceId ? 
std::move(*traceId) : NWilson::TTraceId()) - {} - - ~TAsyncIoOperationMap() override { - } - - void* GetCookie() override { - return Cookie; - } - - NWilson::TTraceId *GetTraceIdPtr() override { - return &TraceId; - } - - void* GetData() override { - return Data; - } - - ui64 GetOffset() override { - return Offset; - }; - - ui64 GetSize() override { - return Size; - }; - - EType GetType() override { - return Type; - }; - - TReqId GetReqId() override { - return ReqId; - } - - void Process(void*) override { - switch (Type) { - case IAsyncIoOperation::EType::PRead: - { - SectorMap.Read((ui8*)Data, Size, Offset); - break; - } - case IAsyncIoOperation::EType::PWrite: - { - SectorMap.Write((ui8*)Data, Size, Offset); - break; - } - default: - Y_FAIL_S("Unexpected op type# " << (i64)Type); - } - CompleteQueue.Push(this); - } - - void SetCallback(ICallback *callback) override { - Callback = callback; - } - - void ExecCallback(TAsyncIoOperationResult *result) override { - Callback->Exec(result); - } -}; - -class TRandomWaitThreadPool : public IThreadPool { - TCountedQueueOneOne<TAsyncIoOperationMap*, 4 << 10> IncomingQueue; - TMultiMap<TInstant, TAsyncIoOperationMap*> WaitQueue; - - TThread WorkThread; - std::atomic<bool> StopFlag; - std::pair<TDuration, TDuration> WaitParams; - - /////// Thread working part ///// - static void *Proc(void* that) { - static_cast<TRandomWaitThreadPool*>(that)->Work(); - return nullptr; - } - - void Work() { - bool receivedNullFromIncomingQueue = false; - while (true) { - TInstant now = TInstant::Now(); - TAtomicBase size = IncomingQueue.GetWaitingSize(); - for (TAtomicBase idx = 0; idx < size; ++idx) { - TAsyncIoOperationMap *op = IncomingQueue.Pop(); - if (op) { - if (op->Deadline <= now) { - op->Process(nullptr); - } else { - WaitQueue.emplace(op->Deadline, op); - } - } else { - receivedNullFromIncomingQueue = true; - } - } - if (StopFlag.load()) { - Cleanup(receivedNullFromIncomingQueue); - return; - } - now = TInstant::Now(); - auto it = 
WaitQueue.begin(); - while (it != WaitQueue.end() && it->first <= now) { - TAsyncIoOperationMap *op = it->second; - op->Process(nullptr); - auto curr = it; - ++it; - WaitQueue.erase(curr); - } - TDuration wait = TDuration::Max(); - if (WaitQueue) { - Y_VERIFY(WaitQueue.begin()->first > now); - wait = WaitQueue.begin()->first - now; - } - IncomingQueue.ProducedWait(wait); - } - } - - void Cleanup(bool receiveNull) { - for (auto& op : WaitQueue) { - delete op.second; - } - WaitQueue.clear(); - while (!receiveNull) { - TAtomicBase size = IncomingQueue.GetWaitingSize(); - for (TAtomicBase idx = 0; idx < size; ++idx) { - TAsyncIoOperationMap *op = IncomingQueue.Pop(); - if (op) { - delete op; - } else { - receiveNull = true; - } - } - } - } - - /////// Intefrace ///// - bool Add(IObjectInQueue *obj) override { - if (StopFlag.load()) { - return false; - } - auto op = static_cast<TAsyncIoOperationMap*>(obj); - op->Deadline = TInstant::Now() + WaitParams.first - + TDuration::MicroSeconds(RandomNumber<ui32>(WaitParams.second.MicroSeconds())); - IncomingQueue.Push(op); - return true; - } - - size_t Size() const noexcept override { - return 0; // Size of thread pool, meaningless for that class - } - - void Start(size_t, size_t) override { - } - - void Stop() noexcept override { - Y_VERIFY(!StopFlag.load()); - StopFlag.store(true); - IncomingQueue.Push(nullptr); - WorkThread.Join(); - } - - -public: - TRandomWaitThreadPool(const std::pair<TDuration, TDuration>& waitParams) - : WorkThread(TThread::TParams(Proc, this)) - , StopFlag(false) - , WaitParams(waitParams) - { - WorkThread.Start(); - } - - ~TRandomWaitThreadPool(){ - } -}; - -class TAsyncIoContextMap : public IAsyncIoContext { - TAutoPtr<IThreadPool> Queue; - TIntrusivePtr<TSectorMap> SectorMap; - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> CompleteQueue; - ui64 MaxEvents = 0; - int LastErrno = 0; - - TPDiskDebugInfo PDiskInfo; -public: - - TAsyncIoContextMap(const TString &path, ui32 pDiskId, 
TIntrusivePtr<TSectorMap> sectorMap) - : SectorMap(sectorMap) - , PDiskInfo(path, pDiskId, "map") - {} - - ~TAsyncIoContextMap() { - } - - void InitializeMonitoring(TPDiskMon &mon) override { - Y_UNUSED(mon); - } - - IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, TReqId reqId, NWilson::TTraceId *traceId) override { - IAsyncIoOperation *operation = new TAsyncIoOperationMap(*SectorMap, CompleteQueue, cookie, reqId, traceId); - return operation; - } - - void DestroyAsyncIoOperation(IAsyncIoOperation *operation) override { - delete operation; - } - - EIoResult Destroy() override { - Queue->Stop(); - SectorMap->Unlock(); - - return EIoResult::Ok; - } - - i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { - ui64 outputIdx = 0; - TInstant startTime = TInstant::Now(); - TInstant deadline = startTime + timeout; - while (true) { - TAtomicBase size = CompleteQueue.GetWaitingSize(); - if (size > 0) { - for (TAtomicBase idx = 0; idx < size; ++idx) { - TAsyncIoOperationMap *op = static_cast<TAsyncIoOperationMap*>(CompleteQueue.Pop()); - events[outputIdx].Operation = op; - events[outputIdx].Result = (RandomNumber<double>() < - SectorMap->ImitateIoErrorProbability.load()) - ? 
EIoResult::FakeError - : EIoResult::Ok; - if (op->GetType() == IAsyncIoOperation::EType::PRead && - RandomNumber<double>() < SectorMap->ImitateReadIoErrorProbability.load()) { - events[outputIdx].Result = EIoResult::FakeError; - } - events[outputIdx].Operation->ExecCallback(&events[outputIdx]); - ++outputIdx; - if (outputIdx == maxEvents) { - return outputIdx; - } - } - } else { - if (outputIdx >= minEvents) { - return outputIdx; - } - if (!timeout.NanoSeconds()) { - CompleteQueue.ProducedWaitI(); - } else { - TInstant now = TInstant::Now(); - if (now > deadline) { - return outputIdx; - } - TDuration remainingTime = deadline - now; - bool isOk = CompleteQueue.ProducedWait(remainingTime); - if (!isOk) { - return outputIdx; - } - } - } - } - } - - void PrepareImpl(IAsyncIoOperation *op, void *data, size_t size, size_t offset, - IAsyncIoOperation::EType type) { - TAsyncIoOperationMap *operation = static_cast<TAsyncIoOperationMap*>(op); - operation->Data = data; - operation->Size = size; - operation->Offset = offset; - operation->Type = type; - } - - void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { - PrepareImpl(op, destination, size, offset, IAsyncIoOperation::EType::PRead); - } - - void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) override { - PrepareImpl(op, const_cast<void*>(source), size, offset, IAsyncIoOperation::EType::PWrite); - } - - void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { - PrepareImpl(op, nullptr, size, offset, IAsyncIoOperation::EType::PTrim); - } - - bool DoTrim(IAsyncIoOperation *op) override { - Sleep(TDuration::MilliSeconds(40)); - - SectorMap->Trim(op->GetSize(), op->GetOffset()); - return true; - } - - EIoResult Setup(ui64 maxEvents, bool doLock) override { - if (doLock) { - bool isLocked = SectorMap->Lock(); - if (!isLocked) { - return EIoResult::FileOpenError; - } - } - MaxEvents = maxEvents; - if 
(SectorMap->ImitateRandomWait) { - Queue = new TRandomWaitThreadPool(*SectorMap->ImitateRandomWait); - } else { - Queue = CreateThreadPool(1, MaxEvents); - } - return EIoResult::Ok; - } - - EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { - op->SetCallback(callback); - TAsyncIoOperationMap *operation = static_cast<TAsyncIoOperationMap*>(op); - bool isOk = Queue->Add(operation); - return isOk ? EIoResult::Ok : EIoResult::TryAgain; - } - - void SetActorSystem(TActorSystem* /*actorSystem*/) override - {} - - TString GetPDiskInfo() override { - return PDiskInfo.Str(); - } - - int GetLastErrno() override { - return LastErrno; - } - - TFileHandle *GetFileHandle() override { - return nullptr; - } -}; - -std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextMap(const TString &path, ui32 pDiskId, TIntrusivePtr<TSectorMap> sectorMap) { - return std::make_unique<TAsyncIoContextMap>(path, pDiskId, sectorMap); -} - -} // NPDisk -} // NKikimr + +#include <util/random/random.h> +#include <util/thread/pool.h> + +namespace NKikimr { +namespace NPDisk { + +struct TAsyncIoOperationMap : IObjectInQueue, IAsyncIoOperation { + TSectorMap &SectorMap; + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &CompleteQueue; + void *Cookie; + void *Data = nullptr; + ui64 Offset = 0; + ui64 Size = 0; + EType Type = IAsyncIoOperation::EType::PRead; + TReqId ReqId; + ICallback *Callback = nullptr; + NWilson::TTraceId TraceId; + + TInstant Deadline; + + TAsyncIoOperationMap(TSectorMap &sectorMap, + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &completeQueue, + void *cookie, TReqId reqId, NWilson::TTraceId *traceId) + : SectorMap(sectorMap) + , CompleteQueue(completeQueue) + , Cookie(cookie) + , ReqId(reqId) + , TraceId(traceId ? 
std::move(*traceId) : NWilson::TTraceId()) + {} + + ~TAsyncIoOperationMap() override { + } + + void* GetCookie() override { + return Cookie; + } + + NWilson::TTraceId *GetTraceIdPtr() override { + return &TraceId; + } + + void* GetData() override { + return Data; + } + + ui64 GetOffset() override { + return Offset; + }; + + ui64 GetSize() override { + return Size; + }; + + EType GetType() override { + return Type; + }; + + TReqId GetReqId() override { + return ReqId; + } + + void Process(void*) override { + switch (Type) { + case IAsyncIoOperation::EType::PRead: + { + SectorMap.Read((ui8*)Data, Size, Offset); + break; + } + case IAsyncIoOperation::EType::PWrite: + { + SectorMap.Write((ui8*)Data, Size, Offset); + break; + } + default: + Y_FAIL_S("Unexpected op type# " << (i64)Type); + } + CompleteQueue.Push(this); + } + + void SetCallback(ICallback *callback) override { + Callback = callback; + } + + void ExecCallback(TAsyncIoOperationResult *result) override { + Callback->Exec(result); + } +}; + +class TRandomWaitThreadPool : public IThreadPool { + TCountedQueueOneOne<TAsyncIoOperationMap*, 4 << 10> IncomingQueue; + TMultiMap<TInstant, TAsyncIoOperationMap*> WaitQueue; + + TThread WorkThread; + std::atomic<bool> StopFlag; + std::pair<TDuration, TDuration> WaitParams; + + /////// Thread working part ///// + static void *Proc(void* that) { + static_cast<TRandomWaitThreadPool*>(that)->Work(); + return nullptr; + } + + void Work() { + bool receivedNullFromIncomingQueue = false; + while (true) { + TInstant now = TInstant::Now(); + TAtomicBase size = IncomingQueue.GetWaitingSize(); + for (TAtomicBase idx = 0; idx < size; ++idx) { + TAsyncIoOperationMap *op = IncomingQueue.Pop(); + if (op) { + if (op->Deadline <= now) { + op->Process(nullptr); + } else { + WaitQueue.emplace(op->Deadline, op); + } + } else { + receivedNullFromIncomingQueue = true; + } + } + if (StopFlag.load()) { + Cleanup(receivedNullFromIncomingQueue); + return; + } + now = TInstant::Now(); + auto it = 
WaitQueue.begin(); + while (it != WaitQueue.end() && it->first <= now) { + TAsyncIoOperationMap *op = it->second; + op->Process(nullptr); + auto curr = it; + ++it; + WaitQueue.erase(curr); + } + TDuration wait = TDuration::Max(); + if (WaitQueue) { + Y_VERIFY(WaitQueue.begin()->first > now); + wait = WaitQueue.begin()->first - now; + } + IncomingQueue.ProducedWait(wait); + } + } + + void Cleanup(bool receiveNull) { + for (auto& op : WaitQueue) { + delete op.second; + } + WaitQueue.clear(); + while (!receiveNull) { + TAtomicBase size = IncomingQueue.GetWaitingSize(); + for (TAtomicBase idx = 0; idx < size; ++idx) { + TAsyncIoOperationMap *op = IncomingQueue.Pop(); + if (op) { + delete op; + } else { + receiveNull = true; + } + } + } + } + + /////// Intefrace ///// + bool Add(IObjectInQueue *obj) override { + if (StopFlag.load()) { + return false; + } + auto op = static_cast<TAsyncIoOperationMap*>(obj); + op->Deadline = TInstant::Now() + WaitParams.first + + TDuration::MicroSeconds(RandomNumber<ui32>(WaitParams.second.MicroSeconds())); + IncomingQueue.Push(op); + return true; + } + + size_t Size() const noexcept override { + return 0; // Size of thread pool, meaningless for that class + } + + void Start(size_t, size_t) override { + } + + void Stop() noexcept override { + Y_VERIFY(!StopFlag.load()); + StopFlag.store(true); + IncomingQueue.Push(nullptr); + WorkThread.Join(); + } + + +public: + TRandomWaitThreadPool(const std::pair<TDuration, TDuration>& waitParams) + : WorkThread(TThread::TParams(Proc, this)) + , StopFlag(false) + , WaitParams(waitParams) + { + WorkThread.Start(); + } + + ~TRandomWaitThreadPool(){ + } +}; + +class TAsyncIoContextMap : public IAsyncIoContext { + TAutoPtr<IThreadPool> Queue; + TIntrusivePtr<TSectorMap> SectorMap; + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> CompleteQueue; + ui64 MaxEvents = 0; + int LastErrno = 0; + + TPDiskDebugInfo PDiskInfo; +public: + + TAsyncIoContextMap(const TString &path, ui32 pDiskId, 
TIntrusivePtr<TSectorMap> sectorMap) + : SectorMap(sectorMap) + , PDiskInfo(path, pDiskId, "map") + {} + + ~TAsyncIoContextMap() { + } + + void InitializeMonitoring(TPDiskMon &mon) override { + Y_UNUSED(mon); + } + + IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, TReqId reqId, NWilson::TTraceId *traceId) override { + IAsyncIoOperation *operation = new TAsyncIoOperationMap(*SectorMap, CompleteQueue, cookie, reqId, traceId); + return operation; + } + + void DestroyAsyncIoOperation(IAsyncIoOperation *operation) override { + delete operation; + } + + EIoResult Destroy() override { + Queue->Stop(); + SectorMap->Unlock(); + + return EIoResult::Ok; + } + + i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { + ui64 outputIdx = 0; + TInstant startTime = TInstant::Now(); + TInstant deadline = startTime + timeout; + while (true) { + TAtomicBase size = CompleteQueue.GetWaitingSize(); + if (size > 0) { + for (TAtomicBase idx = 0; idx < size; ++idx) { + TAsyncIoOperationMap *op = static_cast<TAsyncIoOperationMap*>(CompleteQueue.Pop()); + events[outputIdx].Operation = op; + events[outputIdx].Result = (RandomNumber<double>() < + SectorMap->ImitateIoErrorProbability.load()) + ? 
EIoResult::FakeError + : EIoResult::Ok; + if (op->GetType() == IAsyncIoOperation::EType::PRead && + RandomNumber<double>() < SectorMap->ImitateReadIoErrorProbability.load()) { + events[outputIdx].Result = EIoResult::FakeError; + } + events[outputIdx].Operation->ExecCallback(&events[outputIdx]); + ++outputIdx; + if (outputIdx == maxEvents) { + return outputIdx; + } + } + } else { + if (outputIdx >= minEvents) { + return outputIdx; + } + if (!timeout.NanoSeconds()) { + CompleteQueue.ProducedWaitI(); + } else { + TInstant now = TInstant::Now(); + if (now > deadline) { + return outputIdx; + } + TDuration remainingTime = deadline - now; + bool isOk = CompleteQueue.ProducedWait(remainingTime); + if (!isOk) { + return outputIdx; + } + } + } + } + } + + void PrepareImpl(IAsyncIoOperation *op, void *data, size_t size, size_t offset, + IAsyncIoOperation::EType type) { + TAsyncIoOperationMap *operation = static_cast<TAsyncIoOperationMap*>(op); + operation->Data = data; + operation->Size = size; + operation->Offset = offset; + operation->Type = type; + } + + void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { + PrepareImpl(op, destination, size, offset, IAsyncIoOperation::EType::PRead); + } + + void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) override { + PrepareImpl(op, const_cast<void*>(source), size, offset, IAsyncIoOperation::EType::PWrite); + } + + void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { + PrepareImpl(op, nullptr, size, offset, IAsyncIoOperation::EType::PTrim); + } + + bool DoTrim(IAsyncIoOperation *op) override { + Sleep(TDuration::MilliSeconds(40)); + + SectorMap->Trim(op->GetSize(), op->GetOffset()); + return true; + } + + EIoResult Setup(ui64 maxEvents, bool doLock) override { + if (doLock) { + bool isLocked = SectorMap->Lock(); + if (!isLocked) { + return EIoResult::FileOpenError; + } + } + MaxEvents = maxEvents; + if 
(SectorMap->ImitateRandomWait) { + Queue = new TRandomWaitThreadPool(*SectorMap->ImitateRandomWait); + } else { + Queue = CreateThreadPool(1, MaxEvents); + } + return EIoResult::Ok; + } + + EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { + op->SetCallback(callback); + TAsyncIoOperationMap *operation = static_cast<TAsyncIoOperationMap*>(op); + bool isOk = Queue->Add(operation); + return isOk ? EIoResult::Ok : EIoResult::TryAgain; + } + + void SetActorSystem(TActorSystem* /*actorSystem*/) override + {} + + TString GetPDiskInfo() override { + return PDiskInfo.Str(); + } + + int GetLastErrno() override { + return LastErrno; + } + + TFileHandle *GetFileHandle() override { + return nullptr; + } +}; + +std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextMap(const TString &path, ui32 pDiskId, TIntrusivePtr<TSectorMap> sectorMap) { + return std::make_unique<TAsyncIoContextMap>(path, pDiskId, sectorMap); +} + +} // NPDisk +} // NKikimr diff --git a/ydb/library/pdisk_io/aio_mtp.cpp b/ydb/library/pdisk_io/aio_mtp.cpp index fb071673f7..d707dcaa1a 100644 --- a/ydb/library/pdisk_io/aio_mtp.cpp +++ b/ydb/library/pdisk_io/aio_mtp.cpp @@ -1,314 +1,314 @@ -#include "aio.h" - +#include "aio.h" + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_countedqueueoneone.h> #include <ydb/core/debug/valgrind_check.h> - -#include <util/system/file.h> - -namespace NKikimr { -namespace NPDisk { - -#define INTRODUCE_BAD_SECTORS 0 -#define BAD_SECTORS_STEP 15 -#define BAD_SECTOR_IDX 3 - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// PreadBad -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void PreadBad(TFileHandle *file, void* data, ui32 size, ui64 offset) { -#if INTRODUCE_BAD_SECTORS - void *data0 = data; - ui32 size0 = size; - ui64 offset0 = offset; -#endif - while (size) { - i32 sizeDone = file->Pread(data, 
size, offset); - if (sizeDone == (i32)size) { - break; - } - - if (sizeDone < 0) { - // TODO: get errno, report bad sectors - sizeDone = 0; - } - ui64 nextOffset = ((offset + sizeDone) / 4096 + 1) * 4096; - ui32 sizeSkipped = (ui32)(nextOffset - offset) - sizeDone; - - if (sizeSkipped) { - memset((ui8*)data + sizeDone, 0, sizeSkipped); - } - size -= sizeDone + sizeSkipped; - data = (void*)((ui8*)data + sizeDone + sizeSkipped); - offset = nextOffset; - } -#if INTRODUCE_BAD_SECTORS - ui64 firstSector = (offset0 / 4096 + BAD_SECTORS_STEP - 1) / BAD_SECTORS_STEP * BAD_SECTORS_STEP + BAD_SECTOR_IDX; - ui64 lastSector = (offset0 + size0) / 4096; - for ( ; firstSector < lastSector; firstSector += BAD_SECTORS_STEP) { - ui32 dataToSpoilOffset = (firstSector * 4096 - offset0); - memset((void*)((ui8*)data0 + dataToSpoilOffset), 0, 4096); - } -#endif -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// PwriteBad -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void PwriteBad(TFileHandle *file, const void* data, ui32 size, ui64 offset) { - while (size) { - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size); - REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&offset, sizeof(offset)); - i32 sizeDone = file->Pwrite(data, size, offset); - if (sizeDone == (i32)size) { - break; - } - - if (sizeDone < 0) { - // TODO: get errno, report bad sectors - sizeDone = 0; - } - ui64 badOffset = ((offset + sizeDone) / 4096) * 4096; - ui64 nextOffset = badOffset + 4096; - ui32 sizeSkipped = (ui32)(nextOffset - offset) - sizeDone; - - size -= sizeDone + sizeSkipped; - data = (void*)((ui8*)data + sizeDone + sizeSkipped); - offset = nextOffset; - } -} - - -struct TAsyncIoOperation : IObjectInQueue, IAsyncIoOperation { - TMutex &Mutex; - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &CompleteQueue; - void *Cookie; - void *Data; - ui64 Offset; - ui64 Size; - 
TFileHandle *File; - EType Type; - TReqId ReqId; - ICallback *Callback; - NWilson::TTraceId TraceId; - - TAsyncIoOperation(TMutex &mutex, TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &completeQueue, - void *cookie, TReqId reqId, NWilson::TTraceId *traceId) - : Mutex(mutex) - , CompleteQueue(completeQueue) - , Cookie(cookie) - , ReqId(reqId) - , TraceId(traceId ? std::move(*traceId) : NWilson::TTraceId()) - {} - - ~TAsyncIoOperation() override { - } - - void* GetCookie() override { - return Cookie; - } - - NWilson::TTraceId *GetTraceIdPtr() override { - return &TraceId; - } - - void* GetData() override { - return Data; - } - - ui64 GetOffset() override { - return Offset; - }; - - ui64 GetSize() override { - return Size; - }; - - EType GetType() override { - return Type; - }; - - TReqId GetReqId() override { - return ReqId; - } - - void Process(void*) override { - switch (Type) { - case IAsyncIoOperation::EType::PRead: - PreadBad(File, Data, Size, Offset); - break; - case IAsyncIoOperation::EType::PWrite: - PwriteBad(File, Data, Size, Offset); - break; - default: - Y_FAIL_S("Unexpected operation type# " << (i64)Type); - break; - } - { - TGuard<TMutex> guard(Mutex); - CompleteQueue.Push(this); - } - } - - void SetCallback(ICallback *callback) override { - Callback = callback; - } - - void ExecCallback(TAsyncIoOperationResult *result) override { - Callback->Exec(result); - } -}; - -class TAsyncIoContextMtp : public IAsyncIoContext { - TAutoPtr<IThreadPool> Queue; - TMutex Mutex; - TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> CompleteQueue; - ui64 MaxEvents; - THolder<TFileHandle> File; - int LastErrno = 0; - - static const ui64 NumThreads = 32; - - TPDiskDebugInfo PDiskInfo; -public: - - TAsyncIoContextMtp(const TString &path, ui32 pDiskId) - : PDiskInfo(path, pDiskId, "mtp") - {} - - ~TAsyncIoContextMtp() { - } - - void InitializeMonitoring(TPDiskMon &mon) override { - Y_UNUSED(mon); - } - - IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, TReqId reqId, 
NWilson::TTraceId *traceId) override { - IAsyncIoOperation *operation = new TAsyncIoOperation(Mutex, CompleteQueue, cookie, reqId, traceId); - return operation; - } - - void DestroyAsyncIoOperation(IAsyncIoOperation *operation) override { - delete operation; - } - - EIoResult Destroy() override { - Queue->Stop(); - if (File) { - File->Flock(LOCK_UN); - } - return EIoResult::Ok; - } - - i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { - ui64 outputIdx = 0; - TInstant startTime = TInstant::Now(); - TInstant deadline = startTime + timeout; - while (true) { - TAtomicBase size = CompleteQueue.GetWaitingSize(); - if (size > 0) { - for (TAtomicBase idx = 0; idx < size; ++idx) { - TAsyncIoOperation *op = static_cast<TAsyncIoOperation*>(CompleteQueue.Pop()); - events[outputIdx].Operation = op; - events[outputIdx].Result = EIoResult::Ok; - events[outputIdx].Operation->ExecCallback(&events[outputIdx]); - ++outputIdx; - if (outputIdx == maxEvents) { - return outputIdx; - } - } - } else { - if (outputIdx >= minEvents) { - return outputIdx; - } - if (!timeout.NanoSeconds()) { - CompleteQueue.ProducedWaitI(); - } else { - TInstant now = TInstant::Now(); - if (now > deadline) { - return outputIdx; - } - TDuration remainingTime = deadline - now; - bool isOk = CompleteQueue.ProducedWait(remainingTime); - if (!isOk) { - return outputIdx; - } - } - } - } - } - - void PrepareImpl(IAsyncIoOperation *op, void *data, size_t size, size_t offset, IAsyncIoOperation::EType type) { - TAsyncIoOperation *operation = static_cast<TAsyncIoOperation*>(op); - operation->File = File.Get(); - operation->Data = data; - operation->Size = size; - operation->Offset = offset; - operation->Type = type; - } - - void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { - PrepareImpl(op, destination, size, offset, IAsyncIoOperation::EType::PRead); - } - - void PreparePWrite(IAsyncIoOperation *op, const void 
*source, size_t size, size_t offset) override { - PrepareImpl(op, const_cast<void*>(source), size, offset, IAsyncIoOperation::EType::PWrite); - } - - void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { - PrepareImpl(op, nullptr, size, offset, IAsyncIoOperation::EType::PTrim); - } - - bool DoTrim(IAsyncIoOperation *op) override { - Y_UNUSED(op); - return false; - } - - EIoResult Setup(ui64 maxEvents, bool doLock) override { - File = MakeHolder<TFileHandle>(PDiskInfo.Path.c_str(), - OpenExisting | RdWr | DirectAligned | Sync); - bool isFileOpened = File->IsOpen(); - if (isFileOpened && doLock) { - bool isLocked = File->Flock(LOCK_EX | LOCK_NB) == 0; - isFileOpened = isLocked; - } - if (!isFileOpened) { - return EIoResult::FileOpenError; - } - MaxEvents = maxEvents; - Queue = CreateThreadPool(NumThreads, MaxEvents); - return EIoResult::Ok; - } - - EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { - op->SetCallback(callback); - TAsyncIoOperation *operation = static_cast<TAsyncIoOperation*>(op); - bool isOk = Queue->Add(operation); - if (isOk) { - return EIoResult::Ok; - } - return EIoResult::TryAgain; - } - - void SetActorSystem(TActorSystem* /*actorSystem*/) override - {} - - TString GetPDiskInfo() override { - return PDiskInfo.Str(); - } - - int GetLastErrno() override { - return LastErrno; - } - - TFileHandle *GetFileHandle() override { - return File.Get(); - } -}; - -std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags) { - Y_UNUSED(flags); - return std::make_unique<TAsyncIoContextMtp>(path, pDiskId); -} - -} // NPDisk -} // NKikimr + +#include <util/system/file.h> + +namespace NKikimr { +namespace NPDisk { + +#define INTRODUCE_BAD_SECTORS 0 +#define BAD_SECTORS_STEP 15 +#define BAD_SECTOR_IDX 3 + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// PreadBad 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void PreadBad(TFileHandle *file, void* data, ui32 size, ui64 offset) { +#if INTRODUCE_BAD_SECTORS + void *data0 = data; + ui32 size0 = size; + ui64 offset0 = offset; +#endif + while (size) { + i32 sizeDone = file->Pread(data, size, offset); + if (sizeDone == (i32)size) { + break; + } + + if (sizeDone < 0) { + // TODO: get errno, report bad sectors + sizeDone = 0; + } + ui64 nextOffset = ((offset + sizeDone) / 4096 + 1) * 4096; + ui32 sizeSkipped = (ui32)(nextOffset - offset) - sizeDone; + + if (sizeSkipped) { + memset((ui8*)data + sizeDone, 0, sizeSkipped); + } + size -= sizeDone + sizeSkipped; + data = (void*)((ui8*)data + sizeDone + sizeSkipped); + offset = nextOffset; + } +#if INTRODUCE_BAD_SECTORS + ui64 firstSector = (offset0 / 4096 + BAD_SECTORS_STEP - 1) / BAD_SECTORS_STEP * BAD_SECTORS_STEP + BAD_SECTOR_IDX; + ui64 lastSector = (offset0 + size0) / 4096; + for ( ; firstSector < lastSector; firstSector += BAD_SECTORS_STEP) { + ui32 dataToSpoilOffset = (firstSector * 4096 - offset0); + memset((void*)((ui8*)data0 + dataToSpoilOffset), 0, 4096); + } +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// PwriteBad +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void PwriteBad(TFileHandle *file, const void* data, ui32 size, ui64 offset) { + while (size) { + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(data, size); + REQUEST_VALGRIND_CHECK_MEM_IS_DEFINED(&offset, sizeof(offset)); + i32 sizeDone = file->Pwrite(data, size, offset); + if (sizeDone == (i32)size) { + break; + } + + if (sizeDone < 0) { + // TODO: get errno, report bad sectors + sizeDone = 0; + } + ui64 badOffset = ((offset + sizeDone) / 4096) * 4096; + ui64 nextOffset = badOffset + 4096; + ui32 sizeSkipped = (ui32)(nextOffset - 
offset) - sizeDone; + + size -= sizeDone + sizeSkipped; + data = (void*)((ui8*)data + sizeDone + sizeSkipped); + offset = nextOffset; + } +} + + +struct TAsyncIoOperation : IObjectInQueue, IAsyncIoOperation { + TMutex &Mutex; + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &CompleteQueue; + void *Cookie; + void *Data; + ui64 Offset; + ui64 Size; + TFileHandle *File; + EType Type; + TReqId ReqId; + ICallback *Callback; + NWilson::TTraceId TraceId; + + TAsyncIoOperation(TMutex &mutex, TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> &completeQueue, + void *cookie, TReqId reqId, NWilson::TTraceId *traceId) + : Mutex(mutex) + , CompleteQueue(completeQueue) + , Cookie(cookie) + , ReqId(reqId) + , TraceId(traceId ? std::move(*traceId) : NWilson::TTraceId()) + {} + + ~TAsyncIoOperation() override { + } + + void* GetCookie() override { + return Cookie; + } + + NWilson::TTraceId *GetTraceIdPtr() override { + return &TraceId; + } + + void* GetData() override { + return Data; + } + + ui64 GetOffset() override { + return Offset; + }; + + ui64 GetSize() override { + return Size; + }; + + EType GetType() override { + return Type; + }; + + TReqId GetReqId() override { + return ReqId; + } + + void Process(void*) override { + switch (Type) { + case IAsyncIoOperation::EType::PRead: + PreadBad(File, Data, Size, Offset); + break; + case IAsyncIoOperation::EType::PWrite: + PwriteBad(File, Data, Size, Offset); + break; + default: + Y_FAIL_S("Unexpected operation type# " << (i64)Type); + break; + } + { + TGuard<TMutex> guard(Mutex); + CompleteQueue.Push(this); + } + } + + void SetCallback(ICallback *callback) override { + Callback = callback; + } + + void ExecCallback(TAsyncIoOperationResult *result) override { + Callback->Exec(result); + } +}; + +class TAsyncIoContextMtp : public IAsyncIoContext { + TAutoPtr<IThreadPool> Queue; + TMutex Mutex; + TCountedQueueOneOne<IAsyncIoOperation*, 4 << 10> CompleteQueue; + ui64 MaxEvents; + THolder<TFileHandle> File; + int LastErrno = 0; + + 
static const ui64 NumThreads = 32; + + TPDiskDebugInfo PDiskInfo; +public: + + TAsyncIoContextMtp(const TString &path, ui32 pDiskId) + : PDiskInfo(path, pDiskId, "mtp") + {} + + ~TAsyncIoContextMtp() { + } + + void InitializeMonitoring(TPDiskMon &mon) override { + Y_UNUSED(mon); + } + + IAsyncIoOperation* CreateAsyncIoOperation(void* cookie, TReqId reqId, NWilson::TTraceId *traceId) override { + IAsyncIoOperation *operation = new TAsyncIoOperation(Mutex, CompleteQueue, cookie, reqId, traceId); + return operation; + } + + void DestroyAsyncIoOperation(IAsyncIoOperation *operation) override { + delete operation; + } + + EIoResult Destroy() override { + Queue->Stop(); + if (File) { + File->Flock(LOCK_UN); + } + return EIoResult::Ok; + } + + i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { + ui64 outputIdx = 0; + TInstant startTime = TInstant::Now(); + TInstant deadline = startTime + timeout; + while (true) { + TAtomicBase size = CompleteQueue.GetWaitingSize(); + if (size > 0) { + for (TAtomicBase idx = 0; idx < size; ++idx) { + TAsyncIoOperation *op = static_cast<TAsyncIoOperation*>(CompleteQueue.Pop()); + events[outputIdx].Operation = op; + events[outputIdx].Result = EIoResult::Ok; + events[outputIdx].Operation->ExecCallback(&events[outputIdx]); + ++outputIdx; + if (outputIdx == maxEvents) { + return outputIdx; + } + } + } else { + if (outputIdx >= minEvents) { + return outputIdx; + } + if (!timeout.NanoSeconds()) { + CompleteQueue.ProducedWaitI(); + } else { + TInstant now = TInstant::Now(); + if (now > deadline) { + return outputIdx; + } + TDuration remainingTime = deadline - now; + bool isOk = CompleteQueue.ProducedWait(remainingTime); + if (!isOk) { + return outputIdx; + } + } + } + } + } + + void PrepareImpl(IAsyncIoOperation *op, void *data, size_t size, size_t offset, IAsyncIoOperation::EType type) { + TAsyncIoOperation *operation = static_cast<TAsyncIoOperation*>(op); + operation->File = File.Get(); + 
operation->Data = data; + operation->Size = size; + operation->Offset = offset; + operation->Type = type; + } + + void PreparePRead(IAsyncIoOperation *op, void *destination, size_t size, size_t offset) override { + PrepareImpl(op, destination, size, offset, IAsyncIoOperation::EType::PRead); + } + + void PreparePWrite(IAsyncIoOperation *op, const void *source, size_t size, size_t offset) override { + PrepareImpl(op, const_cast<void*>(source), size, offset, IAsyncIoOperation::EType::PWrite); + } + + void PreparePTrim(IAsyncIoOperation *op, size_t size, size_t offset) override { + PrepareImpl(op, nullptr, size, offset, IAsyncIoOperation::EType::PTrim); + } + + bool DoTrim(IAsyncIoOperation *op) override { + Y_UNUSED(op); + return false; + } + + EIoResult Setup(ui64 maxEvents, bool doLock) override { + File = MakeHolder<TFileHandle>(PDiskInfo.Path.c_str(), + OpenExisting | RdWr | DirectAligned | Sync); + bool isFileOpened = File->IsOpen(); + if (isFileOpened && doLock) { + bool isLocked = File->Flock(LOCK_EX | LOCK_NB) == 0; + isFileOpened = isLocked; + } + if (!isFileOpened) { + return EIoResult::FileOpenError; + } + MaxEvents = maxEvents; + Queue = CreateThreadPool(NumThreads, MaxEvents); + return EIoResult::Ok; + } + + EIoResult Submit(IAsyncIoOperation *op, ICallback *callback) override { + op->SetCallback(callback); + TAsyncIoOperation *operation = static_cast<TAsyncIoOperation*>(op); + bool isOk = Queue->Add(operation); + if (isOk) { + return EIoResult::Ok; + } + return EIoResult::TryAgain; + } + + void SetActorSystem(TActorSystem* /*actorSystem*/) override + {} + + TString GetPDiskInfo() override { + return PDiskInfo.Str(); + } + + int GetLastErrno() override { + return LastErrno; + } + + TFileHandle *GetFileHandle() override { + return File.Get(); + } +}; + +std::unique_ptr<IAsyncIoContext> CreateAsyncIoContextReal(const TString &path, ui32 pDiskId, TDeviceMode::TFlags flags) { + Y_UNUSED(flags); + return std::make_unique<TAsyncIoContextMtp>(path, pDiskId); +} 
+ +} // NPDisk +} // NKikimr diff --git a/ydb/library/pdisk_io/buffer_pool.h b/ydb/library/pdisk_io/buffer_pool.h index 8a9aa7c07f..acbe164b40 100644 --- a/ydb/library/pdisk_io/buffer_pool.h +++ b/ydb/library/pdisk_io/buffer_pool.h @@ -1,72 +1,72 @@ -#pragma once - -#include <library/cpp/actors/util/queue_oneone_inplace.h> -#include <library/cpp/monlib/dynamic_counters/counters.h> -#include <library/cpp/threading/queue/mpsc_read_as_filled.h> -#include <library/cpp/threading/queue/mpsc_vinfarr_obstructive.h> - -#include <util/system/atomic.h> -#include <util/generic/vector.h> -#include <util/generic/list.h> - -namespace NKikimr::NPDisk { - -template<typename TObject, ui32 Size> -class TPool { - TVector<TObject> Objects; - NThreading::TObstructiveConsumerAuxQueue<TObject> InPoolObjects; - NMonitoring::TDynamicCounters::TCounterPtr TotalAllocatedObjects; - TAtomic FreeObjects; - NMonitoring::TDynamicCounters::TCounterPtr FreeObjectsMin; - - public: - TPool() - : Objects(Size) - , FreeObjects(Size) - { - for (auto it = Objects.begin(); it != Objects.end(); ++it) { - InPoolObjects.Push(&(*it)); - } - } - - TObject *Pop() { - TObject *obj = InPoolObjects.Pop(); - if (!obj) { - obj = new TObject(); - if (TotalAllocatedObjects) { - *TotalAllocatedObjects += 1; - } - } - TAtomicBase currentFree = AtomicDecrement(FreeObjects); - if (FreeObjectsMin) { - *FreeObjectsMin = Min(static_cast<TAtomicBase>(*FreeObjectsMin), currentFree); - } - return obj; - } - - bool IsFromPool(TObject *obj) { - return &Objects.front() <= obj && obj <= &Objects.back(); - } - - void Push(TObject *obj) { - AtomicIncrement(FreeObjects); - if (IsFromPool(obj)) { - InPoolObjects.Push(obj); - } else { - delete obj; - } - } - - void InitializeMonitoring(NMonitoring::TDynamicCounters::TCounterPtr totalAllocatedObjects, - NMonitoring::TDynamicCounters::TCounterPtr freeObjectsMin) { - TotalAllocatedObjects = totalAllocatedObjects; - FreeObjectsMin = freeObjectsMin; - *FreeObjectsMin = AtomicGet(FreeObjects); 
- } - - ~TPool() { - while(InPoolObjects.Pop()); - } -}; - -} // NKikimr::NPDisk +#pragma once + +#include <library/cpp/actors/util/queue_oneone_inplace.h> +#include <library/cpp/monlib/dynamic_counters/counters.h> +#include <library/cpp/threading/queue/mpsc_read_as_filled.h> +#include <library/cpp/threading/queue/mpsc_vinfarr_obstructive.h> + +#include <util/system/atomic.h> +#include <util/generic/vector.h> +#include <util/generic/list.h> + +namespace NKikimr::NPDisk { + +template<typename TObject, ui32 Size> +class TPool { + TVector<TObject> Objects; + NThreading::TObstructiveConsumerAuxQueue<TObject> InPoolObjects; + NMonitoring::TDynamicCounters::TCounterPtr TotalAllocatedObjects; + TAtomic FreeObjects; + NMonitoring::TDynamicCounters::TCounterPtr FreeObjectsMin; + + public: + TPool() + : Objects(Size) + , FreeObjects(Size) + { + for (auto it = Objects.begin(); it != Objects.end(); ++it) { + InPoolObjects.Push(&(*it)); + } + } + + TObject *Pop() { + TObject *obj = InPoolObjects.Pop(); + if (!obj) { + obj = new TObject(); + if (TotalAllocatedObjects) { + *TotalAllocatedObjects += 1; + } + } + TAtomicBase currentFree = AtomicDecrement(FreeObjects); + if (FreeObjectsMin) { + *FreeObjectsMin = Min(static_cast<TAtomicBase>(*FreeObjectsMin), currentFree); + } + return obj; + } + + bool IsFromPool(TObject *obj) { + return &Objects.front() <= obj && obj <= &Objects.back(); + } + + void Push(TObject *obj) { + AtomicIncrement(FreeObjects); + if (IsFromPool(obj)) { + InPoolObjects.Push(obj); + } else { + delete obj; + } + } + + void InitializeMonitoring(NMonitoring::TDynamicCounters::TCounterPtr totalAllocatedObjects, + NMonitoring::TDynamicCounters::TCounterPtr freeObjectsMin) { + TotalAllocatedObjects = totalAllocatedObjects; + FreeObjectsMin = freeObjectsMin; + *FreeObjectsMin = AtomicGet(FreeObjects); + } + + ~TPool() { + while(InPoolObjects.Pop()); + } +}; + +} // NKikimr::NPDisk diff --git a/ydb/library/pdisk_io/buffers.cpp b/ydb/library/pdisk_io/buffers.cpp index 
fbe0747add..dfc4c0f323 100644 --- a/ydb/library/pdisk_io/buffers.cpp +++ b/ydb/library/pdisk_io/buffers.cpp @@ -1,7 +1,7 @@ -#include "buffers.h" +#include "buffers.h" + +#include <util/system/align.h> -#include <util/system/align.h> - namespace NKikimr { namespace NPDisk { @@ -9,42 +9,42 @@ namespace NPDisk { // TBuffer // -TBuffer::TBuffer(ui32 bufferSize, bool useHugePages) - : FallbackData(bufferSize, useHugePages) - , Buffer(FallbackData.Get()) - , BufferSize(bufferSize) - , Pool(nullptr) - , PopCount(0) -{ -} - +TBuffer::TBuffer(ui32 bufferSize, bool useHugePages) + : FallbackData(bufferSize, useHugePages) + , Buffer(FallbackData.Get()) + , BufferSize(bufferSize) + , Pool(nullptr) + , PopCount(0) +{ +} + TBuffer::TBuffer(ui8* buffer, ui32 bufferSize, TBufferPool *pool) : Buffer(buffer) - , BufferSize(bufferSize) + , BufferSize(bufferSize) , Pool(pool) , PopCount(0) {} TBuffer::~TBuffer() { - Y_VERIFY(!Pool); + Y_VERIFY(!Pool); } -void TBuffer::Exec(TActorSystem*) { - ReturnToPool(); +void TBuffer::Exec(TActorSystem*) { + ReturnToPool(); } -void TBuffer::Release(TActorSystem*) { - ReturnToPool(); +void TBuffer::Release(TActorSystem*) { + ReturnToPool(); +} + +bool TBuffer::ReturnToPool() { + if (Pool) { + Pool->Push(this); + return true; + } + return false; } -bool TBuffer::ReturnToPool() { - if (Pool) { - Pool->Push(this); - return true; - } - return false; -} - ui8* TBuffer::Data() const { return Buffer; } @@ -63,61 +63,61 @@ void TBuffer::RemoveFromPool() { // TBufferPool // -static constexpr size_t Alignment = 4096; // Block-device block size -static constexpr size_t PopRetries = 5; +static constexpr size_t Alignment = 4096; // Block-device block size +static constexpr size_t PopRetries = 5; -TBufferPool::TBufferPool(ui32 bufferSize, ui32 buffersCount, TPDiskParams params) +TBufferPool::TBufferPool(ui32 bufferSize, ui32 buffersCount, TPDiskParams params) : BufferSize(bufferSize) - , BuffersCount(buffersCount) + , BuffersCount(buffersCount) , 
ReadRotation(0) , WriteRotation(0) - , Params(params) + , Params(params) { -} - -void TBufferPool::MarkUpPool(ui8 *alignedData) { - AlignedData = alignedData; - for (ui32 i = 0; i < BuffersCount; ++i) { - TBuffer *buffer = new TBuffer(AlignedData + i * AlignUp((size_t)BufferSize, Alignment), BufferSize, this); +} + +void TBufferPool::MarkUpPool(ui8 *alignedData) { + AlignedData = alignedData; + for (ui32 i = 0; i < BuffersCount; ++i) { + TBuffer *buffer = new TBuffer(AlignedData + i * AlignUp((size_t)BufferSize, Alignment), BufferSize, this); Buffers.Push(buffer, AtomicIncrement(WriteRotation)); } } -ui32 TBufferPool::GetBufferSize() { - return BufferSize; -} - -TBuffer *TBufferPool::Pop() { - size_t retry = 0; - while (retry < PopRetries) { - TBuffer* buffer = Buffers.Pop(AtomicIncrement(ReadRotation)); - if (buffer) { - Y_VERIFY_S(buffer->PopCount == 0, "BufferPopCount# " << buffer->PopCount); - buffer->PopCount++; - NSan::Poison(buffer->Data(), buffer->Size()); - REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(buffer->Data(), buffer->Size()); - return buffer; - } - if (Params.FailedAllocations) { - *Params.FailedAllocations += 1; - } - Sleep(TDuration::MilliSeconds((retry + 1) * 10)); - ++retry; +ui32 TBufferPool::GetBufferSize() { + return BufferSize; +} + +TBuffer *TBufferPool::Pop() { + size_t retry = 0; + while (retry < PopRetries) { + TBuffer* buffer = Buffers.Pop(AtomicIncrement(ReadRotation)); + if (buffer) { + Y_VERIFY_S(buffer->PopCount == 0, "BufferPopCount# " << buffer->PopCount); + buffer->PopCount++; + NSan::Poison(buffer->Data(), buffer->Size()); + REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(buffer->Data(), buffer->Size()); + return buffer; + } + if (Params.FailedAllocations) { + *Params.FailedAllocations += 1; + } + Sleep(TDuration::MilliSeconds((retry + 1) * 10)); + ++retry; } - if (Params.ActorSystem) { - //LOG_NOTICE_S(*Params.ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << Params.PDiskId - // << "Failed to pop buffer from pool, retry# " << retry); - } - - 
TBuffer *buffer = new TBuffer(BufferSize, UseHugePages); - Y_VERIFY_S(buffer, "PDiskId# " << Params.PDiskId << "Cannot pop new buffer from PDisk's buffer pool"); - return buffer; + if (Params.ActorSystem) { + //LOG_NOTICE_S(*Params.ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << Params.PDiskId + // << "Failed to pop buffer from pool, retry# " << retry); + } + + TBuffer *buffer = new TBuffer(BufferSize, UseHugePages); + Y_VERIFY_S(buffer, "PDiskId# " << Params.PDiskId << "Cannot pop new buffer from PDisk's buffer pool"); + return buffer; } void TBufferPool::Push(TBuffer *buffer) { NSan::Poison(buffer->Data(), buffer->Size()); REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(buffer->Data(), buffer->Size()); - Y_VERIFY_S(buffer->PopCount == 1, "BufferPopCount# " << buffer->PopCount); + Y_VERIFY_S(buffer->PopCount == 1, "BufferPopCount# " << buffer->PopCount); buffer->PopCount--; Buffers.Push(buffer, AtomicIncrement(WriteRotation)); } @@ -131,33 +131,33 @@ TBufferPool::~TBufferPool() { } } -// -// TBufferPoolCommon -// -TBufferPoolCommon::TBufferPoolCommon(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params) - : TBufferPool(bufferSize, bufferCount, params) -{ - TBufferPool::UseHugePages = false; - RawBuffer.Reset(new ui8[AlignUp((size_t)bufferSize, Alignment) * bufferCount + Alignment - 1]); - ui8 *alignedData = (ui8*)AlignUp(RawBuffer.Get(), Alignment); - Y_VERIFY((ui64)alignedData % Alignment == 0); - MarkUpPool(alignedData); -} - -TBufferPoolCommon::~TBufferPoolCommon() { -} - -TBufferPool *CreateBufferPool(ui64 size, ui32 bufferCount, bool UseHugePages, TBufferPool::TPDiskParams params) { -#ifdef _linux_ - if (UseHugePages) { - return new TBufferPoolHugePages(size, bufferCount, params); - } else -#endif - { - Y_UNUSED(UseHugePages); - return new TBufferPoolCommon(size, bufferCount, params); - } -} - +// +// TBufferPoolCommon +// +TBufferPoolCommon::TBufferPoolCommon(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params) + : 
TBufferPool(bufferSize, bufferCount, params) +{ + TBufferPool::UseHugePages = false; + RawBuffer.Reset(new ui8[AlignUp((size_t)bufferSize, Alignment) * bufferCount + Alignment - 1]); + ui8 *alignedData = (ui8*)AlignUp(RawBuffer.Get(), Alignment); + Y_VERIFY((ui64)alignedData % Alignment == 0); + MarkUpPool(alignedData); +} + +TBufferPoolCommon::~TBufferPoolCommon() { +} + +TBufferPool *CreateBufferPool(ui64 size, ui32 bufferCount, bool UseHugePages, TBufferPool::TPDiskParams params) { +#ifdef _linux_ + if (UseHugePages) { + return new TBufferPoolHugePages(size, bufferCount, params); + } else +#endif + { + Y_UNUSED(UseHugePages); + return new TBufferPoolCommon(size, bufferCount, params); + } +} + } // NPDisk } // NKikimr diff --git a/ydb/library/pdisk_io/buffers.h b/ydb/library/pdisk_io/buffers.h index aca6197821..a7b7867151 100644 --- a/ydb/library/pdisk_io/buffers.h +++ b/ydb/library/pdisk_io/buffers.h @@ -1,177 +1,177 @@ -#pragma once - +#pragma once + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h> #include <ydb/core/debug/valgrind_check.h> - + #include <ydb/library/pdisk_io/spdk_state.h> -#include <library/cpp/actors/util/unordered_cache.h> - -#include <util/system/sanitizers.h> - -namespace NKikimr { -namespace NPDisk { - -// -// TAlignedData -// - -struct TAlignedData { - TArrayHolder<ui8> Holder; - ui8 *AlignedBuffer = nullptr; - ui32 BufferSize = 0; - bool UseHugePages = false; -public: - TAlignedData() = default; - TAlignedData(TAlignedData &&data) = default; - - TAlignedData(ui32 size) - : TAlignedData(size, false) - {} - - TAlignedData(ui32 size, bool useHugePages) - : AlignedBuffer(nullptr) - , BufferSize(size) - , UseHugePages(useHugePages) - { - constexpr intptr_t alignment = 4096; //block device block size / or 512 for newer linux versions - if (UseHugePages) { - auto spdkState = Singleton<TSpdkStateOSS>(); - if (AlignedBuffer) { - spdkState->Free(AlignedBuffer); - } - AlignedBuffer = spdkState->Malloc(size, alignment); - } else 
{ - Holder.Reset(new ui8[size + alignment - 1]); - AlignedBuffer = (ui8*)((intptr_t)(Holder.Get() + alignment - 1) / alignment * alignment); - } - NSan::Poison(AlignedBuffer, size); - REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(AlignedBuffer, size); - } - - ui8* Get() { - return AlignedBuffer; - } - - const ui8* Get() const { - return AlignedBuffer; - } - - ui32 Size() const { - return BufferSize; - } - - ~TAlignedData() { - if (UseHugePages && AlignedBuffer) { - auto spdkState = Singleton<TSpdkStateOSS>(); - spdkState->Free(AlignedBuffer); - } - } -}; - -// -// TBuffer -// - -class TBufferPool; -class TReturnToPool; - -struct TBuffer : TCompletionAction { - using TPtr = THolder<TBuffer, TReturnToPool>; - - // Used only if buffer pool exhausted - TAlignedData FallbackData; - - ui8 *Buffer; - ui32 BufferSize; - TBufferPool *Pool; - i32 PopCount; - - TBuffer(ui8 *buffer, ui32 bufferSize, TBufferPool *pool); - TBuffer(ui32 bufferSize, bool useHugePages); - virtual ~TBuffer(); - void Exec(TActorSystem *actorSystem) override; - void Release(TActorSystem *actorSystem) override; - bool ReturnToPool(); - ui8* Data() const; - ui32 Size() const; - void RemoveFromPool(); -}; - -class TReturnToPool { -public: - static inline void Destroy(TBuffer *buffer) { - if (!buffer->ReturnToPool()) { - // buffer was allocated in heap - delete buffer; - } - } -}; - - -// -// TBufferPool -// -class TBufferPool { -public: - struct TPDiskParams { - NMonitoring::TDynamicCounters::TCounterPtr FailedAllocations = nullptr; - TActorSystem *ActorSystem = nullptr; - ui32 PDiskId = 0; - - TPDiskParams() = default; - - TPDiskParams(NMonitoring::TDynamicCounters::TCounterPtr failedAllocations, TActorSystem *actorSystem, - ui32 pDiskId) - : FailedAllocations(failedAllocations) - , ActorSystem(actorSystem) - , PDiskId(pDiskId) - {} - }; - -private: - ui32 BufferSize; - ui32 BuffersCount; - ui8 *AlignedData; - TUnorderedCache<TBuffer*, 512, 3> Buffers; - TAtomic ReadRotation; - TAtomic WriteRotation; - 
TPDiskParams Params; - -protected: - // Set by inherited classes - bool UseHugePages; - -public: - TBufferPool(ui32 bufferSize, ui32 bufferCount, TPDiskParams params); - void MarkUpPool(ui8 *alignedData); - TBuffer *Pop(); - ui32 GetBufferSize(); - void Push(TBuffer *buffer); - virtual ~TBufferPool() = 0; -}; - -// -// TBufferPoolCommon -// -class TBufferPoolCommon : public TBufferPool { - TArrayHolder<ui8> RawBuffer; -public: - TBufferPoolCommon(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params); - virtual ~TBufferPoolCommon(); -}; - -// -// TBufferPoolHugePages -// -class TBufferPoolHugePages : public TBufferPool { - ui8 *AlignedBuffer; -public: - TBufferPoolHugePages(ui32 bufferSize, ui32 bufferCount, TPDiskParams params); - virtual ~TBufferPoolHugePages(); -}; - -TBufferPool *CreateBufferPool(ui64 size, ui32 bufferCount, bool UseHugePages, TBufferPool::TPDiskParams params); - -} // NPDisk -} // NKikimr - +#include <library/cpp/actors/util/unordered_cache.h> + +#include <util/system/sanitizers.h> + +namespace NKikimr { +namespace NPDisk { + +// +// TAlignedData +// + +struct TAlignedData { + TArrayHolder<ui8> Holder; + ui8 *AlignedBuffer = nullptr; + ui32 BufferSize = 0; + bool UseHugePages = false; +public: + TAlignedData() = default; + TAlignedData(TAlignedData &&data) = default; + + TAlignedData(ui32 size) + : TAlignedData(size, false) + {} + + TAlignedData(ui32 size, bool useHugePages) + : AlignedBuffer(nullptr) + , BufferSize(size) + , UseHugePages(useHugePages) + { + constexpr intptr_t alignment = 4096; //block device block size / or 512 for newer linux versions + if (UseHugePages) { + auto spdkState = Singleton<TSpdkStateOSS>(); + if (AlignedBuffer) { + spdkState->Free(AlignedBuffer); + } + AlignedBuffer = spdkState->Malloc(size, alignment); + } else { + Holder.Reset(new ui8[size + alignment - 1]); + AlignedBuffer = (ui8*)((intptr_t)(Holder.Get() + alignment - 1) / alignment * alignment); + } + NSan::Poison(AlignedBuffer, size); + 
REQUEST_VALGRIND_MAKE_MEM_UNDEFINED(AlignedBuffer, size); + } + + ui8* Get() { + return AlignedBuffer; + } + + const ui8* Get() const { + return AlignedBuffer; + } + + ui32 Size() const { + return BufferSize; + } + + ~TAlignedData() { + if (UseHugePages && AlignedBuffer) { + auto spdkState = Singleton<TSpdkStateOSS>(); + spdkState->Free(AlignedBuffer); + } + } +}; + +// +// TBuffer +// + +class TBufferPool; +class TReturnToPool; + +struct TBuffer : TCompletionAction { + using TPtr = THolder<TBuffer, TReturnToPool>; + + // Used only if buffer pool exhausted + TAlignedData FallbackData; + + ui8 *Buffer; + ui32 BufferSize; + TBufferPool *Pool; + i32 PopCount; + + TBuffer(ui8 *buffer, ui32 bufferSize, TBufferPool *pool); + TBuffer(ui32 bufferSize, bool useHugePages); + virtual ~TBuffer(); + void Exec(TActorSystem *actorSystem) override; + void Release(TActorSystem *actorSystem) override; + bool ReturnToPool(); + ui8* Data() const; + ui32 Size() const; + void RemoveFromPool(); +}; + +class TReturnToPool { +public: + static inline void Destroy(TBuffer *buffer) { + if (!buffer->ReturnToPool()) { + // buffer was allocated in heap + delete buffer; + } + } +}; + + +// +// TBufferPool +// +class TBufferPool { +public: + struct TPDiskParams { + NMonitoring::TDynamicCounters::TCounterPtr FailedAllocations = nullptr; + TActorSystem *ActorSystem = nullptr; + ui32 PDiskId = 0; + + TPDiskParams() = default; + + TPDiskParams(NMonitoring::TDynamicCounters::TCounterPtr failedAllocations, TActorSystem *actorSystem, + ui32 pDiskId) + : FailedAllocations(failedAllocations) + , ActorSystem(actorSystem) + , PDiskId(pDiskId) + {} + }; + +private: + ui32 BufferSize; + ui32 BuffersCount; + ui8 *AlignedData; + TUnorderedCache<TBuffer*, 512, 3> Buffers; + TAtomic ReadRotation; + TAtomic WriteRotation; + TPDiskParams Params; + +protected: + // Set by inherited classes + bool UseHugePages; + +public: + TBufferPool(ui32 bufferSize, ui32 bufferCount, TPDiskParams params); + void MarkUpPool(ui8 
*alignedData); + TBuffer *Pop(); + ui32 GetBufferSize(); + void Push(TBuffer *buffer); + virtual ~TBufferPool() = 0; +}; + +// +// TBufferPoolCommon +// +class TBufferPoolCommon : public TBufferPool { + TArrayHolder<ui8> RawBuffer; +public: + TBufferPoolCommon(ui32 bufferSize, ui32 bufferCount, TBufferPool::TPDiskParams params); + virtual ~TBufferPoolCommon(); +}; + +// +// TBufferPoolHugePages +// +class TBufferPoolHugePages : public TBufferPool { + ui8 *AlignedBuffer; +public: + TBufferPoolHugePages(ui32 bufferSize, ui32 bufferCount, TPDiskParams params); + virtual ~TBufferPoolHugePages(); +}; + +TBufferPool *CreateBufferPool(ui64 size, ui32 bufferCount, bool UseHugePages, TBufferPool::TPDiskParams params); + +} // NPDisk +} // NKikimr + diff --git a/ydb/library/pdisk_io/file_params.h b/ydb/library/pdisk_io/file_params.h index 1e50291c82..62203620a4 100644 --- a/ydb/library/pdisk_io/file_params.h +++ b/ydb/library/pdisk_io/file_params.h @@ -1,18 +1,18 @@ -#pragma once - +#pragma once + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_drivedata.h> - -#include <util/folder/path.h> -#include <optional> - -namespace NKikimr { - -void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice); - -std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& serial, bool partlabelOnly); - -TVector<NPDisk::TDriveData> ListDevicesWithPartlabel(); - -TVector<NPDisk::TDriveData> ListAllDevices(); - -} // NKikimr + +#include <util/folder/path.h> +#include <optional> + +namespace NKikimr { + +void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice); + +std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& serial, bool partlabelOnly); + +TVector<NPDisk::TDriveData> ListDevicesWithPartlabel(); + +TVector<NPDisk::TDriveData> ListAllDevices(); + +} // NKikimr diff --git a/ydb/library/pdisk_io/file_params_darwin.cpp b/ydb/library/pdisk_io/file_params_darwin.cpp index 
22a35f3432..d300a15345 100644 --- a/ydb/library/pdisk_io/file_params_darwin.cpp +++ b/ydb/library/pdisk_io/file_params_darwin.cpp @@ -1,63 +1,63 @@ -#include "file_params.h" - -#include <fcntl.h> -#include <sys/disk.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -namespace NKikimr { - -void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { - int file = open(path.c_str(), O_RDWR); - if (file < 0) { - TStringStream errStr; - errStr << "Can't open file \"" << path << "\": "; - if (errno == EACCES) { - errStr << "you have no rights"; - } else if (errno == ENOENT) { - errStr << "no such file"; - } else { - errStr << "unknown reason, errno# " << errno << ", strerror(errno)# " << strerror(errno); - } - ythrow yexception() << errStr.Str(); - } else { - struct stat stats; - if (fstat(file, &stats) == 0) { - if (S_ISREG(stats.st_mode)) { - outIsBlockDevice = false; - outDiskSizeBytes = stats.st_size; - } else if (S_ISBLK(stats.st_mode)) { - outIsBlockDevice = true; - ui64 sectorCount = 0; - if (ioctl(file, DKIOCGETBLOCKCOUNT, &sectorCount) < 0) { - ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " - << strerror(errno) << Endl; - } - ui32 sectorSize = 0; - if (ioctl(file, DKIOCGETBLOCKSIZE, &sectorSize) < 0) { - ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " - << strerror(errno) << Endl; - } - outDiskSizeBytes = sectorCount * sectorSize; - } else { - ythrow yexception() << "Unknown file type - neither file nor block device" << Endl; - } - } else { - ythrow yexception() << "Can't get info about file/device, errno# " << errno << ", strerror(errno)# " - << strerror(errno) << Endl; - } - close(file); - } -} - -std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& /*serial*/, bool /*partlabelOnly*/) { - return {}; -} - -TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { - return {}; -} - -}
+#include "file_params.h" + +#include <fcntl.h> +#include <sys/disk.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +namespace NKikimr { + +void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { + int file = open(path.c_str(), O_RDWR); + if (file < 0) { + TStringStream errStr; + errStr << "Can't open file \"" << path << "\": "; + if (errno == EACCES) { + errStr << "you have no rights"; + } else if (errno == ENOENT) { + errStr << "no such file"; + } else { + errStr << "unknown reason, errno# " << errno << ", strerror(errno)# " << strerror(errno); + } + ythrow yexception() << errStr.Str(); + } else { + struct stat stats; + if (fstat(file, &stats) == 0) { + if (S_ISREG(stats.st_mode)) { + outIsBlockDevice = false; + outDiskSizeBytes = stats.st_size; + } else if (S_ISBLK(stats.st_mode)) { + outIsBlockDevice = true; + ui64 sectorCount = 0; + if (ioctl(file, DKIOCGETBLOCKCOUNT, &sectorCount) < 0) { + ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " + << strerror(errno) << Endl; + } + ui32 sectorSize = 0; + if (ioctl(file, DKIOCGETBLOCKSIZE, &sectorSize) < 0) { + ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " + << strerror(errno) << Endl; + } + outDiskSizeBytes = sectorCount * sectorSize; + } else { + ythrow yexception() << "Unknown file type - neither file nor block device" << Endl; + } + } else { + ythrow yexception() << "Can't get info about file/device, errno# " << errno << ", strerror(errno)# " + << strerror(errno) << Endl; + } + close(file); + } +} + +std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& /*serial*/, bool /*partlabelOnly*/) { + return {}; +} + +TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { + return {}; +} + +} diff --git a/ydb/library/pdisk_io/file_params_linux.cpp b/ydb/library/pdisk_io/file_params_linux.cpp index 754de47c2d..4b44328fe3 100644 ---
a/ydb/library/pdisk_io/file_params_linux.cpp +++ b/ydb/library/pdisk_io/file_params_linux.cpp @@ -1,134 +1,134 @@ -#include "file_params.h" - +#include "file_params.h" + #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_util_wcache.h> #include <ydb/library/pdisk_io/spdk_state.h> #include <ydb/library/pdisk_io/aio.h> - -#include <linux/fs.h> -#include <regex> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -namespace NKikimr { - -void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { - int file = open(path.c_str(), O_RDWR); - if (file < 0) { - TStringStream errStr; - errStr << "Can't open file \"" << path << "\": "; - if (errno == EACCES) { - errStr << "you have no rights"; - } else if (errno == ENOENT) { - errStr << "no such file"; - } else { - errStr << "unknown reason, errno# " << errno << ", strerror(errno)# " << strerror(errno); - } - ythrow yexception() << errStr.Str(); - } else { - struct stat stats; - if (fstat(file, &stats) == 0) { - if (S_ISREG(stats.st_mode)) { - outIsBlockDevice = false; - outDiskSizeBytes = stats.st_size; - } else if (S_ISBLK(stats.st_mode)) { - outIsBlockDevice = true; - if (ioctl(file, BLKGETSIZE64, &outDiskSizeBytes) < 0) { - ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " - << strerror(errno) << Endl; - } - } else { - ythrow yexception() << "Unknown file type - neither file nor block device" << Endl; - } - } else { - ythrow yexception() << "Can't get info about file/device, errno# " << errno << ", strerror(errno)# " - << strerror(errno) << Endl; - } - close(file); - } -} - -static TVector<NPDisk::TDriveData> FilterOnlyUniqueSerial(TVector<NPDisk::TDriveData> devices) { - TVector<NPDisk::TDriveData> result; - std::sort(devices.begin(), devices.end(), - [] (const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs) { - return lhs.SerialNumber < rhs.SerialNumber; - } - ); - - for (size_t i = 0; i < 
devices.size(); ) { - bool duplicate = false; - while (i + 1 < devices.size() && devices[i].SerialNumber == devices[i + 1].SerialNumber) { - ++i; - duplicate = true; - } - if (!duplicate) { - result.push_back(devices[i]); - } - ++i; - } - return result; -} - -static TVector<NPDisk::TDriveData> ListDevices(const char *folder, const TString& serial, std::regex device_regex) { - TFsPath path(folder); - TVector<TFsPath> children; - try { - path.List(children); - } catch (std::exception&) { - return {}; - } - TVector<NPDisk::TDriveData> devicesFound; - for (const auto& child : children) { - if (std::regex_match(child.GetName().c_str(), device_regex)) { - TStringStream details; - std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(child.GetPath(), &details); - if (data && (!serial || data->SerialNumber == serial)) { - devicesFound.push_back(*data); - } - } - } - - return FilterOnlyUniqueSerial(devicesFound); -} - -static std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const char *folder, const TString& serial, - std::regex device_regex) { - TVector<NPDisk::TDriveData> devicesFound = ListDevices(folder, serial, device_regex); - - // There must be only one device with the serial - // If the folder is /dev, then device exptected not to have GPT partitions - // If the folder is /dev/disk/by-partlabel, then there only one symlink to partition to be found - if (devicesFound.size() == 1) { - return {devicesFound.front()}; - } else { - return {}; - } -} - -static const std::regex kikimrDevice{".*(kikimr|KIKIMR).*"}; - -TVector<NPDisk::TDriveData> ListAllDevices() { - return ListDevices("/dev", {}, std::regex(".*")); -} - -TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { - return ListDevices("/dev/disk/by-partlabel", "", kikimrDevice); -} - -std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& serial, bool partlabelOnly) { - std::optional<NPDisk::TDriveData> data; - - if (data = FindDeviceBySerialNumber("/dev/disk/by-partlabel", 
serial, kikimrDevice)) { - return data; - } else if (partlabelOnly) { - return {}; - } else if (data = FindDeviceBySerialNumber("/dev", serial, std::regex("sd\\w\\d*"))) { - return data; - } else { - return {}; - } -} - -} + +#include <linux/fs.h> +#include <regex> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +namespace NKikimr { + +void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { + int file = open(path.c_str(), O_RDWR); + if (file < 0) { + TStringStream errStr; + errStr << "Can't open file \"" << path << "\": "; + if (errno == EACCES) { + errStr << "you have no rights"; + } else if (errno == ENOENT) { + errStr << "no such file"; + } else { + errStr << "unknown reason, errno# " << errno << ", strerror(errno)# " << strerror(errno); + } + ythrow yexception() << errStr.Str(); + } else { + struct stat stats; + if (fstat(file, &stats) == 0) { + if (S_ISREG(stats.st_mode)) { + outIsBlockDevice = false; + outDiskSizeBytes = stats.st_size; + } else if (S_ISBLK(stats.st_mode)) { + outIsBlockDevice = true; + if (ioctl(file, BLKGETSIZE64, &outDiskSizeBytes) < 0) { + ythrow yexception() << "Can't get device size, errno# " << errno << ", strerror(errno)# " + << strerror(errno) << Endl; + } + } else { + ythrow yexception() << "Unknown file type - neither file nor block device" << Endl; + } + } else { + ythrow yexception() << "Can't get info about file/device, errno# " << errno << ", strerror(errno)# " + << strerror(errno) << Endl; + } + close(file); + } +} + +static TVector<NPDisk::TDriveData> FilterOnlyUniqueSerial(TVector<NPDisk::TDriveData> devices) { + TVector<NPDisk::TDriveData> result; + std::sort(devices.begin(), devices.end(), + [] (const NPDisk::TDriveData& lhs, const NPDisk::TDriveData& rhs) { + return lhs.SerialNumber < rhs.SerialNumber; + } + ); + + for (size_t i = 0; i < devices.size(); ) { + bool duplicate = false; + while (i + 1 < devices.size() && devices[i].SerialNumber == 
devices[i + 1].SerialNumber) { + ++i; + duplicate = true; + } + if (!duplicate) { + result.push_back(devices[i]); + } + ++i; + } + return result; +} + +static TVector<NPDisk::TDriveData> ListDevices(const char *folder, const TString& serial, std::regex device_regex) { + TFsPath path(folder); + TVector<TFsPath> children; + try { + path.List(children); + } catch (std::exception&) { + return {}; + } + TVector<NPDisk::TDriveData> devicesFound; + for (const auto& child : children) { + if (std::regex_match(child.GetName().c_str(), device_regex)) { + TStringStream details; + std::optional<NPDisk::TDriveData> data = NPDisk::GetDriveData(child.GetPath(), &details); + if (data && (!serial || data->SerialNumber == serial)) { + devicesFound.push_back(*data); + } + } + } + + return FilterOnlyUniqueSerial(devicesFound); +} + +static std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const char *folder, const TString& serial, + std::regex device_regex) { + TVector<NPDisk::TDriveData> devicesFound = ListDevices(folder, serial, device_regex); + + // There must be only one device with the serial + // If the folder is /dev, then device exptected not to have GPT partitions + // If the folder is /dev/disk/by-partlabel, then there only one symlink to partition to be found + if (devicesFound.size() == 1) { + return {devicesFound.front()}; + } else { + return {}; + } +} + +static const std::regex kikimrDevice{".*(kikimr|KIKIMR).*"}; + +TVector<NPDisk::TDriveData> ListAllDevices() { + return ListDevices("/dev", {}, std::regex(".*")); +} + +TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { + return ListDevices("/dev/disk/by-partlabel", "", kikimrDevice); +} + +std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& serial, bool partlabelOnly) { + std::optional<NPDisk::TDriveData> data; + + if (data = FindDeviceBySerialNumber("/dev/disk/by-partlabel", serial, kikimrDevice)) { + return data; + } else if (partlabelOnly) { + return {}; + } else if (data = 
FindDeviceBySerialNumber("/dev", serial, std::regex("sd\\w\\d*"))) { + return data; + } else { + return {}; + } +} + +} diff --git a/ydb/library/pdisk_io/file_params_win.cpp b/ydb/library/pdisk_io/file_params_win.cpp index b467fcf051..9da2d2bcb8 100644 --- a/ydb/library/pdisk_io/file_params_win.cpp +++ b/ydb/library/pdisk_io/file_params_win.cpp @@ -1,45 +1,45 @@ -#include "file_params.h" - -#include <windows.h> - -namespace NKikimr { - -TString GetLastErrorStr() { - TStringStream errStr; - DWORD errorId = GetLastError(); - LPSTR messageBuffer = nullptr; - FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, errorId, 0, (LPSTR)&messageBuffer, 0, NULL); - errStr << messageBuffer; - LocalFree(messageBuffer); - return errStr.Str(); -} - -void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { - HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - //int file = open(path.c_str(), O_RDWR); - if (hFile == INVALID_HANDLE_VALUE) { - TStringStream errStr; - errStr << "Can't open file, path# \"" << path << "\": errorStr# " << GetLastErrorStr(); - ythrow yexception() << errStr.Str(); - } else { - LARGE_INTEGER lFileSize; - if (GetFileSizeEx(hFile, &lFileSize) != 0) { - outDiskSizeBytes = (ui64)lFileSize.QuadPart; - } else { - TStringStream errStr; - errStr << "Can't get file size, path# \"" << path << "\": errorStr# " << GetLastErrorStr(); - ythrow yexception() << errStr.Str(); - } - } -} - -std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& /*serial*/, bool /*partlabelOnly*/) { - return {}; -} - -TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { - return {}; -} - -} +#include "file_params.h" + +#include <windows.h> + +namespace NKikimr { + +TString GetLastErrorStr() { + TStringStream errStr; + DWORD errorId = GetLastError(); + LPSTR messageBuffer = nullptr; + 
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errorId, 0, (LPSTR)&messageBuffer, 0, NULL); + errStr << messageBuffer; + LocalFree(messageBuffer); + return errStr.Str(); +} + +void DetectFileParameters(TString path, ui64 &outDiskSizeBytes, bool &outIsBlockDevice) { + HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + //int file = open(path.c_str(), O_RDWR); + if (hFile == INVALID_HANDLE_VALUE) { + TStringStream errStr; + errStr << "Can't open file, path# \"" << path << "\": errorStr# " << GetLastErrorStr(); + ythrow yexception() << errStr.Str(); + } else { + LARGE_INTEGER lFileSize; + if (GetFileSizeEx(hFile, &lFileSize) != 0) { + outDiskSizeBytes = (ui64)lFileSize.QuadPart; + } else { + TStringStream errStr; + errStr << "Can't get file size, path# \"" << path << "\": errorStr# " << GetLastErrorStr(); + ythrow yexception() << errStr.Str(); + } + } +} + +std::optional<NPDisk::TDriveData> FindDeviceBySerialNumber(const TString& /*serial*/, bool /*partlabelOnly*/) { + return {}; +} + +TVector<NPDisk::TDriveData> ListDevicesWithPartlabel() { + return {}; +} + +} diff --git a/ydb/library/pdisk_io/protos/sector_map.proto b/ydb/library/pdisk_io/protos/sector_map.proto index bdd4363374..a26ede61b0 100644 --- a/ydb/library/pdisk_io/protos/sector_map.proto +++ b/ydb/library/pdisk_io/protos/sector_map.proto @@ -1,18 +1,18 @@ -syntax = "proto3"; - -package NKikimrPDisk; - -enum ECompression { - NO_COMPRESSION = 0; - LZ4 = 1; -} - -message TSectorData { - uint64 Offset = 1; - ECompression CompressionType = 2; - bytes Data = 3; -} - -message TSectorMapSnapshot { - repeated TSectorData Sectors = 1; -} +syntax = "proto3"; + +package NKikimrPDisk; + +enum ECompression { + NO_COMPRESSION = 0; + LZ4 = 1; +} + +message TSectorData { + uint64 Offset = 1; + ECompression CompressionType = 2; + bytes Data = 3; +} + +message TSectorMapSnapshot { + repeated 
TSectorData Sectors = 1; +} diff --git a/ydb/library/pdisk_io/protos/ya.make b/ydb/library/pdisk_io/protos/ya.make index d64a1b4416..4d63170cf5 100644 --- a/ydb/library/pdisk_io/protos/ya.make +++ b/ydb/library/pdisk_io/protos/ya.make @@ -1,18 +1,18 @@ -PROTO_LIBRARY() - +PROTO_LIBRARY() + OWNER( va-kuznecov g:kikimr ) - -IF (OS_WINDOWS) - NO_OPTIMIZE_PY_PROTOS() -ENDIF() - -SRCS( - sector_map.proto -) - -EXCLUDE_TAGS(GO_PROTO) - -END() + +IF (OS_WINDOWS) + NO_OPTIMIZE_PY_PROTOS() +ENDIF() + +SRCS( + sector_map.proto +) + +EXCLUDE_TAGS(GO_PROTO) + +END() diff --git a/ydb/library/pdisk_io/sector_map.cpp b/ydb/library/pdisk_io/sector_map.cpp index 40ab1d5561..c1a7a28964 100644 --- a/ydb/library/pdisk_io/sector_map.cpp +++ b/ydb/library/pdisk_io/sector_map.cpp @@ -1,34 +1,34 @@ -#include "sector_map.h" - +#include "sector_map.h" + #include <ydb/library/pdisk_io/protos/sector_map.pb.h> - -namespace NKikimr::NPDisk { - -void TSectorMap::LoadFromFile(const TString& path) { - TString raw = TFileInput(path).ReadAll(); - NKikimrPDisk::TSectorMapSnapshot snap; - bool success = snap.ParseFromString(raw); - Y_VERIFY_S(success, path); - - Map.reserve(snap.SectorsSize()); - for (auto& s : snap.GetSectors()) { - Y_VERIFY_S(s.GetCompressionType() == NKikimrPDisk::ECompression::LZ4, path); - Map[s.GetOffset()] = s.GetData(); - } -} - -void TSectorMap::StoreToFile(const TString& path) { - NKikimrPDisk::TSectorMapSnapshot snap; - for (auto& [offset, data] : Map) { - NKikimrPDisk::TSectorData *sd = snap.AddSectors(); - sd->SetOffset(offset); - sd->SetData(data); - sd->SetCompressionType(NKikimrPDisk::ECompression::LZ4); - } - - TString raw; + +namespace NKikimr::NPDisk { + +void TSectorMap::LoadFromFile(const TString& path) { + TString raw = TFileInput(path).ReadAll(); + NKikimrPDisk::TSectorMapSnapshot snap; + bool success = snap.ParseFromString(raw); + Y_VERIFY_S(success, path); + + Map.reserve(snap.SectorsSize()); + for (auto& s : snap.GetSectors()) { + 
Y_VERIFY_S(s.GetCompressionType() == NKikimrPDisk::ECompression::LZ4, path); + Map[s.GetOffset()] = s.GetData(); + } +} + +void TSectorMap::StoreToFile(const TString& path) { + NKikimrPDisk::TSectorMapSnapshot snap; + for (auto& [offset, data] : Map) { + NKikimrPDisk::TSectorData *sd = snap.AddSectors(); + sd->SetOffset(offset); + sd->SetData(data); + sd->SetCompressionType(NKikimrPDisk::ECompression::LZ4); + } + + TString raw; Y_PROTOBUF_SUPPRESS_NODISCARD snap.SerializeToString(&raw); - TFileOutput(path).Write(raw); -} - -} + TFileOutput(path).Write(raw); +} + +} diff --git a/ydb/library/pdisk_io/sector_map.h b/ydb/library/pdisk_io/sector_map.h index 9bd5edcbda..f86e8fb6d0 100644 --- a/ydb/library/pdisk_io/sector_map.h +++ b/ydb/library/pdisk_io/sector_map.h @@ -1,151 +1,151 @@ #pragma once #include <ydb/core/util/yverify_stream.h> -#include <library/cpp/actors/util/ticket_lock.h> +#include <library/cpp/actors/util/ticket_lock.h> -#include <util/generic/guid.h> -#include <util/generic/hash.h> +#include <util/generic/guid.h> +#include <util/generic/hash.h> #include <util/generic/string.h> -#include <util/stream/file.h> -#include <util/stream/format.h> +#include <util/stream/file.h> +#include <util/stream/format.h> #include <util/system/mutex.h> -#include <contrib/libs/lz4/lz4.h> +#include <contrib/libs/lz4/lz4.h> + +#include <atomic> +#include <optional> -#include <atomic> -#include <optional> - namespace NKikimr { namespace NPDisk { - -constexpr ui64 SectorMapSectorSize = 4096; - + +constexpr ui64 SectorMapSectorSize = 4096; + class TSectorMap : public TThrRefBase { - THashMap<ui64, TString> Map; - + THashMap<ui64, TString> Map; + public: - TString Serial = CreateGuidAsString(); + TString Serial = CreateGuidAsString(); ui64 DeviceSize; TTicketLock MapLock; - std::atomic<bool> IsLocked; - std::optional<std::pair<TDuration, TDuration>> ImitateRandomWait; - std::atomic<double> ImitateIoErrorProbability; - std::atomic<double> ImitateReadIoErrorProbability; + 
std::atomic<bool> IsLocked; + std::optional<std::pair<TDuration, TDuration>> ImitateRandomWait; + std::atomic<double> ImitateIoErrorProbability; + std::atomic<double> ImitateReadIoErrorProbability; + + std::atomic<ui64> AllocatedBytes; - std::atomic<ui64> AllocatedBytes; - TSectorMap(ui64 deviceSize = 0) : DeviceSize(deviceSize) , IsLocked(false) - , ImitateIoErrorProbability(0.0) - , ImitateReadIoErrorProbability(0.0) - , AllocatedBytes(0) - {} + , ImitateIoErrorProbability(0.0) + , ImitateReadIoErrorProbability(0.0) + , AllocatedBytes(0) + {} bool Lock() { - return !IsLocked.exchange(true); + return !IsLocked.exchange(true); } bool Unlock() { - return IsLocked.exchange(false); + return IsLocked.exchange(false); } void ForceSize(ui64 size) { DeviceSize = size; - if (DeviceSize < size) { - for (const auto& [offset, data] : Map) { - Y_VERIFY_S(offset + 4096 <= DeviceSize, "It is not possible to shrink TSectorMap with data"); + if (DeviceSize < size) { + for (const auto& [offset, data] : Map) { + Y_VERIFY_S(offset + 4096 <= DeviceSize, "It is not possible to shrink TSectorMap with data"); } } } void ZeroInit(ui64 sectors) { - ui64 bytes = sectors * SectorMapSectorSize; - TString str = TString::Uninitialized(bytes); - memset(str.Detach(), 0, bytes); - Write((ui8*)str.Detach(), bytes, 0); - } - - void Read(ui8 *data, i64 size, ui64 offset) { - Y_VERIFY(size % SectorMapSectorSize == 0); - Y_VERIFY(offset % SectorMapSectorSize == 0); - - TGuard<TTicketLock> guard(MapLock); - for (; size > 0; size -= SectorMapSectorSize) { - if (auto it = Map.find(offset); it == Map.end()) { - memset(data, 0x33, SectorMapSectorSize); - } else { - char tmp[4 * SectorMapSectorSize]; - int processed = LZ4_decompress_safe(it->second.data(), tmp, it->second.size(), 4 * SectorMapSectorSize); - Y_VERIFY_S(processed == SectorMapSectorSize, "processed# " << processed); - memcpy(data, tmp, SectorMapSectorSize); - } - offset += SectorMapSectorSize; - data += SectorMapSectorSize; + ui64 bytes = 
sectors * SectorMapSectorSize; + TString str = TString::Uninitialized(bytes); + memset(str.Detach(), 0, bytes); + Write((ui8*)str.Detach(), bytes, 0); + } + + void Read(ui8 *data, i64 size, ui64 offset) { + Y_VERIFY(size % SectorMapSectorSize == 0); + Y_VERIFY(offset % SectorMapSectorSize == 0); + + TGuard<TTicketLock> guard(MapLock); + for (; size > 0; size -= SectorMapSectorSize) { + if (auto it = Map.find(offset); it == Map.end()) { + memset(data, 0x33, SectorMapSectorSize); + } else { + char tmp[4 * SectorMapSectorSize]; + int processed = LZ4_decompress_safe(it->second.data(), tmp, it->second.size(), 4 * SectorMapSectorSize); + Y_VERIFY_S(processed == SectorMapSectorSize, "processed# " << processed); + memcpy(data, tmp, SectorMapSectorSize); + } + offset += SectorMapSectorSize; + data += SectorMapSectorSize; + } + } + + void Write(const ui8 *data, i64 size, ui64 offset) { + Y_VERIFY(size % SectorMapSectorSize == 0); + Y_VERIFY(offset % SectorMapSectorSize == 0); + + TGuard<TTicketLock> guard(MapLock); + for (; size > 0; size -= SectorMapSectorSize) { + char tmp[4 * SectorMapSectorSize]; + int written = LZ4_compress_default((const char*)data, tmp, SectorMapSectorSize, 4 * SectorMapSectorSize); + Y_VERIFY_S(written > 0, "written# " << written); + TString str = TString::Uninitialized(written); + memcpy(str.Detach(), tmp, written); + if (auto it = Map.find(offset); it != Map.end()) { + AllocatedBytes.fetch_sub(it->second.size()); + it->second = str; + } else { + Map[offset] = str; + } + AllocatedBytes.fetch_add(Map[offset].size()); + offset += SectorMapSectorSize; + data += SectorMapSectorSize; + } + } + + void Trim(i64 size, ui64 offset) { + TGuard<TTicketLock> guard(MapLock); + Y_VERIFY(size % SectorMapSectorSize == 0); + Y_VERIFY(offset % SectorMapSectorSize == 0); + for (; size > 0; size -= SectorMapSectorSize) { + if (auto it = Map.find(offset); it != Map.end()) { + AllocatedBytes.fetch_sub(it->second.size()); + Map.erase(it); + } + offset += 
SectorMapSectorSize; } } - - void Write(const ui8 *data, i64 size, ui64 offset) { - Y_VERIFY(size % SectorMapSectorSize == 0); - Y_VERIFY(offset % SectorMapSectorSize == 0); - - TGuard<TTicketLock> guard(MapLock); - for (; size > 0; size -= SectorMapSectorSize) { - char tmp[4 * SectorMapSectorSize]; - int written = LZ4_compress_default((const char*)data, tmp, SectorMapSectorSize, 4 * SectorMapSectorSize); - Y_VERIFY_S(written > 0, "written# " << written); - TString str = TString::Uninitialized(written); - memcpy(str.Detach(), tmp, written); - if (auto it = Map.find(offset); it != Map.end()) { - AllocatedBytes.fetch_sub(it->second.size()); - it->second = str; - } else { - Map[offset] = str; - } - AllocatedBytes.fetch_add(Map[offset].size()); - offset += SectorMapSectorSize; - data += SectorMapSectorSize; - } - } - - void Trim(i64 size, ui64 offset) { - TGuard<TTicketLock> guard(MapLock); - Y_VERIFY(size % SectorMapSectorSize == 0); - Y_VERIFY(offset % SectorMapSectorSize == 0); - for (; size > 0; size -= SectorMapSectorSize) { - if (auto it = Map.find(offset); it != Map.end()) { - AllocatedBytes.fetch_sub(it->second.size()); - Map.erase(it); - } - offset += SectorMapSectorSize; - } - } - - ui64 DataBytes() const { - return Map.size() * 4096; - } - - TString ToString() const { - TStringStream str; - str << "Serial# " << Serial.Quote() << "\n"; - str << "DeviceSize# " << DeviceSize << "\n"; - str << "IsLocked# " << IsLocked.load() << "\n"; - if (ImitateRandomWait) { - str << "ImitateRandomWait# [" << ImitateRandomWait->first << ", " - << ImitateRandomWait->first + ImitateRandomWait->second << ")" << "\n"; - } - str << "ImitateReadIoErrorProbability# " << ImitateReadIoErrorProbability.load() << "\n"; - str << "ImitateIoErrorProbability# " << ImitateIoErrorProbability.load() << "\n"; - str << "AllocatedBytes (approx.)# " << HumanReadableSize(AllocatedBytes.load(), SF_QUANTITY) << "\n"; - str << "DataBytes# " << HumanReadableSize(DataBytes(), SF_QUANTITY) << "\n"; - 
return str.Str(); - } - - // Requires proto information, so should be defined in cpp - void LoadFromFile(const TString& path); - void StoreToFile(const TString& path); + + ui64 DataBytes() const { + return Map.size() * 4096; + } + + TString ToString() const { + TStringStream str; + str << "Serial# " << Serial.Quote() << "\n"; + str << "DeviceSize# " << DeviceSize << "\n"; + str << "IsLocked# " << IsLocked.load() << "\n"; + if (ImitateRandomWait) { + str << "ImitateRandomWait# [" << ImitateRandomWait->first << ", " + << ImitateRandomWait->first + ImitateRandomWait->second << ")" << "\n"; + } + str << "ImitateReadIoErrorProbability# " << ImitateReadIoErrorProbability.load() << "\n"; + str << "ImitateIoErrorProbability# " << ImitateIoErrorProbability.load() << "\n"; + str << "AllocatedBytes (approx.)# " << HumanReadableSize(AllocatedBytes.load(), SF_QUANTITY) << "\n"; + str << "DataBytes# " << HumanReadableSize(DataBytes(), SF_QUANTITY) << "\n"; + return str.Str(); + } + + // Requires proto information, so should be defined in cpp + void LoadFromFile(const TString& path); + void StoreToFile(const TString& path); }; } // NPDisk diff --git a/ydb/library/pdisk_io/spdk_state.h b/ydb/library/pdisk_io/spdk_state.h index 2c60964f44..2129a325da 100644 --- a/ydb/library/pdisk_io/spdk_state.h +++ b/ydb/library/pdisk_io/spdk_state.h @@ -1,42 +1,42 @@ -#pragma once - -#include <util/system/yassert.h> - -namespace NKikimr::NPDisk { - -class ISpdkState { -public: - virtual void LaunchThread(int (*fn)(void *), void *cookie) = 0; - virtual ui8 *Malloc(ui64 size, ui32 align) = 0; - virtual void Free(ui8 *buff) = 0; - //virtual ui64 GetDeviceSize() = 0; - virtual void WaitAllThreads() = 0; - virtual ~ISpdkState() {}; -}; - -class TSpdkStateOSS : public ISpdkState { -public: - TSpdkStateOSS() {} - - void LaunchThread(int (*)(void *), void *) override { - Y_FAIL("Spdk is not supported now"); - } - - ui8 *Malloc(ui64, ui32) override { - Y_FAIL("Spdk is not supported now"); - } - - void 
Free(ui8 *) override { - Y_FAIL("Spdk is not supported now"); - } - - //ui64 GetDeviceSize() override { - // Y_FAIL("Spdk is not supported now"); - //} - - void WaitAllThreads() override { - Y_FAIL("Spdk is not supported now"); - } -}; - -} +#pragma once + +#include <util/system/yassert.h> + +namespace NKikimr::NPDisk { + +class ISpdkState { +public: + virtual void LaunchThread(int (*fn)(void *), void *cookie) = 0; + virtual ui8 *Malloc(ui64 size, ui32 align) = 0; + virtual void Free(ui8 *buff) = 0; + //virtual ui64 GetDeviceSize() = 0; + virtual void WaitAllThreads() = 0; + virtual ~ISpdkState() {}; +}; + +class TSpdkStateOSS : public ISpdkState { +public: + TSpdkStateOSS() {} + + void LaunchThread(int (*)(void *), void *) override { + Y_FAIL("Spdk is not supported now"); + } + + ui8 *Malloc(ui64, ui32) override { + Y_FAIL("Spdk is not supported now"); + } + + void Free(ui8 *) override { + Y_FAIL("Spdk is not supported now"); + } + + //ui64 GetDeviceSize() override { + // Y_FAIL("Spdk is not supported now"); + //} + + void WaitAllThreads() override { + Y_FAIL("Spdk is not supported now"); + } +}; + +} diff --git a/ydb/library/pdisk_io/ya.make b/ydb/library/pdisk_io/ya.make index a118c1848d..ae4efcd019 100644 --- a/ydb/library/pdisk_io/ya.make +++ b/ydb/library/pdisk_io/ya.make @@ -1,54 +1,54 @@ -LIBRARY() - -OWNER( - va-kuznecov - g:kikimr -) - -GENERATE_ENUM_SERIALIZATION(aio.h) - -IF (OS_LINUX) - PEERDIR( - contrib/libs/libaio - ) - SRCS( - aio_linux.cpp - file_params_linux.cpp - ) +LIBRARY() + +OWNER( + va-kuznecov + g:kikimr +) + +GENERATE_ENUM_SERIALIZATION(aio.h) + +IF (OS_LINUX) + PEERDIR( + contrib/libs/libaio + ) + SRCS( + aio_linux.cpp + file_params_linux.cpp + ) ELSE(OS_LINUX) - SRCS( - aio_mtp.cpp - ) -ENDIF(OS_LINUX) - -IF (OS_DARWIN) - SRCS( - file_params_darwin.cpp - ) -ENDIF(OS_DARWIN) - -IF (OS_WINDOWS) - SRCS( - file_params_win.cpp - ) -ENDIF(OS_WINDOWS) - -PEERDIR( - library/cpp/actors/core - library/cpp/monlib/dynamic_counters + SRCS( + 
aio_mtp.cpp + ) +ENDIF(OS_LINUX) + +IF (OS_DARWIN) + SRCS( + file_params_darwin.cpp + ) +ENDIF(OS_DARWIN) + +IF (OS_WINDOWS) + SRCS( + file_params_win.cpp + ) +ENDIF(OS_WINDOWS) + +PEERDIR( + library/cpp/actors/core + library/cpp/monlib/dynamic_counters ydb/core/debug ydb/library/pdisk_io/protos ydb/library/wilson -) - -SRCS( - aio.cpp - aio.h - aio_map.cpp - buffers.cpp - buffers.h - sector_map.cpp - sector_map.h -) - -END() +) + +SRCS( + aio.cpp + aio.h + aio_map.cpp + buffers.cpp + buffers.h + sector_map.cpp + sector_map.h +) + +END() diff --git a/ydb/public/lib/ydb_cli/commands/ydb_tools.cpp b/ydb/public/lib/ydb_cli/commands/ydb_tools.cpp index 3fca191b6b..db4ec14d30 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_tools.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_tools.cpp @@ -1,44 +1,44 @@ -#include "ydb_tools.h" - +#include "ydb_tools.h" + #include <ydb/public/lib/ydb_cli/common/normalize_path.h> #include <ydb/public/lib/ydb_cli/dump/dump.h> #include <ydb/library/backup/backup.h> #include <ydb/library/backup/util.h> - + #include <util/stream/format.h> -#include <util/string/split.h> - -namespace NYdb::NConsoleClient { - -TCommandTools::TCommandTools() - : TClientCommandTree("tools", {}, "YDB tools service") -{ +#include <util/string/split.h> + +namespace NYdb::NConsoleClient { + +TCommandTools::TCommandTools() + : TClientCommandTree("tools", {}, "YDB tools service") +{ AddCommand(std::make_unique<TCommandDump>()); AddCommand(std::make_unique<TCommandRestore>()); AddCommand(std::make_unique<TCommandCopy>()); AddCommand(std::make_unique<TCommandRename>()); -} - -TToolsCommand::TToolsCommand(const TString& name, const std::initializer_list<TString>& aliases, const TString& description) - : TYdbCommand(name, aliases, description) -{} - -void TToolsCommand::Config(TConfig& config) { - TYdbCommand::Config(config); -} - -//////////////////////////////////////////////////////////////////////////////// -// Dump 
-//////////////////////////////////////////////////////////////////////////////// -TCommandDump::TCommandDump() +} + +TToolsCommand::TToolsCommand(const TString& name, const std::initializer_list<TString>& aliases, const TString& description) + : TYdbCommand(name, aliases, description) +{} + +void TToolsCommand::Config(TConfig& config) { + TYdbCommand::Config(config); +} + +//////////////////////////////////////////////////////////////////////////////// +// Dump +//////////////////////////////////////////////////////////////////////////////// +TCommandDump::TCommandDump() : TToolsCommand("dump", {}, "Dump specified database directory or table into local directory") -{} - -void TCommandDump::Config(TConfig& config) { - TToolsCommand::Config(config); - - config.SetFreeArgsNum(0); - +{} + +void TCommandDump::Config(TConfig& config) { + TToolsCommand::Config(config); + + config.SetFreeArgsNum(0); + config.Opts->AddLongOption('p', "path", "Database path to a directory or a table to be dumped.") .DefaultValue(".").StoreResult(&Path); config.Opts->AddLongOption("exclude", "Pattern(s) (PCRE) for paths excluded from dump." @@ -48,9 +48,9 @@ void TCommandDump::Config(TConfig& config) { }); config.Opts->AddLongOption('o', "output", "[Required] Path in a local filesystem to a directory to place dump into." " Directory should either not exist or be empty.") - .StoreResult(&FilePath); + .StoreResult(&FilePath); config.Opts->AddLongOption("scheme-only", "Dump only scheme") - .StoreTrue(&IsSchemeOnly); + .StoreTrue(&IsSchemeOnly); config.Opts->AddLongOption("avoid-copy", "Avoid copying." " By default, YDB makes a copy of a table before dumping it to reduce impact on workload and ensure consistency.\n" "In some cases (e.g. for tables with external blobs) copying should be disabled.") @@ -68,65 +68,65 @@ void TCommandDump::Config(TConfig& config) { "database - take one consistent snapshot of all tables specified for dump." 
" Takes more time and is more likely to impact workload;\n" "table - take consistent snapshot per each table independently.") - .DefaultValue("database").StoreResult(&ConsistencyLevel); -} - -void TCommandDump::Parse(TConfig& config) { - TClientCommand::Parse(config); + .DefaultValue("database").StoreResult(&ConsistencyLevel); +} + +void TCommandDump::Parse(TConfig& config) { + TClientCommand::Parse(config); AdjustPath(config); -} - -int TCommandDump::Run(TConfig& config) { - - bool useConsistentCopyTable; - if (ConsistencyLevel == "database") { - useConsistentCopyTable = true; - } else if (ConsistencyLevel == "table") { - useConsistentCopyTable = false; - } else { +} + +int TCommandDump::Run(TConfig& config) { + + bool useConsistentCopyTable; + if (ConsistencyLevel == "database") { + useConsistentCopyTable = true; + } else if (ConsistencyLevel == "table") { + useConsistentCopyTable = false; + } else { throw yexception() << "Incorrect consistency level. Available options: \"database\", \"table\"" << Endl; - } - - NYdb::SetVerbosity(config.IsVerbose); - - try { - TString relPath = NYdb::RelPathFromAbsolute(config.Database, Path); + } + + NYdb::SetVerbosity(config.IsVerbose); + + try { + TString relPath = NYdb::RelPathFromAbsolute(config.Database, Path); NYdb::NBackup::BackupFolder(CreateDriver(config), config.Database, relPath, FilePath, ExclusionPatterns, IsSchemeOnly, useConsistentCopyTable, AvoidCopy, SavePartialResult, PreservePoolKinds); - } catch (const NYdb::NBackup::TYdbErrorException& e) { - e.LogToStderr(); - return EXIT_FAILURE; - } catch (const yexception& e) { - Cerr << "General error, what# " << e.what() << Endl; - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - -//////////////////////////////////////////////////////////////////////////////// -// Restore -//////////////////////////////////////////////////////////////////////////////// -TCommandRestore::TCommandRestore() - : TToolsCommand("restore", {}, "Restore database from local dump into 
specified directory") -{} - -void TCommandRestore::Config(TConfig& config) { - TToolsCommand::Config(config); - - config.SetFreeArgsNum(0); - - config.Opts->AddLongOption('p', "path", + } catch (const NYdb::NBackup::TYdbErrorException& e) { + e.LogToStderr(); + return EXIT_FAILURE; + } catch (const yexception& e) { + Cerr << "General error, what# " << e.what() << Endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +//////////////////////////////////////////////////////////////////////////////// +// Restore +//////////////////////////////////////////////////////////////////////////////// +TCommandRestore::TCommandRestore() + : TToolsCommand("restore", {}, "Restore database from local dump into specified directory") +{} + +void TCommandRestore::Config(TConfig& config) { + TToolsCommand::Config(config); + + config.SetFreeArgsNum(0); + + config.Opts->AddLongOption('p', "path", "[Required] Database path to a destination directory where restored directory or table will be placed.") - .StoreResult(&Path); + .StoreResult(&Path); config.Opts->AddLongOption('i', "input", "[Required] Path in a local filesystem to a directory with dump.") - .StoreResult(&FilePath); - + .StoreResult(&FilePath); + config.Opts->AddLongOption("dry-run", TStringBuilder() << "Do not restore tables, only check that:" << Endl << " - all dumped tables exist in database;" << Endl << " - all dumped table schemes are the same as in database.") - .StoreTrue(&IsDryRun); + .StoreTrue(&IsDryRun); NDump::TRestoreSettings defaults; @@ -148,7 +148,7 @@ void TCommandRestore::Config(TConfig& config) { .StoreTrue(&SavePartialResult); config.Opts->AddLongOption("bandwidth", "Limit data upload bandwidth, bytes per second (example: 2MiB)") - .DefaultValue("0").StoreResult(&UploadBandwidth); + .DefaultValue("0").StoreResult(&UploadBandwidth); config.Opts->AddLongOption("rps", "Limit requests per second (example: 100)") .DefaultValue(defaults.RateLimiterSettings_.GetRps()).StoreResult(&UploadRps); @@ -176,16 
+176,16 @@ void TCommandRestore::Config(TConfig& config) { config.Opts->MutuallyExclusive("bandwidth", "rps"); config.Opts->MutuallyExclusive("import-data", "bulk-upsert"); -} - -void TCommandRestore::Parse(TConfig& config) { - TClientCommand::Parse(config); +} + +void TCommandRestore::Parse(TConfig& config) { + TClientCommand::Parse(config); AdjustPath(config); -} - -int TCommandRestore::Run(TConfig& config) { - NYdb::SetVerbosity(config.IsVerbose); - +} + +int TCommandRestore::Run(TConfig& config) { + NYdb::SetVerbosity(config.IsVerbose); + auto settings = NDump::TRestoreSettings() .DryRun(IsDryRun) .RestoreData(RestoreData) @@ -194,7 +194,7 @@ int TCommandRestore::Run(TConfig& config) { .SavePartialResult(SavePartialResult) .RowsPerRequest(NYdb::SizeFromString(RowsPerRequest)) .InFly(InFly); - + if (auto bytesPerRequest = NYdb::SizeFromString(BytesPerRequest)) { if (bytesPerRequest > NDump::TRestoreSettings::MaxBytesPerRequest) { throw TMissUseException() @@ -203,7 +203,7 @@ int TCommandRestore::Run(TConfig& config) { } settings.BytesPerRequest(bytesPerRequest); - } + } if (RequestUnitsPerRequest) { settings.RequestUnitsPerRequest(NYdb::SizeFromString(RequestUnitsPerRequest)); @@ -224,9 +224,9 @@ int TCommandRestore::Run(TConfig& config) { NDump::TClient client(CreateDriver(config)); ThrowOnError(client.Restore(FilePath, Path, settings)); - return EXIT_SUCCESS; -} - + return EXIT_SUCCESS; +} + //////////////////////////////////////////////////////////////////////////////// // Copy //////////////////////////////////////////////////////////////////////////////// @@ -375,4 +375,4 @@ int TCommandRename::Run(TConfig& config) { return EXIT_SUCCESS; } -} // NYdb::NConsoleClient +} // NYdb::NConsoleClient diff --git a/ydb/public/lib/ydb_cli/commands/ydb_tools.h b/ydb/public/lib/ydb_cli/commands/ydb_tools.h index 0682426d2e..3a1218a981 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_tools.h +++ b/ydb/public/lib/ydb_cli/commands/ydb_tools.h @@ -1,74 +1,74 @@ -#pragma 
once - -#include "ydb_command.h" -#include "ydb_common.h" +#pragma once + +#include "ydb_command.h" +#include "ydb_common.h" #include "ydb_service_table.h" - + #include <ydb/public/lib/ydb_cli/common/examples.h> #include <ydb/public/lib/ydb_cli/common/parseable_struct.h> #include <library/cpp/regex/pcre/regexp.h> -namespace NYdb { -namespace NConsoleClient { - -class TCommandTools : public TClientCommandTree { -public: - TCommandTools(); -}; - -class TToolsCommand : public TYdbCommand { -public: - TToolsCommand( - const TString& name, - const std::initializer_list<TString>& aliases = std::initializer_list<TString>(), - const TString& description = TString() - ); - - virtual void Config(TConfig& config) override; -}; - +namespace NYdb { +namespace NConsoleClient { + +class TCommandTools : public TClientCommandTree { +public: + TCommandTools(); +}; + +class TToolsCommand : public TYdbCommand { +public: + TToolsCommand( + const TString& name, + const std::initializer_list<TString>& aliases = std::initializer_list<TString>(), + const TString& description = TString() + ); + + virtual void Config(TConfig& config) override; +}; + class TCommandDump : public TToolsCommand, public TCommandWithPath { -public: - TCommandDump(); - virtual void Config(TConfig& config) override; - virtual void Parse(TConfig& config) override; - virtual int Run(TConfig& config) override; - -private: +public: + TCommandDump(); + virtual void Config(TConfig& config) override; + virtual void Parse(TConfig& config) override; + virtual int Run(TConfig& config) override; + +private: TVector<TRegExMatch> ExclusionPatterns; - TString FilePath; - bool IsSchemeOnly; + TString FilePath; + bool IsSchemeOnly; bool AvoidCopy = false; bool SavePartialResult = false; - TString ConsistencyLevel; + TString ConsistencyLevel; bool PreservePoolKinds = false; -}; - +}; + class TCommandRestore : public TToolsCommand, public TCommandWithPath { -public: - TCommandRestore(); - virtual void Config(TConfig& config) 
override; - virtual void Parse(TConfig& config) override; - virtual int Run(TConfig& config) override; - -private: - TString FilePath; - bool IsDryRun = false; +public: + TCommandRestore(); + virtual void Config(TConfig& config) override; + virtual void Parse(TConfig& config) override; + virtual int Run(TConfig& config) override; + +private: + TString FilePath; + bool IsDryRun = false; bool RestoreData = true; bool RestoreIndexes = true; bool SkipDocumentTables = false; bool SavePartialResult = false; - TString UploadBandwidth; + TString UploadBandwidth; TString UploadRps; TString RowsPerRequest; TString BytesPerRequest; TString RequestUnitsPerRequest; ui32 InFly; - bool UseBulkUpsert = false; + bool UseBulkUpsert = false; bool UseImportData = false; -}; - +}; + class TCommandCopy : public TTableCommand { public: TCommandCopy(); @@ -106,5 +106,5 @@ private: TString DatabaseName; }; -} -} +} +} diff --git a/ydb/public/sdk/cpp/client/ydb_result/result.cpp b/ydb/public/sdk/cpp/client/ydb_result/result.cpp index 88213e6eed..dc7eb09c0a 100644 --- a/ydb/public/sdk/cpp/client/ydb_result/result.cpp +++ b/ydb/public/sdk/cpp/client/ydb_result/result.cpp @@ -25,14 +25,14 @@ void TColumn::Out(IOutputStream& o) const { << " }"; } -bool operator==(const TColumn& col1, const TColumn& col2) { - return col1.Name == col2.Name && TypesEqual(col1.Type, col2.Type); -} - -bool operator!=(const TColumn& col1, const TColumn& col2) { - return !(col1 == col2); -} - +bool operator==(const TColumn& col1, const TColumn& col2) { + return col1.Name == col2.Name && TypesEqual(col1.Type, col2.Type); +} + +bool operator!=(const TColumn& col1, const TColumn& col2) { + return !(col1 == col2); +} + class TResultSet::TImpl { public: TImpl(const Ydb::ResultSet& proto) diff --git a/ydb/public/sdk/cpp/client/ydb_result/result.h b/ydb/public/sdk/cpp/client/ydb_result/result.h index f975af549c..287c8078be 100644 --- a/ydb/public/sdk/cpp/client/ydb_result/result.h +++ 
b/ydb/public/sdk/cpp/client/ydb_result/result.h @@ -25,9 +25,9 @@ struct TColumn { void Out(IOutputStream& o) const; }; -bool operator==(const TColumn& col1, const TColumn& col2); -bool operator!=(const TColumn& col1, const TColumn& col2); - +bool operator==(const TColumn& col1, const TColumn& col2); +bool operator!=(const TColumn& col1, const TColumn& col2); + //! Collection of rows, represents result of query or part of the result in case of stream operations class TResultSet { friend class TResultSetParser; diff --git a/ydb/public/sdk/cpp/client/ydb_value/value.cpp b/ydb/public/sdk/cpp/client/ydb_value/value.cpp index 54c122a981..8c18d4d50f 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/value.cpp +++ b/ydb/public/sdk/cpp/client/ydb_value/value.cpp @@ -66,7 +66,7 @@ static TTypeParser::ETypeKind GetKind(const Ydb::Type& type) { return ETypeKind::Void; } -bool TypesEqual(const TType& t1, const TType& t2) { +bool TypesEqual(const TType& t1, const TType& t2) { return TypesEqual(TProtoAccessor::GetProto(t1), TProtoAccessor::GetProto(t2)); } diff --git a/ydb/public/sdk/cpp/client/ydb_value/value.h b/ydb/public/sdk/cpp/client/ydb_value/value.h index dd90fedee5..4a163b9220 100644 --- a/ydb/public/sdk/cpp/client/ydb_value/value.h +++ b/ydb/public/sdk/cpp/client/ydb_value/value.h @@ -140,8 +140,8 @@ private: std::unique_ptr<TImpl> Impl_; }; -bool TypesEqual(const TType& t1, const TType& t2); - +bool TypesEqual(const TType& t1, const TType& t2); + TString FormatType(const TType& type); //! Used to create arbitrary type. 
diff --git a/ydb/public/sdk/python/ydb/table.py b/ydb/public/sdk/python/ydb/table.py index 2b7fe4ac3b..c49c4660c7 100644 --- a/ydb/public/sdk/python/ydb/table.py +++ b/ydb/public/sdk/python/ydb/table.py @@ -124,9 +124,9 @@ class Column(object): self._type = type self.family = family - def __eq__(self, other): - return self.name == other.name and self._type.item == other.type.item - + def __eq__(self, other): + return self.name == other.name and self._type.item == other.type.item + @property def name(self): return self._name diff --git a/ydb/public/sdk/python/ydb/types.py b/ydb/public/sdk/python/ydb/types.py index b50f282621..6ae09a5b42 100644 --- a/ydb/public/sdk/python/ydb/types.py +++ b/ydb/public/sdk/python/ydb/types.py @@ -203,9 +203,9 @@ class DecimalType(AbstractTypeBuilder): """ return self._proto - def __eq__(self, other): - return self._precision == other.precision and self._scale == other.scale - + def __eq__(self, other): + return self._precision == other.precision and self._scale == other.scale + def __str__(self): """ Returns string representation of a type @@ -241,9 +241,9 @@ class OptionalType(AbstractTypeBuilder): """ return self._proto - def __eq__(self, other): - return self._item == other.item - + def __eq__(self, other): + return self._item == other.item + def __str__(self): return self._repr diff --git a/ydb/tests/functional/blobstorage/self_heal.py b/ydb/tests/functional/blobstorage/self_heal.py index ac7505540b..e18b22bae1 100644 --- a/ydb/tests/functional/blobstorage/self_heal.py +++ b/ydb/tests/functional/blobstorage/self_heal.py @@ -1,36 +1,36 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from hamcrest import is_ - +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from hamcrest import is_ + from ydb.tests.library.common.wait_for import wait_for_and_assert from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory from ydb.tests.library.predicates import blobstorage - - -TIMEOUT_SECONDS = 480 - - -class 
TestEnableSelfHeal(object): - @classmethod - def setup_class(cls): - cls.kikimr_cluster = kikimr_cluster_factory() - cls.kikimr_cluster.start() - cls.client = cls.kikimr_cluster.client - cls.client.update_self_heal(True) - - @classmethod - def teardown_class(cls): - if hasattr(cls, 'kikimr_cluster'): - cls.kikimr_cluster.stop() - - def test_replication(self): - for node in self.kikimr_cluster.nodes.values(): - node.stop() - node.start() - - # TODO break pdisks and wait for self heal to repair cluster - - wait_for_and_assert( - lambda: blobstorage.cluster_has_no_unsynced_vdisks(self.kikimr_cluster), - is_(True), timeout_seconds=TIMEOUT_SECONDS, message='All vdisks are sync' - ) + + +TIMEOUT_SECONDS = 480 + + +class TestEnableSelfHeal(object): + @classmethod + def setup_class(cls): + cls.kikimr_cluster = kikimr_cluster_factory() + cls.kikimr_cluster.start() + cls.client = cls.kikimr_cluster.client + cls.client.update_self_heal(True) + + @classmethod + def teardown_class(cls): + if hasattr(cls, 'kikimr_cluster'): + cls.kikimr_cluster.stop() + + def test_replication(self): + for node in self.kikimr_cluster.nodes.values(): + node.stop() + node.start() + + # TODO break pdisks and wait for self heal to repair cluster + + wait_for_and_assert( + lambda: blobstorage.cluster_has_no_unsynced_vdisks(self.kikimr_cluster), + is_(True), timeout_seconds=TIMEOUT_SECONDS, message='All vdisks are sync' + ) diff --git a/ydb/tests/functional/blobstorage/ya.make b/ydb/tests/functional/blobstorage/ya.make index 21716e1176..3248849d57 100644 --- a/ydb/tests/functional/blobstorage/ya.make +++ b/ydb/tests/functional/blobstorage/ya.make @@ -4,9 +4,9 @@ PY3TEST() ENV(YDB_DRIVER_BINARY="ydb/apps/ydbd/ydbd") TEST_SRCS( - pdisk_format_info.py + pdisk_format_info.py replication.py - self_heal.py + self_heal.py tablet_channel_migration.py ) diff --git a/ydb/tests/functional/ydb_cli/test_ydb_backup.py b/ydb/tests/functional/ydb_cli/test_ydb_backup.py index 0162e09964..2431a47111 100644 --- 
a/ydb/tests/functional/ydb_cli/test_ydb_backup.py +++ b/ydb/tests/functional/ydb_cli/test_ydb_backup.py @@ -1,719 +1,719 @@ -# -*- coding: utf-8 -*- - +# -*- coding: utf-8 -*- + from ydb.tests.library.common import yatest_common from ydb.tests.library.harness.kikimr_cluster import kikimr_cluster_factory import ydb -from hamcrest import assert_that, is_, is_not, contains_inanyorder, has_item, has_items -import os -import logging - - -logger = logging.getLogger(__name__) - - -def backup_bin(): +from hamcrest import assert_that, is_, is_not, contains_inanyorder, has_item, has_items +import os +import logging + + +logger = logging.getLogger(__name__) + + +def backup_bin(): return yatest_common.binary_path("ydb/apps/ydb/ydb") - - -def upsert_simple(session, full_path): - path, table = os.path.split(full_path) - session.transaction().execute( - """ - PRAGMA TablePathPrefix("{0}"); + + +def upsert_simple(session, full_path): + path, table = os.path.split(full_path) + session.transaction().execute( + """ + PRAGMA TablePathPrefix("{0}"); UPSERT INTO {1} (`id`, `number`, `string`, `fixed_point`) VALUES (2, 6, "pen", CAST("2.4" AS Decimal(22,9))); UPSERT INTO {1} (`id`, `string`, `fixed_point`) VALUES (3, "pineapple", CAST("3.5" AS Decimal(22,9))); UPSERT INTO {1} (`id`, `number`, `fixed_point`) VALUES (5, 12, CAST("512.6" AS Decimal(22,9))); UPSERT INTO {1} (`id`, `number`, `string` ) VALUES (7, 15, "pen" ); - """.format(path, table), - commit_tx=True, - ) - - + """.format(path, table), + commit_tx=True, + ) + + def output_path(*args): path = os.path.join(yatest_common.output_path(), *args) os.makedirs(path) return path -def list_to_string(arr, formatter=lambda x: x): - string = "{" - needsComma = False - for x in arr: - if needsComma: - string += ", " - needsComma = True - string += formatter(x) - string += "}" - return string - - -def columns_to_string(columns): - return list_to_string(columns, lambda col: col.name + ":" + str(col.type.item).strip()) - - -def 
create_table_with_data(session, path): - path = "/Root/" + path - session.create_table( - path, - ydb.TableDescription() - .with_column(ydb.Column("id", ydb.OptionalType(ydb.PrimitiveType.Uint32))) - .with_column(ydb.Column("number", ydb.OptionalType(ydb.PrimitiveType.Uint64))) - .with_column(ydb.Column("string", ydb.OptionalType(ydb.PrimitiveType.String))) - .with_column(ydb.Column("fixed_point", ydb.OptionalType(ydb.DecimalType()))) - .with_primary_keys("id") - ) - - upsert_simple(session, path) - - -def is_tables_the_same(session, path_left, path_right, check_data=True): - table_desc_left = session.describe_table(path_left) - table_desc_right = session.describe_table(path_right) +def list_to_string(arr, formatter=lambda x: x): + string = "{" + needsComma = False + for x in arr: + if needsComma: + string += ", " + needsComma = True + string += formatter(x) + string += "}" + return string + + +def columns_to_string(columns): + return list_to_string(columns, lambda col: col.name + ":" + str(col.type.item).strip()) + + +def create_table_with_data(session, path): + path = "/Root/" + path + session.create_table( + path, + ydb.TableDescription() + .with_column(ydb.Column("id", ydb.OptionalType(ydb.PrimitiveType.Uint32))) + .with_column(ydb.Column("number", ydb.OptionalType(ydb.PrimitiveType.Uint64))) + .with_column(ydb.Column("string", ydb.OptionalType(ydb.PrimitiveType.String))) + .with_column(ydb.Column("fixed_point", ydb.OptionalType(ydb.DecimalType()))) + .with_primary_keys("id") + ) + + upsert_simple(session, path) + + +def is_tables_the_same(session, path_left, path_right, check_data=True): + table_desc_left = session.describe_table(path_left) + table_desc_right = session.describe_table(path_right) if ( sorted(table_desc_left.columns, key=lambda x: x.name) != sorted(table_desc_right.columns, key=lambda x: x.name) or table_desc_left.primary_key != table_desc_right.primary_key): - left_cols = columns_to_string(table_desc_left.columns) - left_pk = 
list_to_string(table_desc_left.primary_key) - right_cols = columns_to_string(table_desc_right.columns) - right_pk = list_to_string(table_desc_right.primary_key) - logging.debug("Tables descriptions (is not the same)!" + - "\npath_left# " + path_left + " has columns# " + left_cols + " primary_key# " + left_pk + - "\npath_right# " + path_right + " has columns# " + right_cols + " primary_key# " + right_pk) - return False - - if not check_data: - return True - - table_it_left = session.read_table(path_left, ordered=True) - table_it_right = session.read_table(path_right, ordered=True) - left_rows = [] - right_rows = [] - processed_rows = 0 - while True: - if len(left_rows) == 0: - try: - left_rows = next(table_it_left).rows - except StopIteration: - if len(right_rows) == 0: - return True - else: - logging.debug(path_left + " is shorter than " + path_right + " processed# " + str(processed_rows) + - " len(right_rows)#" + str(len(right_rows))) - return False - if len(right_rows) == 0: - try: - right_rows = next(table_it_right).rows - except StopIteration: - if len(left_rows) == 0: - return True - else: - logging.debug(path_right + " is shorter than " + path_left + " processed# " + str(processed_rows) + - " len(left_rows)#" + str(len(left_rows))) - return False - - rows_to_process = min(len(left_rows), len(right_rows)) - for i in range(rows_to_process): - if left_rows[i] != right_rows[i]: - logging.debug(str(left_rows[i]) + " != " + str(right_rows[i])) - return False - processed_rows += rows_to_process - left_rows = left_rows[rows_to_process:] - right_rows = right_rows[rows_to_process:] - return True - - -def list_all_dirs(prefix, path=""): - paths = [] - full_path = os.path.join(prefix, path) - logger.debug("prefix# " + prefix + " path# " + path) - for item in os.listdir(full_path): - item_path = os.path.join(full_path, item) - if os.path.isdir(item_path): - paths.append(os.path.join(path, item)) - paths += list_all_dirs(prefix, os.path.join(path, item)) - else: - # don't 
list regular files - pass - return paths - - + left_cols = columns_to_string(table_desc_left.columns) + left_pk = list_to_string(table_desc_left.primary_key) + right_cols = columns_to_string(table_desc_right.columns) + right_pk = list_to_string(table_desc_right.primary_key) + logging.debug("Tables descriptions (is not the same)!" + + "\npath_left# " + path_left + " has columns# " + left_cols + " primary_key# " + left_pk + + "\npath_right# " + path_right + " has columns# " + right_cols + " primary_key# " + right_pk) + return False + + if not check_data: + return True + + table_it_left = session.read_table(path_left, ordered=True) + table_it_right = session.read_table(path_right, ordered=True) + left_rows = [] + right_rows = [] + processed_rows = 0 + while True: + if len(left_rows) == 0: + try: + left_rows = next(table_it_left).rows + except StopIteration: + if len(right_rows) == 0: + return True + else: + logging.debug(path_left + " is shorter than " + path_right + " processed# " + str(processed_rows) + + " len(right_rows)#" + str(len(right_rows))) + return False + if len(right_rows) == 0: + try: + right_rows = next(table_it_right).rows + except StopIteration: + if len(left_rows) == 0: + return True + else: + logging.debug(path_right + " is shorter than " + path_left + " processed# " + str(processed_rows) + + " len(left_rows)#" + str(len(left_rows))) + return False + + rows_to_process = min(len(left_rows), len(right_rows)) + for i in range(rows_to_process): + if left_rows[i] != right_rows[i]: + logging.debug(str(left_rows[i]) + " != " + str(right_rows[i])) + return False + processed_rows += rows_to_process + left_rows = left_rows[rows_to_process:] + right_rows = right_rows[rows_to_process:] + return True + + +def list_all_dirs(prefix, path=""): + paths = [] + full_path = os.path.join(prefix, path) + logger.debug("prefix# " + prefix + " path# " + path) + for item in os.listdir(full_path): + item_path = os.path.join(full_path, item) + if os.path.isdir(item_path): + 
paths.append(os.path.join(path, item)) + paths += list_all_dirs(prefix, os.path.join(path, item)) + else: + # don't list regular files + pass + return paths + + class BaseTestBackupInFiles(object): - @classmethod - def setup_class(cls): + @classmethod + def setup_class(cls): cls.cluster = kikimr_cluster_factory() - cls.cluster.start() + cls.cluster.start() cls.root_dir = "/Root" driver_config = ydb.DriverConfig( database="/Root", endpoint="%s:%s" % (cls.cluster.nodes[1].host, cls.cluster.nodes[1].port)) - cls.driver = ydb.Driver(driver_config) - cls.driver.wait(timeout=4) - - @classmethod - def teardown_class(cls): + cls.driver = ydb.Driver(driver_config) + cls.driver.wait(timeout=4) + + @classmethod + def teardown_class(cls): cls.cluster.stop() - - @classmethod - def create_backup(cls, path, expected_dirs, check_data, additional_args=[]): - _, name = os.path.split(path) + + @classmethod + def create_backup(cls, path, expected_dirs, check_data, additional_args=[]): + _, name = os.path.split(path) backup_files_dir = output_path("backup_files_dir_" + path.replace("/", "_")) - execution = yatest_common.execute( - [ - backup_bin(), - "--verbose", + execution = yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % cls.cluster.nodes[1].grpc_port, - "--database", "/Root", + "--database", "/Root", "tools", "dump", "--path", os.path.join('/Root', path), "--output", backup_files_dir - ] + - additional_args - ) - + ] + + additional_args + ) + logger.debug("std_out:\n" + execution.std_out.decode('utf-8')) - list_all_dirs(backup_files_dir) - logger.debug("list_all_dirs(backup_files_dir)# " + str(list_all_dirs(backup_files_dir))) - logger.debug("expected_dirs# " + str(expected_dirs)) - - assert_that( - list_all_dirs(backup_files_dir), - has_items(*expected_dirs) - ) - - for _dir in expected_dirs: - if check_data: - assert_that( - os.listdir(backup_files_dir + "/" + _dir), - contains_inanyorder("data_00.csv", "scheme.pb") - ) - else: - 
assert_that( - os.listdir(backup_files_dir + "/" + _dir), - has_item("scheme.pb") - ) - + list_all_dirs(backup_files_dir) + logger.debug("list_all_dirs(backup_files_dir)# " + str(list_all_dirs(backup_files_dir))) + logger.debug("expected_dirs# " + str(expected_dirs)) + + assert_that( + list_all_dirs(backup_files_dir), + has_items(*expected_dirs) + ) + + for _dir in expected_dirs: + if check_data: + assert_that( + os.listdir(backup_files_dir + "/" + _dir), + contains_inanyorder("data_00.csv", "scheme.pb") + ) + else: + assert_that( + os.listdir(backup_files_dir + "/" + _dir), + has_item("scheme.pb") + ) + class TestBackupSingle(BaseTestBackupInFiles): - def test_single_table_backup(self): - session = self.driver.table_client.session().create() - # Create table - path = "table" - create_table_with_data(session, path) - - # Backup table - self.create_backup(path, [path], False) - - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + def test_single_table_backup(self): + session = self.driver.table_client.session().create() + # Create table + path = "table" + create_table_with_data(session, path) + + # Backup table + self.create_backup(path, [path], False) + + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["table", ".sys"]) - ) - + ) + class TestBaseSingleFromDifPlaces(BaseTestBackupInFiles): - def test_single_table_backup_from_different_places(self): - session = self.driver.table_client.session().create() - # Create table - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - self.driver.scheme_client.make_directory( - '/Root/folder/sub_folder' - ) - tables_paths = [ - "first", - "second", - "folder/third", - "folder/fourth", - "folder/sub_folder/fifth", - ] - - for path in tables_paths: - create_table_with_data(session, path) - - # Backup table - for path in tables_paths: - _, table_name = os.path.split(path) - self.create_backup(path, 
[table_name], True) - - -class TestRecursiveNonConsistent(BaseTestBackupInFiles): - def test_recursive_table_backup_from_different_places(self): - session = self.driver.table_client.session().create() - # Create table - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - self.driver.scheme_client.make_directory( - '/Root/folder/sub_folder' - ) - tables_paths = [ - "first", - "second", - "folder/third", - "folder/fourth", - "folder/sub_folder/fifth", - ] - - for path in tables_paths: - create_table_with_data(session, path) - - # Backup all tables from Root recursively - self.create_backup("/Root", tables_paths, True, ["--consistency-level", "table"]) - - # Backup single table - self.create_backup("first", ["first"], True, ["--consistency-level", "table"]) - self.create_backup("folder/third", ["third"], True, ["--consistency-level", "table"]) - - # Backup tables from folder recursively - tables_paths = [ - "third", - "fourth", - "sub_folder/fifth", - ] - self.create_backup("folder", tables_paths, True, ["--consistency-level", "table"]) - - # Backup table from sub_folder recursively - tables_paths = [ - "fifth", - ] - self.create_backup("folder/sub_folder", tables_paths, True, ["--consistency-level", "table"]) - - -class TestRecursiveSchemeOnly(BaseTestBackupInFiles): - def test_recursive_table_backup_from_different_places(self): - session = self.driver.table_client.session().create() - # Create table - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - self.driver.scheme_client.make_directory( - '/Root/folder/sub_folder' - ) - tables_paths = [ - "first", - "second", - "folder/third", - "folder/fourth", - "folder/sub_folder/fifth", - ] - - for path in tables_paths: - create_table_with_data(session, path) - - # Backup all tables from Root recursively + def test_single_table_backup_from_different_places(self): + session = self.driver.table_client.session().create() + # Create table + self.driver.scheme_client.make_directory( + '/Root/folder' + 
) + self.driver.scheme_client.make_directory( + '/Root/folder/sub_folder' + ) + tables_paths = [ + "first", + "second", + "folder/third", + "folder/fourth", + "folder/sub_folder/fifth", + ] + + for path in tables_paths: + create_table_with_data(session, path) + + # Backup table + for path in tables_paths: + _, table_name = os.path.split(path) + self.create_backup(path, [table_name], True) + + +class TestRecursiveNonConsistent(BaseTestBackupInFiles): + def test_recursive_table_backup_from_different_places(self): + session = self.driver.table_client.session().create() + # Create table + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + self.driver.scheme_client.make_directory( + '/Root/folder/sub_folder' + ) + tables_paths = [ + "first", + "second", + "folder/third", + "folder/fourth", + "folder/sub_folder/fifth", + ] + + for path in tables_paths: + create_table_with_data(session, path) + + # Backup all tables from Root recursively + self.create_backup("/Root", tables_paths, True, ["--consistency-level", "table"]) + + # Backup single table + self.create_backup("first", ["first"], True, ["--consistency-level", "table"]) + self.create_backup("folder/third", ["third"], True, ["--consistency-level", "table"]) + + # Backup tables from folder recursively + tables_paths = [ + "third", + "fourth", + "sub_folder/fifth", + ] + self.create_backup("folder", tables_paths, True, ["--consistency-level", "table"]) + + # Backup table from sub_folder recursively + tables_paths = [ + "fifth", + ] + self.create_backup("folder/sub_folder", tables_paths, True, ["--consistency-level", "table"]) + + +class TestRecursiveSchemeOnly(BaseTestBackupInFiles): + def test_recursive_table_backup_from_different_places(self): + session = self.driver.table_client.session().create() + # Create table + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + self.driver.scheme_client.make_directory( + '/Root/folder/sub_folder' + ) + tables_paths = [ + "first", + "second", + 
"folder/third", + "folder/fourth", + "folder/sub_folder/fifth", + ] + + for path in tables_paths: + create_table_with_data(session, path) + + # Backup all tables from Root recursively self.create_backup("/Root", tables_paths, False, ["--scheme-only"]) - - # Backup single table - self.create_backup("first", ["first"], False, ["--scheme-only"]) - self.create_backup("folder/third", ["third"], False, ["--scheme-only"]) - - # Backup tables from folder recursively - tables_paths = [ - "third", - "fourth", - "sub_folder/fifth", - ] + + # Backup single table + self.create_backup("first", ["first"], False, ["--scheme-only"]) + self.create_backup("folder/third", ["third"], False, ["--scheme-only"]) + + # Backup tables from folder recursively + tables_paths = [ + "third", + "fourth", + "sub_folder/fifth", + ] self.create_backup("folder", tables_paths, False, ["--scheme-only"]) - - # Backup table from sub_folder recursively - tables_paths = [ - "fifth", - ] + + # Backup table from sub_folder recursively + tables_paths = [ + "fifth", + ] self.create_backup("folder/sub_folder", tables_paths, False, ["--scheme-only"]) - - -class TestRecursiveConsistent(BaseTestBackupInFiles): - def test_recursive_table_backup_from_different_places(self): - session = self.driver.table_client.session().create() - # Create table - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - self.driver.scheme_client.make_directory( - '/Root/folder/sub_folder' - ) - tables_paths = [ - "first", - "second", - "folder/third", - "folder/fourth", - "folder/sub_folder/fifth", - ] - - for path in tables_paths: - create_table_with_data(session, path) - - # Backup all tables from Root recursively + + +class TestRecursiveConsistent(BaseTestBackupInFiles): + def test_recursive_table_backup_from_different_places(self): + session = self.driver.table_client.session().create() + # Create table + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + self.driver.scheme_client.make_directory( + 
'/Root/folder/sub_folder' + ) + tables_paths = [ + "first", + "second", + "folder/third", + "folder/fourth", + "folder/sub_folder/fifth", + ] + + for path in tables_paths: + create_table_with_data(session, path) + + # Backup all tables from Root recursively self.create_backup("/Root", tables_paths, True, ["--consistency-level", "database"]) - - # Backup single table - self.create_backup("first", ["first"], True, ["--consistency-level", "database"]) - self.create_backup("folder/third", ["third"], True, ["--consistency-level", "database"]) - - # Backup tables from folder recursively - tables_paths = [ - "third", - "fourth", - "sub_folder/fifth", - ] - self.create_backup("folder", tables_paths, True, ["--consistency-level", "database"]) - - # Backup table from sub_folder recursively - tables_paths = [ - "fifth", - ] - self.create_backup("folder/sub_folder", tables_paths, True, ["--consistency-level", "database"]) - - + + # Backup single table + self.create_backup("first", ["first"], True, ["--consistency-level", "database"]) + self.create_backup("folder/third", ["third"], True, ["--consistency-level", "database"]) + + # Backup tables from folder recursively + tables_paths = [ + "third", + "fourth", + "sub_folder/fifth", + ] + self.create_backup("folder", tables_paths, True, ["--consistency-level", "database"]) + + # Backup table from sub_folder recursively + tables_paths = [ + "fifth", + ] + self.create_backup("folder/sub_folder", tables_paths, True, ["--consistency-level", "database"]) + + class TestSingleBackupRestore(BaseTestBackupInFiles): - def test_single_table_with_data_backup_restore(self): - self.test_single_table_with_data_backup_restore_impl(False) - self.test_single_table_with_data_backup_restore_impl(True) - - @classmethod - def test_single_table_with_data_backup_restore_impl(self, use_bulk_upsert): - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - postfix = '_bulk_upsert' if use_bulk_upsert else '' - - session = 
self.driver.table_client.session().create() - - # Create table and fill with data - create_table_with_data(session, "folder/table") - - # Backup table - backup_files_dir = output_path('test_single_table_with_data_backup_restore' + postfix, "backup_files_dir") - yatest_common.execute( - [ - backup_bin(), - "--verbose", + def test_single_table_with_data_backup_restore(self): + self.test_single_table_with_data_backup_restore_impl(False) + self.test_single_table_with_data_backup_restore_impl(True) + + @classmethod + def test_single_table_with_data_backup_restore_impl(self, use_bulk_upsert): + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + postfix = '_bulk_upsert' if use_bulk_upsert else '' + + session = self.driver.table_client.session().create() + + # Create table and fill with data + create_table_with_data(session, "folder/table") + + # Backup table + backup_files_dir = output_path('test_single_table_with_data_backup_restore' + postfix, "backup_files_dir") + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", + "--database", "/Root", "tools", "dump", "--path", "/Root/folder", "--output", backup_files_dir - ] - ) - assert_that( - os.listdir(backup_files_dir), - is_(["table"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + ] + ) + assert_that( + os.listdir(backup_files_dir), + is_(["table"]) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - - # Restore table - restore_cmd = [ - backup_bin(), - "--verbose", + ) + + # Restore table + restore_cmd = [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "restore", - "--path", "/Root/restored" + postfix, - "--input", backup_files_dir - ] - if use_bulk_upsert: - 
restore_cmd.append("--bulk-upsert") - yatest_common.execute(restore_cmd) - - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "restore", + "--path", "/Root/restored" + postfix, + "--input", backup_files_dir + ] + if use_bulk_upsert: + restore_cmd.append("--bulk-upsert") + yatest_common.execute(restore_cmd) + + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], contains_inanyorder("folder", "restored" + postfix, ".sys") - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root/restored" + postfix).children], - is_(["table"]) - ) - assert_that( - is_tables_the_same(session, "/Root/folder/table", "/Root/restored" + postfix + "/table"), - is_(True) - ) - session.drop_table("/Root/restored" + postfix + "/table") - self.driver.scheme_client.remove_directory("/Root/restored" + postfix) - - -class TestBackupRestoreInRoot(BaseTestBackupInFiles): - def test_table_backup_restore_in_root(self): - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - - session = self.driver.table_client.session().create() - - # Create table and fill with data - create_table_with_data(session, "folder/table") - - # Backup table - backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root/restored" + postfix).children], + is_(["table"]) + ) + assert_that( + is_tables_the_same(session, "/Root/folder/table", "/Root/restored" + postfix + "/table"), + is_(True) + ) + session.drop_table("/Root/restored" + postfix + "/table") + self.driver.scheme_client.remove_directory("/Root/restored" + postfix) + + +class TestBackupRestoreInRoot(BaseTestBackupInFiles): + def test_table_backup_restore_in_root(self): + 
self.driver.scheme_client.make_directory( + '/Root/folder' + ) + + session = self.driver.table_client.session().create() + + # Create table and fill with data + create_table_with_data(session, "folder/table") + + # Backup table + backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "dump", - "--path", "/Root/folder", - "--output", backup_files_dir - ] - ) - assert_that( - os.listdir(backup_files_dir), - is_(["table"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "dump", + "--path", "/Root/folder", + "--output", backup_files_dir + ] + ) + assert_that( + os.listdir(backup_files_dir), + is_(["table"]) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - - # Restore table - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + + # Restore table + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "restore", - "--path", "/Root/", - "--input", backup_files_dir - ] - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "restore", + "--path", "/Root/", + "--input", backup_files_dir + ] + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], contains_inanyorder("folder", "table", ".sys") - ) - assert_that( - is_tables_the_same(session, "/Root/folder/table", "/Root/table"), - is_(True) - ) - - -class TestBackupRestoreInRootSchemeOnly(BaseTestBackupInFiles): - def test_table_backup_restore_in_root_scheme_only(self): - 
self.driver.scheme_client.make_directory( - '/Root/folder' - ) - - session = self.driver.table_client.session().create() - - # Create table and fill with data - create_table_with_data(session, "folder/table") - - # Backup table - backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + assert_that( + is_tables_the_same(session, "/Root/folder/table", "/Root/table"), + is_(True) + ) + + +class TestBackupRestoreInRootSchemeOnly(BaseTestBackupInFiles): + def test_table_backup_restore_in_root_scheme_only(self): + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + + session = self.driver.table_client.session().create() + + # Create table and fill with data + create_table_with_data(session, "folder/table") + + # Backup table + backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "dump", - "--scheme-only", - "--path", "/Root/folder", - "--output", backup_files_dir - ] - ) - assert_that( - os.listdir(backup_files_dir), - is_(["table"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "dump", + "--scheme-only", + "--path", "/Root/folder", + "--output", backup_files_dir + ] + ) + assert_that( + os.listdir(backup_files_dir), + is_(["table"]) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - - # Restore table - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + + # Restore table + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "restore", - 
"--path", "/Root/", - "--input", backup_files_dir - ] - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "restore", + "--path", "/Root/", + "--input", backup_files_dir + ] + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], contains_inanyorder("folder", "table", ".sys") - ) - assert_that( - is_tables_the_same(session, "/Root/folder/table", "/Root/table", False), - is_(True) - ) - - + ) + assert_that( + is_tables_the_same(session, "/Root/folder/table", "/Root/table", False), + is_(True) + ) + + class TestIncompleteBackup(BaseTestBackupInFiles): - def test_incomplete_backup_will_not_be_restored(self): - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - - session = self.driver.table_client.session().create() - - create_table_with_data(session, "folder/table") - - # Backup table + def test_incomplete_backup_will_not_be_restored(self): + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + + session = self.driver.table_client.session().create() + + create_table_with_data(session, "folder/table") + + # Backup table backup_files_dir = output_path("backup_files_dir") - yatest_common.execute( - [ - backup_bin(), - "--verbose", + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", + "--database", "/Root", 'tools', 'dump', "--path", '/Root/folder', "--output", backup_files_dir - ] - ) - assert_that( - os.listdir(backup_files_dir), - is_(["table"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + ] + ) + assert_that( + os.listdir(backup_files_dir), + is_(["table"]) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - - # Create "incomplete" file in folder with 
backup files - open(os.path.join(backup_files_dir, "incomplete"), "w").close() - open(os.path.join(backup_files_dir, "table", "incomplete"), "w").close() - - # Restore table and check that it fails without restoring anything - execution = yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + + # Create "incomplete" file in folder with backup files + open(os.path.join(backup_files_dir, "incomplete"), "w").close() + open(os.path.join(backup_files_dir, "table", "incomplete"), "w").close() + + # Restore table and check that it fails without restoring anything + execution = yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", + "--database", "/Root", 'tools', 'restore', "--path", "/Root/restored", "--input", backup_files_dir - ], - check_exit_code=False - ) - assert_that( - execution.exit_code, - is_not(0) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + ], + check_exit_code=False + ) + assert_that( + execution.exit_code, + is_not(0) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root/folder").children], - is_(["table"]) - ) - - execution = yatest_common.execute( - [ - backup_bin(), - "--verbose", - "--endpoint", "localhost:%d" % self.cluster.nodes[1].grpc_port, + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root/folder").children], + is_(["table"]) + ) + + execution = yatest_common.execute( + [ + backup_bin(), + "--verbose", + "--endpoint", "localhost:%d" % self.cluster.nodes[1].grpc_port, "--database", "/Root" 'tools', 'restore', "--path", "/Root/restored", "--input", os.path.join(backup_files_dir, "table") - ], - check_exit_code=False - ) - - assert_that( - execution.exit_code, - 
is_not(0) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + ], + check_exit_code=False + ) + + assert_that( + execution.exit_code, + is_not(0) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root/folder").children], - is_(["table"]) - ) - - -class TestAlterBackupRestore(BaseTestBackupInFiles): - def test_alter_table_with_data_backup_restore(self): - self.driver.scheme_client.make_directory( - '/Root/folder' - ) - - session = self.driver.table_client.session().create() - - # Create table and fill with data - path = "/Root/folder/table" - session.create_table( - path, - ydb.TableDescription() - .with_column(ydb.Column("a", ydb.OptionalType(ydb.PrimitiveType.Uint32))) - .with_column(ydb.Column("b", ydb.OptionalType(ydb.PrimitiveType.String))) - .with_column(ydb.Column("c", ydb.OptionalType(ydb.PrimitiveType.Uint32))) - .with_column(ydb.Column("d", ydb.OptionalType(ydb.PrimitiveType.String))) - .with_column(ydb.Column("e", ydb.OptionalType(ydb.PrimitiveType.Uint32))) - .with_column(ydb.Column("f", ydb.OptionalType(ydb.PrimitiveType.String))) - .with_column(ydb.Column("g", ydb.OptionalType(ydb.PrimitiveType.Uint32))) - .with_column(ydb.Column("h", ydb.OptionalType(ydb.PrimitiveType.String))) - .with_primary_keys("a") - ) - prefix, table = os.path.split(path) - session.transaction().execute( - """ - PRAGMA TablePathPrefix("{0}"); - UPSERT INTO {1} (a, b, c, d, e, f, g, h) VALUES (5, "b", 5, "b", 5, "b", 5, "b"); - """.format(prefix, table), - commit_tx=True, - ) - session.alter_table( - path, - [], - ['b'] - ) - - # Backup table - backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + assert_that( + [child.name for child in 
self.driver.scheme_client.list_directory("/Root/folder").children], + is_(["table"]) + ) + + +class TestAlterBackupRestore(BaseTestBackupInFiles): + def test_alter_table_with_data_backup_restore(self): + self.driver.scheme_client.make_directory( + '/Root/folder' + ) + + session = self.driver.table_client.session().create() + + # Create table and fill with data + path = "/Root/folder/table" + session.create_table( + path, + ydb.TableDescription() + .with_column(ydb.Column("a", ydb.OptionalType(ydb.PrimitiveType.Uint32))) + .with_column(ydb.Column("b", ydb.OptionalType(ydb.PrimitiveType.String))) + .with_column(ydb.Column("c", ydb.OptionalType(ydb.PrimitiveType.Uint32))) + .with_column(ydb.Column("d", ydb.OptionalType(ydb.PrimitiveType.String))) + .with_column(ydb.Column("e", ydb.OptionalType(ydb.PrimitiveType.Uint32))) + .with_column(ydb.Column("f", ydb.OptionalType(ydb.PrimitiveType.String))) + .with_column(ydb.Column("g", ydb.OptionalType(ydb.PrimitiveType.Uint32))) + .with_column(ydb.Column("h", ydb.OptionalType(ydb.PrimitiveType.String))) + .with_primary_keys("a") + ) + prefix, table = os.path.split(path) + session.transaction().execute( + """ + PRAGMA TablePathPrefix("{0}"); + UPSERT INTO {1} (a, b, c, d, e, f, g, h) VALUES (5, "b", 5, "b", 5, "b", 5, "b"); + """.format(prefix, table), + commit_tx=True, + ) + session.alter_table( + path, + [], + ['b'] + ) + + # Backup table + backup_files_dir = output_path('test_single_table_with_data_backup_restore', "backup_files_dir") + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "dump", - "--path", "/Root/folder", - "--output", backup_files_dir - ] - ) - assert_that( - os.listdir(backup_files_dir), - is_(["table"]) - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "dump", + "--path", "/Root/folder", + "--output", 
backup_files_dir + ] + ) + assert_that( + os.listdir(backup_files_dir), + is_(["table"]) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], is_(["folder", ".sys"]) - ) - - # Restore table - yatest_common.execute( - [ - backup_bin(), - "--verbose", + ) + + # Restore table + yatest_common.execute( + [ + backup_bin(), + "--verbose", "--endpoint", "grpc://localhost:%d" % self.cluster.nodes[1].grpc_port, - "--database", "/Root", - "tools", "restore", - "--path", "/Root/restored", - "--input", backup_files_dir - ] - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root").children], + "--database", "/Root", + "tools", "restore", + "--path", "/Root/restored", + "--input", backup_files_dir + ] + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root").children], contains_inanyorder("folder", "restored", ".sys") - ) - assert_that( - [child.name for child in self.driver.scheme_client.list_directory("/Root/restored").children], - is_(["table"]) - ) - assert_that( - is_tables_the_same(session, "/Root/folder/table", "/Root/restored/table"), - is_(True) - ) + ) + assert_that( + [child.name for child in self.driver.scheme_client.list_directory("/Root/restored").children], + is_(["table"]) + ) + assert_that( + is_tables_the_same(session, "/Root/folder/table", "/Root/restored/table"), + is_(True) + ) diff --git a/ydb/tests/functional/ydb_cli/ya.make b/ydb/tests/functional/ydb_cli/ya.make index c70c576bf9..cf8d4dae48 100644 --- a/ydb/tests/functional/ydb_cli/ya.make +++ b/ydb/tests/functional/ydb_cli/ya.make @@ -1,27 +1,27 @@ -OWNER(g:kikimr va-kuznecov) +OWNER(g:kikimr va-kuznecov) PY3TEST() - -TEST_SRCS( - test_ydb_backup.py + +TEST_SRCS( + test_ydb_backup.py test_ydb_table.py test_ydb_scripting.py -) - +) + ENV(YDB_TOKEN="root@builtin") ENV(YDB_DRIVER_BINARY="ydb/apps/ydbd/ydbd") -TIMEOUT(600) -SIZE(MEDIUM) - -DEPENDS( +TIMEOUT(600) +SIZE(MEDIUM) + 
+DEPENDS( ydb/apps/ydbd ydb/apps/ydb -) - -PEERDIR( +) + +PEERDIR( ydb/tests/library -) - -FORK_SUBTESTS() -FORK_TEST_FILES() - -END() +) + +FORK_SUBTESTS() +FORK_TEST_FILES() + +END() diff --git a/ydb/tests/library/harness/kikimr_client.py b/ydb/tests/library/harness/kikimr_client.py index a9a2f07ced..d74383ae23 100644 --- a/ydb/tests/library/harness/kikimr_client.py +++ b/ydb/tests/library/harness/kikimr_client.py @@ -94,13 +94,13 @@ class KiKiMRMessageBusClient(object): def close(self): self._channel.close() - def update_self_heal(self, enable, domain=1): - request = msgbus.TBlobStorageConfigRequest() - request.Domain = domain - command = request.Request.Command.add() - command.EnableSelfHeal.Enable = enable - return self.send(request, 'BlobStorageConfig') - + def update_self_heal(self, enable, domain=1): + request = msgbus.TBlobStorageConfigRequest() + request.Domain = domain + command = request.Request.Command.add() + command.EnableSelfHeal.Enable = enable + return self.send(request, 'BlobStorageConfig') + def read_drive_status(self, hostname, interconnect_port, drive_path=None, domain=1): request = msgbus.TBlobStorageConfigRequest() request.Domain = domain @@ -237,7 +237,7 @@ class KiKiMRMessageBusClient(object): if tablet.allowed_node_ids: create_tablet_cmd.AllowedNodeIDs.extend(tablet.allowed_node_ids) - request.DomainUid = self.__domain_id + request.DomainUid = self.__domain_id return self.invoke( request, 'HiveCreateTablet' |