diff options
author | dakovalkov <dakovalkov@yandex-team.com> | 2023-12-03 13:33:55 +0300 |
---|---|---|
committer | dakovalkov <dakovalkov@yandex-team.com> | 2023-12-03 14:04:39 +0300 |
commit | 2a718325637e5302334b6d0a6430f63168f8dbb3 (patch) | |
tree | 64be81080b7df9ec1d86d053a0c394ae53fcf1fe /contrib/restricted/aws/aws-c-compression/source | |
parent | e0d94a470142d95c3007e9c5d80380994940664a (diff) | |
download | ydb-2a718325637e5302334b6d0a6430f63168f8dbb3.tar.gz |
Update contrib/libs/aws-sdk-cpp to 1.11.37
Diffstat (limited to 'contrib/restricted/aws/aws-c-compression/source')
3 files changed, 502 insertions, 0 deletions
diff --git a/contrib/restricted/aws/aws-c-compression/source/compression.c b/contrib/restricted/aws/aws-c-compression/source/compression.c new file mode 100644 index 0000000000..52777c0eeb --- /dev/null +++ b/contrib/restricted/aws/aws-c-compression/source/compression.c @@ -0,0 +1,44 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#include <aws/compression/compression.h> + +#define DEFINE_ERROR_INFO(CODE, STR) \ + [(CODE)-AWS_ERROR_ENUM_BEGIN_RANGE(AWS_C_COMPRESSION_PACKAGE_ID)] = \ + AWS_DEFINE_ERROR_INFO(CODE, STR, "aws-c-compression") + +/* clang-format off */ +static struct aws_error_info s_errors[] = { + DEFINE_ERROR_INFO( + AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL, + "Compression encountered an unknown symbol."), +}; +/* clang-format on */ + +static struct aws_error_info_list s_error_list = { + .error_list = s_errors, + .count = AWS_ARRAY_SIZE(s_errors), +}; + +static bool s_library_initialized = false; +void aws_compression_library_init(struct aws_allocator *alloc) { + if (s_library_initialized) { + return; + } + s_library_initialized = true; + + aws_common_library_init(alloc); + aws_register_error_info(&s_error_list); +} + +void aws_compression_library_clean_up(void) { + if (!s_library_initialized) { + return; + } + s_library_initialized = false; + + aws_unregister_error_info(&s_error_list); + aws_common_library_clean_up(); +} diff --git a/contrib/restricted/aws/aws-c-compression/source/huffman.c b/contrib/restricted/aws/aws-c-compression/source/huffman.c new file mode 100644 index 0000000000..074c9f4ed5 --- /dev/null +++ b/contrib/restricted/aws/aws-c-compression/source/huffman.c @@ -0,0 +1,285 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +#include <aws/compression/huffman.h> + +#define BITSIZEOF(val) (sizeof(val) * 8) + +static uint8_t MAX_PATTERN_BITS = BITSIZEOF(((struct aws_huffman_code *)0)->pattern); + +void aws_huffman_encoder_init(struct aws_huffman_encoder *encoder, struct aws_huffman_symbol_coder *coder) { + + AWS_ASSERT(encoder); + AWS_ASSERT(coder); + + AWS_ZERO_STRUCT(*encoder); + encoder->coder = coder; + encoder->eos_padding = UINT8_MAX; +} + +void aws_huffman_encoder_reset(struct aws_huffman_encoder *encoder) { + + AWS_ASSERT(encoder); + + AWS_ZERO_STRUCT(encoder->overflow_bits); +} + +void aws_huffman_decoder_init(struct aws_huffman_decoder *decoder, struct aws_huffman_symbol_coder *coder) { + + AWS_ASSERT(decoder); + AWS_ASSERT(coder); + + AWS_ZERO_STRUCT(*decoder); + decoder->coder = coder; +} + +void aws_huffman_decoder_reset(struct aws_huffman_decoder *decoder) { + + decoder->working_bits = 0; + decoder->num_bits = 0; +} + +void aws_huffman_decoder_allow_growth(struct aws_huffman_decoder *decoder, bool allow_growth) { + decoder->allow_growth = allow_growth; +} + +/* Much of encode is written in a helper function, + so this struct helps avoid passing all the parameters through by hand */ +struct encoder_state { + struct aws_huffman_encoder *encoder; + struct aws_byte_buf *output_buf; + uint8_t working; + uint8_t bit_pos; +}; + +/* Helper function to write a single bit_pattern to memory (or working_bits if + * out of buffer space) */ +static int encode_write_bit_pattern(struct encoder_state *state, struct aws_huffman_code bit_pattern) { + AWS_PRECONDITION(state->output_buf->len < state->output_buf->capacity); + + if (bit_pattern.num_bits == 0) { + return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL); + } + + uint8_t bits_to_write = bit_pattern.num_bits; + while (bits_to_write > 0) { + uint8_t bits_for_current = bits_to_write > state->bit_pos ? state->bit_pos : bits_to_write; + /* Chop off the top 0s and bits that have already been read */ + uint8_t bits_to_cut = + (BITSIZEOF(bit_pattern.pattern) - bit_pattern.num_bits) + (bit_pattern.num_bits - bits_to_write); + + /* Write the appropiate number of bits to this byte + Shift to the left to cut any unneeded bits + Shift to the right to position the bits correctly */ + state->working |= (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - state->bit_pos); + + bits_to_write -= bits_for_current; + state->bit_pos -= bits_for_current; + + if (state->bit_pos == 0) { + /* Save the whole byte */ + aws_byte_buf_write_u8(state->output_buf, state->working); + + state->bit_pos = 8; + state->working = 0; + + if (state->output_buf->len == state->output_buf->capacity) { + state->encoder->overflow_bits.num_bits = bits_to_write; + + if (bits_to_write) { + /* If buffer is full and there are remaining bits, save them to overflow and return */ + bits_to_cut += bits_for_current; + + state->encoder->overflow_bits.pattern = + (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - bits_to_write); + + return aws_raise_error(AWS_ERROR_SHORT_BUFFER); + } + } + } + } + + return AWS_OP_SUCCESS; +} + +size_t aws_huffman_get_encoded_length(struct aws_huffman_encoder *encoder, struct aws_byte_cursor to_encode) { + + AWS_PRECONDITION(encoder); + AWS_PRECONDITION(aws_byte_cursor_is_valid(&to_encode)); + + size_t num_bits = 0; + + while (to_encode.len) { + uint8_t new_byte = 0; + aws_byte_cursor_read_u8(&to_encode, &new_byte); + struct aws_huffman_code code_point = encoder->coder->encode(new_byte, encoder->coder->userdata); + num_bits += code_point.num_bits; + } + + size_t length = num_bits / 8; + + /* Round up */ + if (num_bits % 8) { + ++length; + } + + return length; +} + +int aws_huffman_encode( + struct aws_huffman_encoder *encoder, + struct aws_byte_cursor *to_encode, + struct aws_byte_buf *output) { + + AWS_ASSERT(encoder); + AWS_ASSERT(encoder->coder); + AWS_ASSERT(to_encode); + AWS_ASSERT(output); + + struct encoder_state state = { + .working = 0, + .bit_pos = 8, + }; + state.encoder = encoder; + state.output_buf = output; + + /* Write any bits leftover from previous invocation */ + if (encoder->overflow_bits.num_bits) { + if (output->len == output->capacity) { + return aws_raise_error(AWS_ERROR_SHORT_BUFFER); + } + + if (encode_write_bit_pattern(&state, encoder->overflow_bits)) { + return AWS_OP_ERR; + } + + encoder->overflow_bits.num_bits = 0; + } + + while (to_encode->len) { + if (output->len == output->capacity) { + return aws_raise_error(AWS_ERROR_SHORT_BUFFER); + } + + uint8_t new_byte = 0; + aws_byte_cursor_read_u8(to_encode, &new_byte); + struct aws_huffman_code code_point = encoder->coder->encode(new_byte, encoder->coder->userdata); + + if (encode_write_bit_pattern(&state, code_point)) { + return AWS_OP_ERR; + } + } + + /* The following code only runs when the buffer has written successfully */ + + /* If whole buffer processed, write EOS */ + if (state.bit_pos != 8) { + struct aws_huffman_code eos_cp; + eos_cp.pattern = encoder->eos_padding; + eos_cp.num_bits = state.bit_pos; + encode_write_bit_pattern(&state, eos_cp); + AWS_ASSERT(state.bit_pos == 8); + } + + return AWS_OP_SUCCESS; +} + +/* Decode's reading is written in a helper function, + so this struct helps avoid passing all the parameters through by hand */ +struct huffman_decoder_state { + struct aws_huffman_decoder *decoder; + struct aws_byte_cursor *input_cursor; +}; + +static void decode_fill_working_bits(struct huffman_decoder_state *state) { + + /* Read from bytes in the buffer until there are enough bytes to process */ + while (state->decoder->num_bits < MAX_PATTERN_BITS && state->input_cursor->len) { + + /* Read the appropiate number of bits from this byte */ + uint8_t new_byte = 0; + aws_byte_cursor_read_u8(state->input_cursor, &new_byte); + + uint64_t positioned = ((uint64_t)new_byte) + << (BITSIZEOF(state->decoder->working_bits) - 8 - state->decoder->num_bits); + state->decoder->working_bits |= positioned; + + state->decoder->num_bits += 8; + } +} + +int aws_huffman_decode( + struct aws_huffman_decoder *decoder, + struct aws_byte_cursor *to_decode, + struct aws_byte_buf *output) { + + AWS_ASSERT(decoder); + AWS_ASSERT(decoder->coder); + AWS_ASSERT(to_decode); + AWS_ASSERT(output); + + struct huffman_decoder_state state; + state.decoder = decoder; + state.input_cursor = to_decode; + + /* Measures how much of the input was read */ + size_t bits_left = decoder->num_bits + to_decode->len * 8; + + while (1) { + + decode_fill_working_bits(&state); + + uint8_t symbol; + uint8_t bits_read = decoder->coder->decode( + (uint32_t)(decoder->working_bits >> (BITSIZEOF(decoder->working_bits) - MAX_PATTERN_BITS)), + &symbol, + decoder->coder->userdata); + + if (bits_read == 0) { + if (bits_left < MAX_PATTERN_BITS) { + /* More input is needed to continue */ + return AWS_OP_SUCCESS; + } + /* Unknown symbol found */ + return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL); + } + if (bits_read > bits_left) { + /* Check if the buffer has been overrun. + Note: because of the check in decode_fill_working_bits, + the buffer won't actually overrun, instead there will + be 0's in the bottom of working_bits. */ + + return AWS_OP_SUCCESS; + } + + if (output->len == output->capacity) { + /* Check if we've hit the end of the output buffer. + * Grow buffer, or raise error, depending on settings */ + if (decoder->allow_growth) { + /* Double the capacity */ + if (aws_byte_buf_reserve_relative(output, output->capacity)) { + return AWS_OP_ERR; + } + } else { + return aws_raise_error(AWS_ERROR_SHORT_BUFFER); + } + } + + bits_left -= bits_read; + decoder->working_bits <<= bits_read; + decoder->num_bits -= bits_read; + + /* Store the found symbol */ + aws_byte_buf_write_u8(output, symbol); + + /* Successfully decoded whole buffer */ + if (bits_left == 0) { + return AWS_OP_SUCCESS; + } + } + + /* This case is unreachable */ + AWS_ASSERT(0); +} diff --git a/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c b/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c new file mode 100644 index 0000000000..67d17a2c2c --- /dev/null +++ b/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c @@ -0,0 +1,173 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +/** + * See aws/testing/compression/huffman.h for docs. + */ +#define AWS_UNSTABLE_TESTING_API +#include <aws/compression/private/huffman_testing.h> + +#include <aws/common/byte_buf.h> +#include <aws/common/common.h> + +int huffman_test_transitive( + struct aws_huffman_symbol_coder *coder, + const char *input, + size_t size, + size_t encoded_size, + const char **error_string) { + + struct aws_huffman_encoder encoder; + aws_huffman_encoder_init(&encoder, coder); + struct aws_huffman_decoder decoder; + aws_huffman_decoder_init(&decoder, coder); + + const size_t intermediate_buffer_size = size * 2; + AWS_VARIABLE_LENGTH_ARRAY(uint8_t, intermediate_buffer, intermediate_buffer_size); + memset(intermediate_buffer, 0, intermediate_buffer_size); + AWS_VARIABLE_LENGTH_ARRAY(char, output_buffer, size); + memset(output_buffer, 0, size); + + struct aws_byte_cursor to_encode = aws_byte_cursor_from_array((uint8_t *)input, size); + struct aws_byte_buf intermediate_buf = aws_byte_buf_from_empty_array(intermediate_buffer, intermediate_buffer_size); + struct aws_byte_buf output_buf = aws_byte_buf_from_empty_array(output_buffer, size); + + int result = aws_huffman_encode(&encoder, &to_encode, &intermediate_buf); + + if (result != AWS_OP_SUCCESS) { + *error_string = "aws_huffman_encode failed"; + return AWS_OP_ERR; + } + if (to_encode.len != 0) { + *error_string = "not all data encoded"; + return AWS_OP_ERR; + } + if (encoded_size && intermediate_buf.len != encoded_size) { + *error_string = "encoded length is incorrect"; + return AWS_OP_ERR; + } + + struct aws_byte_cursor intermediate_cur = aws_byte_cursor_from_buf(&intermediate_buf); + result = aws_huffman_decode(&decoder, &intermediate_cur, &output_buf); + + if (result != AWS_OP_SUCCESS) { + *error_string = "aws_huffman_decode failed"; + return AWS_OP_ERR; + } + if (intermediate_cur.len != 0) { + *error_string = "not all encoded data was decoded"; + return AWS_OP_ERR; + } + if (output_buf.len != size) { + *error_string = "decode output size incorrect"; + return AWS_OP_ERR; + } + if (memcmp(input, output_buffer, size) != 0) { + *error_string = "decoded data does not match input data"; + return AWS_OP_ERR; + } + + return AWS_OP_SUCCESS; +} + +int huffman_test_transitive_chunked( + struct aws_huffman_symbol_coder *coder, + const char *input, + size_t size, + size_t encoded_size, + size_t output_chunk_size, + const char **error_string) { + + struct aws_huffman_encoder encoder; + aws_huffman_encoder_init(&encoder, coder); + struct aws_huffman_decoder decoder; + aws_huffman_decoder_init(&decoder, coder); + + const size_t intermediate_buffer_size = size * 2; + AWS_VARIABLE_LENGTH_ARRAY(uint8_t, intermediate_buffer, intermediate_buffer_size); + memset(intermediate_buffer, 0, intermediate_buffer_size); + AWS_VARIABLE_LENGTH_ARRAY(char, output_buffer, size); + memset(output_buffer, 0, size); + + struct aws_byte_cursor to_encode = aws_byte_cursor_from_array(input, size); + struct aws_byte_buf intermediate_buf = aws_byte_buf_from_empty_array(intermediate_buffer, (size_t)-1); + intermediate_buf.capacity = 0; + struct aws_byte_buf output_buf = aws_byte_buf_from_empty_array(output_buffer, (size_t)-1); + output_buf.capacity = 0; + + int result = AWS_OP_SUCCESS; + + { + do { + const size_t previous_intermediate_len = intermediate_buf.len; + + intermediate_buf.capacity += output_chunk_size; + result = aws_huffman_encode(&encoder, &to_encode, &intermediate_buf); + + if (intermediate_buf.len == previous_intermediate_len) { + *error_string = "encode didn't write any data"; + return AWS_OP_ERR; + } + + if (result != AWS_OP_SUCCESS && aws_last_error() != AWS_ERROR_SHORT_BUFFER) { + *error_string = "encode returned wrong error code"; + return AWS_OP_ERR; + } + } while (result != AWS_OP_SUCCESS); + } + + if (result != AWS_OP_SUCCESS) { + *error_string = "aws_huffman_encode failed"; + return AWS_OP_ERR; + } + if (intermediate_buf.len > intermediate_buffer_size) { + *error_string = "too much data encoded"; + return AWS_OP_ERR; + } + if (encoded_size && intermediate_buf.len != encoded_size) { + *error_string = "encoded length is incorrect"; + return AWS_OP_ERR; + } + + struct aws_byte_cursor intermediate_cur = aws_byte_cursor_from_buf(&intermediate_buf); + + { + do { + const size_t previous_output_len = output_buf.len; + + output_buf.capacity += output_chunk_size; + if (output_buf.capacity > size) { + output_buf.capacity = size; + } + + result = aws_huffman_decode(&decoder, &intermediate_cur, &output_buf); + + if (output_buf.len == previous_output_len) { + *error_string = "decode didn't write any data"; + return AWS_OP_ERR; + } + + if (result != AWS_OP_SUCCESS && aws_last_error() != AWS_ERROR_SHORT_BUFFER) { + *error_string = "decode returned wrong error code"; + return AWS_OP_ERR; + } + } while (result != AWS_OP_SUCCESS); + } + + if (result != AWS_OP_SUCCESS) { + *error_string = "aws_huffman_decode failed"; + return AWS_OP_ERR; + } + if (output_buf.len != size) { + *error_string = "decode output size incorrect"; + return AWS_OP_ERR; + } + if (memcmp(input, output_buffer, size) != 0) { + *error_string = "decoded data does not match input data"; + return AWS_OP_ERR; + } + + return AWS_OP_SUCCESS; +} |