about summary refs log tree commit diff stats
path: root/contrib/restricted/aws/aws-c-compression/source
diff options
context:
space:
mode:
author dakovalkov <dakovalkov@yandex-team.com> 2023-12-03 13:33:55 +0300
committer dakovalkov <dakovalkov@yandex-team.com> 2023-12-03 14:04:39 +0300
commit 2a718325637e5302334b6d0a6430f63168f8dbb3 (patch)
tree 64be81080b7df9ec1d86d053a0c394ae53fcf1fe /contrib/restricted/aws/aws-c-compression/source
parent e0d94a470142d95c3007e9c5d80380994940664a (diff)
download ydb-2a718325637e5302334b6d0a6430f63168f8dbb3.tar.gz
Update contrib/libs/aws-sdk-cpp to 1.11.37
Diffstat (limited to 'contrib/restricted/aws/aws-c-compression/source')
-rw-r--r-- contrib/restricted/aws/aws-c-compression/source/compression.c 44
-rw-r--r-- contrib/restricted/aws/aws-c-compression/source/huffman.c 285
-rw-r--r-- contrib/restricted/aws/aws-c-compression/source/huffman_testing.c 173
3 files changed, 502 insertions, 0 deletions
diff --git a/contrib/restricted/aws/aws-c-compression/source/compression.c b/contrib/restricted/aws/aws-c-compression/source/compression.c
new file mode 100644
index 0000000000..52777c0eeb
--- /dev/null
+++ b/contrib/restricted/aws/aws-c-compression/source/compression.c
@@ -0,0 +1,44 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+#include <aws/compression/compression.h>
+
+/* Produces one designated-initializer entry for s_errors. The array slot is
+ * ERR_CODE minus AWS_ERROR_ENUM_BEGIN_RANGE(AWS_C_COMPRESSION_PACKAGE_ID). */
+#define DEFINE_ERROR_INFO(ERR_CODE, ERR_TEXT) \
+    [(ERR_CODE)-AWS_ERROR_ENUM_BEGIN_RANGE(AWS_C_COMPRESSION_PACKAGE_ID)] = \
+        AWS_DEFINE_ERROR_INFO(ERR_CODE, ERR_TEXT, "aws-c-compression")
+
+/* clang-format off */
+/* Descriptions of the error codes contributed by this library. */
+static struct aws_error_info s_errors[] = {
+    DEFINE_ERROR_INFO(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL, "Compression encountered an unknown symbol."),
+};
+/* clang-format on */
+
+/* s_errors bundled with its entry count; this is the object passed to
+ * aws_register_error_info() / aws_unregister_error_info(). */
+static struct aws_error_info_list s_error_list = {
+    .count = AWS_ARRAY_SIZE(s_errors),
+    .error_list = s_errors,
+};
+
+/* True between a call to aws_compression_library_init() and the matching
+ * aws_compression_library_clean_up(). */
+static bool s_library_initialized = false;
+
+/**
+ * Initializes aws-c-common with `alloc` and registers this library's error
+ * table. Calls made while the library is already initialized do nothing.
+ */
+void aws_compression_library_init(struct aws_allocator *alloc) {
+    if (!s_library_initialized) {
+        /* Flip the flag first, exactly once, before touching any dependency */
+        s_library_initialized = true;
+
+        aws_common_library_init(alloc);
+        aws_register_error_info(&s_error_list);
+    }
+}
+
+/**
+ * Reverses aws_compression_library_init(): unregisters this library's error
+ * table and then cleans up aws-c-common. Does nothing when the library is not
+ * currently initialized.
+ */
+void aws_compression_library_clean_up(void) {
+    if (s_library_initialized) {
+        s_library_initialized = false;
+
+        aws_unregister_error_info(&s_error_list);
+        aws_common_library_clean_up();
+    }
+}
diff --git a/contrib/restricted/aws/aws-c-compression/source/huffman.c b/contrib/restricted/aws/aws-c-compression/source/huffman.c
new file mode 100644
index 0000000000..074c9f4ed5
--- /dev/null
+++ b/contrib/restricted/aws/aws-c-compression/source/huffman.c
@@ -0,0 +1,285 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+#include <aws/compression/huffman.h>
+
+/* Number of bits in the storage of `val`, counting 8 bits per byte. */
+#define BITSIZEOF(val) (sizeof(val) * 8)
+
+/* Bit width of the `pattern` member of struct aws_huffman_code. The value is
+ * only ever read, so it is declared const to rule out accidental writes. */
+static const uint8_t MAX_PATTERN_BITS = BITSIZEOF(((struct aws_huffman_code *)0)->pattern);
+
+/**
+ * Prepares `encoder` for use with `coder`: every field is zeroed, then the
+ * symbol coder is attached and the EOS padding byte is set to UINT8_MAX.
+ */
+void aws_huffman_encoder_init(struct aws_huffman_encoder *encoder, struct aws_huffman_symbol_coder *coder) {
+    AWS_ASSERT(encoder);
+    AWS_ASSERT(coder);
+
+    /* Zero first so no stale overflow bits survive a re-initialization */
+    AWS_ZERO_STRUCT(*encoder);
+
+    encoder->eos_padding = UINT8_MAX;
+    encoder->coder = coder;
+}
+
+/**
+ * Discards the bits that an earlier aws_huffman_encode() call stored in
+ * `encoder->overflow_bits` after running out of output space. No other field
+ * of the encoder is modified.
+ */
+void aws_huffman_encoder_reset(struct aws_huffman_encoder *encoder) {
+    AWS_ASSERT(encoder);
+    AWS_ZERO_STRUCT(encoder->overflow_bits);
+}
+
+/**
+ * Prepares `decoder` for use with `coder`: every field is zeroed (no buffered
+ * bits, growth disabled) and the symbol coder is attached.
+ */
+void aws_huffman_decoder_init(struct aws_huffman_decoder *decoder, struct aws_huffman_symbol_coder *coder) {
+    AWS_ASSERT(decoder);
+    AWS_ASSERT(coder);
+
+    /* Zeroing must come before the assignment below, or the coder would be wiped */
+    AWS_ZERO_STRUCT(*decoder);
+    decoder->coder = coder;
+}
+
+/**
+ * Drops any partially decoded input buffered in `decoder` by clearing
+ * working_bits and num_bits. The attached coder and the allow_growth setting
+ * are left as they are.
+ */
+void aws_huffman_decoder_reset(struct aws_huffman_decoder *decoder) {
+
+    /* Same argument check as aws_huffman_encoder_reset() */
+    AWS_ASSERT(decoder);
+
+    decoder->working_bits = 0;
+    decoder->num_bits = 0;
+}
+
+/**
+ * Selects what aws_huffman_decode() does when the output buffer is full:
+ * with `allow_growth` true it tries to reserve more space in the buffer,
+ * otherwise it raises AWS_ERROR_SHORT_BUFFER.
+ */
+void aws_huffman_decoder_allow_growth(struct aws_huffman_decoder *decoder, bool allow_growth) {
+
+    /* Same argument check as the other decoder entry points */
+    AWS_ASSERT(decoder);
+
+    decoder->allow_growth = allow_growth;
+}
+
+/* Bundles the state shared between aws_huffman_encode() and its bit-writing
+   helper so it does not have to be passed around as separate arguments */
+struct encoder_state {
+    /* Encoder whose overflow_bits receive whatever does not fit in the output */
+    struct aws_huffman_encoder *encoder;
+    /* Destination for completed bytes */
+    struct aws_byte_buf *output_buf;
+    /* Byte currently being assembled */
+    uint8_t working;
+    /* Number of bits still free in `working`; 8 means nothing is pending */
+    uint8_t bit_pos;
+};
+
+/* Helper that appends a single bit_pattern to state->output_buf, one byte at a
+ * time, through the partially filled byte kept in state->working.
+ *
+ * The caller must guarantee at least one free byte in the output buffer.
+ *
+ * Returns AWS_OP_SUCCESS once every bit has been placed; a trailing partial
+ * byte stays in state->working. Raises AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL
+ * when bit_pattern.num_bits is 0, and AWS_ERROR_SHORT_BUFFER when the buffer
+ * fills up while bits are still pending; the pending bits are then saved,
+ * right-aligned, in state->encoder->overflow_bits. */
+static int encode_write_bit_pattern(struct encoder_state *state, struct aws_huffman_code bit_pattern) {
+ AWS_PRECONDITION(state->output_buf->len < state->output_buf->capacity);
+
+ if (bit_pattern.num_bits == 0) { /* a zero-length code marks a symbol the coder does not know */
+ return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL);
+ }
+
+ uint8_t bits_to_write = bit_pattern.num_bits; /* bits of the pattern not yet emitted */
+ while (bits_to_write > 0) {
+ uint8_t bits_for_current = bits_to_write > state->bit_pos ? state->bit_pos : bits_to_write; /* as many as fit in the working byte */
+ /* Skip the unused high bits of the pattern plus the bits already written */
+ uint8_t bits_to_cut =
+ (BITSIZEOF(bit_pattern.pattern) - bit_pattern.num_bits) + (bit_pattern.num_bits - bits_to_write);
+
+ /* Write the appropriate number of bits to this byte
+ Shift to the left to cut any unneeded bits
+ Shift to the right so the remaining bits start at the top of the
+ free part of the working byte */
+ state->working |= (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - state->bit_pos);
+
+ bits_to_write -= bits_for_current;
+ state->bit_pos -= bits_for_current;
+
+ if (state->bit_pos == 0) {
+ /* Save the whole byte */
+ aws_byte_buf_write_u8(state->output_buf, state->working);
+
+ state->bit_pos = 8; /* start a fresh, empty working byte */
+ state->working = 0;
+
+ if (state->output_buf->len == state->output_buf->capacity) {
+ state->encoder->overflow_bits.num_bits = bits_to_write; /* 0 when the pattern ended exactly on the last byte */
+
+ if (bits_to_write) {
+ /* If buffer is full and there are remaining bits, save them to overflow and return */
+ bits_to_cut += bits_for_current; /* also skip the bits emitted in this iteration */
+
+ state->encoder->overflow_bits.pattern =
+ (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - bits_to_write);
+
+ return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
+ }
+ }
+ }
+ }
+
+ return AWS_OP_SUCCESS;
+}
+
+/**
+ * Returns the number of bytes occupied by the Huffman encoding of `to_encode`:
+ * the code lengths reported by the encoder's symbol coder are summed for every
+ * input byte and the total is rounded up to whole bytes.
+ *
+ * `to_encode` is taken by value, so the caller's cursor is not advanced.
+ */
+size_t aws_huffman_get_encoded_length(struct aws_huffman_encoder *encoder, struct aws_byte_cursor to_encode) {
+    AWS_PRECONDITION(encoder);
+    AWS_PRECONDITION(aws_byte_cursor_is_valid(&to_encode));
+
+    size_t total_bits = 0;
+
+    while (to_encode.len > 0) {
+        uint8_t symbol = 0;
+        aws_byte_cursor_read_u8(&to_encode, &symbol);
+
+        const struct aws_huffman_code code = encoder->coder->encode(symbol, encoder->coder->userdata);
+        total_bits += code.num_bits;
+    }
+
+    /* Whole bytes, plus one more when a partial byte is left over */
+    const size_t whole_bytes = total_bits / 8;
+    return (total_bits % 8) ? whole_bytes + 1 : whole_bytes;
+}
+
+/**
+ * Huffman-encodes the bytes of `to_encode` into `output`, advancing the cursor
+ * as input is consumed.
+ *
+ * Bits left over from an earlier call that ran out of space
+ * (encoder->overflow_bits) are written first. When all input has been encoded
+ * and the last byte is only partly filled, it is completed with bits taken
+ * from encoder->eos_padding.
+ *
+ * Returns AWS_OP_SUCCESS when all input was written. Raises
+ * AWS_ERROR_SHORT_BUFFER when `output` fills up first (call again with more
+ * space to resume), and returns AWS_OP_ERR when the bit-writing helper fails,
+ * e.g. for a symbol the coder does not know.
+ */
+int aws_huffman_encode(
+    struct aws_huffman_encoder *encoder,
+    struct aws_byte_cursor *to_encode,
+    struct aws_byte_buf *output) {
+
+    AWS_ASSERT(encoder);
+    AWS_ASSERT(encoder->coder);
+    AWS_ASSERT(to_encode);
+    AWS_ASSERT(output);
+
+    /* Start with an empty working byte: all 8 bit positions are free */
+    struct encoder_state state = {
+        .encoder = encoder,
+        .output_buf = output,
+        .working = 0,
+        .bit_pos = 8,
+    };
+
+    /* Flush the bits a previous invocation could not fit, before any new input */
+    if (encoder->overflow_bits.num_bits) {
+        if (output->len == output->capacity) {
+            return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
+        }
+
+        const int flush_rc = encode_write_bit_pattern(&state, encoder->overflow_bits);
+        if (flush_rc) {
+            return AWS_OP_ERR;
+        }
+
+        encoder->overflow_bits.num_bits = 0;
+    }
+
+    while (to_encode->len > 0) {
+        /* The helper requires at least one free output byte */
+        if (output->len == output->capacity) {
+            return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
+        }
+
+        uint8_t symbol = 0;
+        aws_byte_cursor_read_u8(to_encode, &symbol);
+
+        const struct aws_huffman_code code = encoder->coder->encode(symbol, encoder->coder->userdata);
+        if (encode_write_bit_pattern(&state, code)) {
+            return AWS_OP_ERR;
+        }
+    }
+
+    /* Only reached once the whole input has been written successfully */
+
+    /* A partly filled final byte is completed with the EOS padding bits */
+    if (state.bit_pos != 8) {
+        const struct aws_huffman_code eos_code = {
+            .pattern = encoder->eos_padding,
+            .num_bits = state.bit_pos,
+        };
+        encode_write_bit_pattern(&state, eos_code);
+        AWS_ASSERT(state.bit_pos == 8);
+    }
+
+    return AWS_OP_SUCCESS;
+}
+
+/* Pairs the decoder with the cursor it reads from, so the refill helper used
+   by aws_huffman_decode() only needs a single argument */
+struct huffman_decoder_state {
+    /* Decoder whose working_bits / num_bits get refilled */
+    struct aws_huffman_decoder *decoder;
+    /* Encoded input that has not been consumed yet */
+    struct aws_byte_cursor *input_cursor;
+};
+
+/* Tops up decoder->working_bits from the input cursor, one byte at a time,
+ * until at least MAX_PATTERN_BITS bits are buffered or the input runs dry.
+ * Each new byte is placed directly below the bits already buffered, which are
+ * kept at the top of working_bits. */
+static void decode_fill_working_bits(struct huffman_decoder_state *state) {
+    struct aws_huffman_decoder *decoder = state->decoder;
+    struct aws_byte_cursor *input = state->input_cursor;
+
+    while (decoder->num_bits < MAX_PATTERN_BITS && input->len) {
+        /* Read the appropriate number of bits: one whole byte per iteration */
+        uint8_t next_byte = 0;
+        aws_byte_cursor_read_u8(input, &next_byte);
+
+        /* Leave room above for the num_bits bits that are already buffered */
+        const size_t shift = BITSIZEOF(decoder->working_bits) - 8 - decoder->num_bits;
+        decoder->working_bits |= ((uint64_t)next_byte) << shift;
+
+        decoder->num_bits += 8;
+    }
+}
+
+/**
+ * Decodes Huffman-coded bytes from `to_decode` into `output`, consuming the
+ * cursor as input is read. Bits that do not yet form a complete symbol stay
+ * buffered in `decoder`, so the encoded data may be supplied across several
+ * calls.
+ *
+ * Returns AWS_OP_SUCCESS when all available input was decoded, or when more
+ * input is needed to finish the current symbol. Raises
+ * AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL when the buffered bits match no symbol,
+ * and AWS_ERROR_SHORT_BUFFER when `output` is full and growth is disabled.
+ * With growth enabled, a failed reservation is reported as AWS_OP_ERR.
+ */
+int aws_huffman_decode(
+    struct aws_huffman_decoder *decoder,
+    struct aws_byte_cursor *to_decode,
+    struct aws_byte_buf *output) {
+
+    AWS_ASSERT(decoder);
+    AWS_ASSERT(decoder->coder);
+    AWS_ASSERT(to_decode);
+    AWS_ASSERT(output);
+
+    struct huffman_decoder_state state;
+    state.decoder = decoder;
+    state.input_cursor = to_decode;
+
+    /* Measures how much of the input is still undecoded: bits already buffered
+     * in the decoder plus every bit still in the cursor */
+    size_t bits_left = decoder->num_bits + to_decode->len * 8;
+
+    while (1) {
+
+        decode_fill_working_bits(&state);
+
+        /* Hand the top MAX_PATTERN_BITS bits of working_bits to the coder */
+        uint8_t symbol;
+        uint8_t bits_read = decoder->coder->decode(
+            (uint32_t)(decoder->working_bits >> (BITSIZEOF(decoder->working_bits) - MAX_PATTERN_BITS)),
+            &symbol,
+            decoder->coder->userdata);
+
+        if (bits_read == 0) {
+            if (bits_left < MAX_PATTERN_BITS) {
+                /* More input is needed to continue */
+                return AWS_OP_SUCCESS;
+            }
+            /* Unknown symbol found */
+            return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL);
+        }
+        if (bits_read > bits_left) {
+            /* Check if the buffer has been overrun.
+               Note: because of the check in decode_fill_working_bits,
+               the buffer won't actually overrun, instead there will
+               be 0's in the bottom of working_bits. */
+
+            return AWS_OP_SUCCESS;
+        }
+
+        if (output->len == output->capacity) {
+            /* Check if we've hit the end of the output buffer.
+             * Grow buffer, or raise error, depending on settings */
+            if (!decoder->allow_growth) {
+                return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
+            }
+
+            /* Double the capacity. A buffer with capacity 0 would "double" to 0 and the
+             * symbol written below would be silently dropped, so always request at
+             * least one more byte.
+             * NOTE(review): assumes aws_byte_buf_reserve_relative(buf, 0) succeeds
+             * without growing the buffer - confirm against aws-c-common. */
+            const size_t growth = output->capacity > 0 ? output->capacity : 1;
+            if (aws_byte_buf_reserve_relative(output, growth)) {
+                return AWS_OP_ERR;
+            }
+        }
+
+        /* Consume the bits of the decoded symbol */
+        bits_left -= bits_read;
+        decoder->working_bits <<= bits_read;
+        decoder->num_bits -= bits_read;
+
+        /* Store the found symbol */
+        aws_byte_buf_write_u8(output, symbol);
+
+        /* Successfully decoded whole buffer */
+        if (bits_left == 0) {
+            return AWS_OP_SUCCESS;
+        }
+    }
+
+    /* This case is unreachable */
+    AWS_ASSERT(0);
+}
diff --git a/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c b/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c
new file mode 100644
index 0000000000..67d17a2c2c
--- /dev/null
+++ b/contrib/restricted/aws/aws-c-compression/source/huffman_testing.c
@@ -0,0 +1,173 @@
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+/**
+ * See aws/testing/compression/huffman.h for docs.
+ */
+#define AWS_UNSTABLE_TESTING_API
+#include <aws/compression/private/huffman_testing.h>
+
+#include <aws/common/byte_buf.h>
+#include <aws/common/common.h>
+
+/**
+ * Round-trip check: encodes `input` (`size` bytes) with `coder` in a single
+ * call, decodes the result in a single call, and verifies that the decoded
+ * bytes equal the input.
+ *
+ * `encoded_size` is the expected encoded length; pass 0 to skip that check.
+ * On failure `*error_string` is set to a static description and AWS_OP_ERR is
+ * returned; on success AWS_OP_SUCCESS is returned and `*error_string` is left
+ * untouched.
+ */
+int huffman_test_transitive(
+    struct aws_huffman_symbol_coder *coder,
+    const char *input,
+    size_t size,
+    size_t encoded_size,
+    const char **error_string) {
+
+    /* Scratch storage: the encoded form gets twice the input size, the decoded form exactly the input size */
+    const size_t encoded_capacity = size * 2;
+    AWS_VARIABLE_LENGTH_ARRAY(uint8_t, encoded_storage, encoded_capacity);
+    memset(encoded_storage, 0, encoded_capacity);
+    AWS_VARIABLE_LENGTH_ARRAY(char, decoded_storage, size);
+    memset(decoded_storage, 0, size);
+
+    struct aws_huffman_encoder encoder;
+    aws_huffman_encoder_init(&encoder, coder);
+    struct aws_huffman_decoder decoder;
+    aws_huffman_decoder_init(&decoder, coder);
+
+    struct aws_byte_cursor plain_cur = aws_byte_cursor_from_array((uint8_t *)input, size);
+    struct aws_byte_buf encoded_buf = aws_byte_buf_from_empty_array(encoded_storage, encoded_capacity);
+    struct aws_byte_buf decoded_buf = aws_byte_buf_from_empty_array(decoded_storage, size);
+
+    /* Encode everything in one go */
+    if (aws_huffman_encode(&encoder, &plain_cur, &encoded_buf) != AWS_OP_SUCCESS) {
+        *error_string = "aws_huffman_encode failed";
+        return AWS_OP_ERR;
+    }
+    if (plain_cur.len != 0) {
+        *error_string = "not all data encoded";
+        return AWS_OP_ERR;
+    }
+    if (encoded_size && encoded_buf.len != encoded_size) {
+        *error_string = "encoded length is incorrect";
+        return AWS_OP_ERR;
+    }
+
+    /* Decode what was just produced */
+    struct aws_byte_cursor encoded_cur = aws_byte_cursor_from_buf(&encoded_buf);
+    if (aws_huffman_decode(&decoder, &encoded_cur, &decoded_buf) != AWS_OP_SUCCESS) {
+        *error_string = "aws_huffman_decode failed";
+        return AWS_OP_ERR;
+    }
+    if (encoded_cur.len != 0) {
+        *error_string = "not all encoded data was decoded";
+        return AWS_OP_ERR;
+    }
+    if (decoded_buf.len != size) {
+        *error_string = "decode output size incorrect";
+        return AWS_OP_ERR;
+    }
+    if (memcmp(input, decoded_storage, size) != 0) {
+        *error_string = "decoded data does not match input data";
+        return AWS_OP_ERR;
+    }
+
+    return AWS_OP_SUCCESS;
+}
+
+/**
+ * Round-trip check like huffman_test_transitive(), but the usable capacity of
+ * the encode and decode buffers is widened by `output_chunk_size` bytes per
+ * call, so the AWS_ERROR_SHORT_BUFFER resume paths of the encoder and decoder
+ * are exercised.
+ *
+ * `encoded_size` is the expected encoded length; pass 0 to skip that check.
+ * On failure `*error_string` is set to a static description and AWS_OP_ERR is
+ * returned; on success AWS_OP_SUCCESS is returned.
+ *
+ * The usable capacity of each buffer is clamped to the size of its backing
+ * array, so a coder that produces more than `size * 2` encoded bytes makes the
+ * test fail instead of writing past the end of the array.
+ */
+int huffman_test_transitive_chunked(
+    struct aws_huffman_symbol_coder *coder,
+    const char *input,
+    size_t size,
+    size_t encoded_size,
+    size_t output_chunk_size,
+    const char **error_string) {
+
+    struct aws_huffman_encoder encoder;
+    aws_huffman_encoder_init(&encoder, coder);
+    struct aws_huffman_decoder decoder;
+    aws_huffman_decoder_init(&decoder, coder);
+
+    const size_t intermediate_buffer_size = size * 2;
+    AWS_VARIABLE_LENGTH_ARRAY(uint8_t, intermediate_buffer, intermediate_buffer_size);
+    memset(intermediate_buffer, 0, intermediate_buffer_size);
+    AWS_VARIABLE_LENGTH_ARRAY(char, output_buffer, size);
+    memset(output_buffer, 0, size);
+
+    struct aws_byte_cursor to_encode = aws_byte_cursor_from_array(input, size);
+    /* Both buffers start with zero usable capacity; the loops below widen them chunk by chunk */
+    struct aws_byte_buf intermediate_buf = aws_byte_buf_from_empty_array(intermediate_buffer, (size_t)-1);
+    intermediate_buf.capacity = 0;
+    struct aws_byte_buf output_buf = aws_byte_buf_from_empty_array(output_buffer, (size_t)-1);
+    output_buf.capacity = 0;
+
+    int result = AWS_OP_SUCCESS;
+
+    /* Encode, offering one more chunk of space per call until the encoder succeeds */
+    do {
+        const size_t previous_intermediate_len = intermediate_buf.len;
+
+        intermediate_buf.capacity += output_chunk_size;
+        if (intermediate_buf.capacity > intermediate_buffer_size) {
+            /* Never advertise more room than the backing array really has */
+            intermediate_buf.capacity = intermediate_buffer_size;
+        }
+
+        result = aws_huffman_encode(&encoder, &to_encode, &intermediate_buf);
+
+        if (intermediate_buf.len == previous_intermediate_len) {
+            *error_string = "encode didn't write any data";
+            return AWS_OP_ERR;
+        }
+
+        if (result != AWS_OP_SUCCESS && aws_last_error() != AWS_ERROR_SHORT_BUFFER) {
+            *error_string = "encode returned wrong error code";
+            return AWS_OP_ERR;
+        }
+    } while (result != AWS_OP_SUCCESS);
+
+    if (result != AWS_OP_SUCCESS) {
+        *error_string = "aws_huffman_encode failed";
+        return AWS_OP_ERR;
+    }
+    if (intermediate_buf.len > intermediate_buffer_size) {
+        *error_string = "too much data encoded";
+        return AWS_OP_ERR;
+    }
+    if (encoded_size && intermediate_buf.len != encoded_size) {
+        *error_string = "encoded length is incorrect";
+        return AWS_OP_ERR;
+    }
+
+    struct aws_byte_cursor intermediate_cur = aws_byte_cursor_from_buf(&intermediate_buf);
+
+    /* Decode the same way, one more chunk of output space per call */
+    do {
+        const size_t previous_output_len = output_buf.len;
+
+        output_buf.capacity += output_chunk_size;
+        if (output_buf.capacity > size) {
+            output_buf.capacity = size;
+        }
+
+        result = aws_huffman_decode(&decoder, &intermediate_cur, &output_buf);
+
+        if (output_buf.len == previous_output_len) {
+            *error_string = "decode didn't write any data";
+            return AWS_OP_ERR;
+        }
+
+        if (result != AWS_OP_SUCCESS && aws_last_error() != AWS_ERROR_SHORT_BUFFER) {
+            *error_string = "decode returned wrong error code";
+            return AWS_OP_ERR;
+        }
+    } while (result != AWS_OP_SUCCESS);
+
+    if (result != AWS_OP_SUCCESS) {
+        *error_string = "aws_huffman_decode failed";
+        return AWS_OP_ERR;
+    }
+    if (output_buf.len != size) {
+        *error_string = "decode output size incorrect";
+        return AWS_OP_ERR;
+    }
+    if (memcmp(input, output_buffer, size) != 0) {
+        *error_string = "decoded data does not match input data";
+        return AWS_OP_ERR;
+    }
+
+    return AWS_OP_SUCCESS;
+}