Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.

author: orivej <orivej@yandex-team.ru> 2022-02-10 16:44:49 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:49 +0300
commit: 718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree: 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/restricted/aws/aws-c-common/source/xml_parser.c
parent: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download: ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
1 files changed, 455 insertions, 455 deletions
diff --git a/contrib/restricted/aws/aws-c-common/source/xml_parser.c b/contrib/restricted/aws/aws-c-common/source/xml_parser.c
index 692324ac9a..7fa4da3461 100644
--- a/contrib/restricted/aws/aws-c-common/source/xml_parser.c
+++ b/contrib/restricted/aws/aws-c-common/source/xml_parser.c
@@ -1,455 +1,455 @@
-/**
- * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
- * SPDX-License-Identifier: Apache-2.0.
- */
-
-#include <aws/common/array_list.h>
-#include <aws/common/logging.h>
-#include <aws/common/private/xml_parser_impl.h>
-
-#ifdef _MSC_VER
-/* allow non-constant declared initializers. */
-#    pragma warning(disable : 4204)
-#endif
-
-static const size_t s_max_document_depth = 20;
-#define MAX_NAME_LEN ((size_t)256)
-#define NODE_CLOSE_OVERHEAD ((size_t)3)
-
-struct cb_stack_data {
-    aws_xml_parser_on_node_encountered_fn *cb;
-    void *user_data;
-};
-
-struct aws_xml_parser *aws_xml_parser_new(
-    struct aws_allocator *allocator,
-    const struct aws_xml_parser_options *options) {
-
-    AWS_PRECONDITION(allocator);
-    AWS_PRECONDITION(options);
-
-    struct aws_xml_parser *parser = aws_mem_calloc(allocator, 1, sizeof(struct aws_xml_parser));
-
-    if (parser == NULL) {
-        return NULL;
-    }
-
-    parser->allocator = allocator;
-    parser->doc = options->doc;
-
-    parser->max_depth = s_max_document_depth;
-    parser->error = AWS_OP_SUCCESS;
-
-    if (options->max_depth) {
-        parser->max_depth = options->max_depth;
-    }
-
-    if (aws_array_list_init_dynamic(&parser->callback_stack, allocator, 4, sizeof(struct cb_stack_data))) {
-        aws_mem_release(allocator, parser);
-        return NULL;
-    }
-
-    return parser;
-}
-
-void aws_xml_parser_destroy(struct aws_xml_parser *parser) {
-    AWS_PRECONDITION(parser);
-
-    aws_array_list_clean_up(&parser->callback_stack);
-
-    aws_mem_release(parser->allocator, parser);
-}
-
-int s_node_next_sibling(struct aws_xml_parser *parser);
-
-static bool s_double_quote_fn(uint8_t value) {
-    return value == '"';
-}
-
-/* load the node declaration line, parsing node name and attributes.
- *
- * something of the form:
- * <NodeName Attribute1=Value1 Attribute2=Value2 ...>
- * */
-static int s_load_node_decl(
-    struct aws_xml_parser *parser,
-    struct aws_byte_cursor *decl_body,
-    struct aws_xml_node *node) {
-    AWS_PRECONDITION(parser);
-    AWS_PRECONDITION(decl_body);
-    AWS_PRECONDITION(node);
-
-    struct aws_array_list splits;
-    AWS_ZERO_STRUCT(splits);
-
-    AWS_ZERO_ARRAY(parser->split_scratch);
-    aws_array_list_init_static(
-        &splits, parser->split_scratch, AWS_ARRAY_SIZE(parser->split_scratch), sizeof(struct aws_byte_cursor));
-
-    /* split by space, first split will be the node name, everything after will be attribute=value pairs. For now
-     * we limit to 10 attributes, if this is exceeded we consider it invalid document. */
-    if (aws_byte_cursor_split_on_char(decl_body, ' ', &splits)) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-    }
-
-    size_t splits_count = aws_array_list_length(&splits);
-
-    if (splits_count < 1) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-    }
-
-    aws_array_list_get_at(&splits, &node->name, 0);
-
-    AWS_ZERO_ARRAY(parser->attributes);
-    if (splits.length > 1) {
-        aws_array_list_init_static(
-            &node->attributes,
-            parser->attributes,
-            AWS_ARRAY_SIZE(parser->attributes),
-            sizeof(struct aws_xml_attribute));
-
-        for (size_t i = 1; i < splits.length; ++i) {
-            struct aws_byte_cursor attribute_pair;
-            AWS_ZERO_STRUCT(attribute_pair);
-            aws_array_list_get_at(&splits, &attribute_pair, i);
-
-            struct aws_byte_cursor att_val_pair[2];
-            AWS_ZERO_ARRAY(att_val_pair);
-            struct aws_array_list att_val_pair_lst;
-            AWS_ZERO_STRUCT(att_val_pair_lst);
-            aws_array_list_init_static(&att_val_pair_lst, att_val_pair, 2, sizeof(struct aws_byte_cursor));
-
-            if (!aws_byte_cursor_split_on_char(&attribute_pair, '=', &att_val_pair_lst)) {
-                struct aws_xml_attribute attribute = {
-                    .name = att_val_pair[0],
-                    .value = aws_byte_cursor_trim_pred(&att_val_pair[1], s_double_quote_fn),
-                };
-                aws_array_list_push_back(&node->attributes, &attribute);
-            }
-        }
-    }
-
-    return AWS_OP_SUCCESS;
-}
-
-int aws_xml_parser_parse(
-    struct aws_xml_parser *parser,
-    aws_xml_parser_on_node_encountered_fn *on_node_encountered,
-    void *user_data) {
-
-    AWS_PRECONDITION(parser);
-
-    if (on_node_encountered == NULL) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'on_node_encountered' argument for aws_xml_parser_parse is invalid.");
-        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
-        return AWS_OP_ERR;
-    }
-
-    aws_array_list_clear(&parser->callback_stack);
-
-    /* burn everything that precedes the actual xml nodes. */
-    while (parser->doc.len) {
-        uint8_t *start = memchr(parser->doc.ptr, '<', parser->doc.len);
-        if (!start) {
-            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        }
-
-        uint8_t *location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
-        if (!location) {
-            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        }
-
-        aws_byte_cursor_advance(&parser->doc, start - parser->doc.ptr);
-        /* if these are preamble statements, burn them. otherwise don't seek at all
-         * and assume it's just the doc with no preamble statements. */
-        if (*(parser->doc.ptr + 1) == '?' || *(parser->doc.ptr + 1) == '!') {
-            /* nobody cares about the preamble */
-            size_t advance = location - parser->doc.ptr + 1;
-            aws_byte_cursor_advance(&parser->doc, advance);
-        } else {
-            break;
-        }
-    }
-
-    /* now we should be at the start of the actual document. */
-    struct cb_stack_data stack_data = {
-        .cb = on_node_encountered,
-        .user_data = user_data,
-    };
-
-    AWS_FATAL_ASSERT(!aws_array_list_push_back(&parser->callback_stack, &stack_data));
-    return s_node_next_sibling(parser);
-}
-
-int s_advance_to_closing_tag(
-    struct aws_xml_parser *parser,
-    struct aws_xml_node *node,
-    struct aws_byte_cursor *out_body) {
-    AWS_PRECONDITION(parser);
-    AWS_PRECONDITION(node);
-
-    /* currently the max node name is 256 characters. This is arbitrary, but should be enough
-     * for our uses. If we ever generalize this, we'll have to come back and rethink this. */
-    uint8_t name_close[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
-    uint8_t name_open[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
-
-    struct aws_byte_buf closing_cmp_buf = aws_byte_buf_from_empty_array(name_close, sizeof(name_close));
-    struct aws_byte_buf open_cmp_buf = aws_byte_buf_from_empty_array(name_open, sizeof(name_open));
-
-    size_t closing_name_len = node->name.len + NODE_CLOSE_OVERHEAD;
-
-    if (closing_name_len > node->doc_at_body.len) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        return AWS_OP_ERR;
-    }
-
-    if (sizeof(name_close) < closing_name_len) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        return AWS_OP_ERR;
-    }
-
-    struct aws_byte_cursor open_bracket = aws_byte_cursor_from_c_str("<");
-    struct aws_byte_cursor close_token = aws_byte_cursor_from_c_str("/");
-    struct aws_byte_cursor close_bracket = aws_byte_cursor_from_c_str(">");
-
-    aws_byte_buf_append(&open_cmp_buf, &open_bracket);
-    aws_byte_buf_append(&open_cmp_buf, &node->name);
-
-    aws_byte_buf_append(&closing_cmp_buf, &open_bracket);
-    aws_byte_buf_append(&closing_cmp_buf, &close_token);
-    aws_byte_buf_append(&closing_cmp_buf, &node->name);
-    aws_byte_buf_append(&closing_cmp_buf, &close_bracket);
-
-    size_t depth_count = 1;
-    struct aws_byte_cursor to_find_open = aws_byte_cursor_from_buf(&open_cmp_buf);
-    struct aws_byte_cursor to_find_close = aws_byte_cursor_from_buf(&closing_cmp_buf);
-    struct aws_byte_cursor close_find_result;
-    AWS_ZERO_STRUCT(close_find_result);
-    do {
-        if (aws_byte_cursor_find_exact(&parser->doc, &to_find_close, &close_find_result)) {
-            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        }
-
-        /* if we find an opening node with the same name, before the closing tag keep going. */
-        struct aws_byte_cursor open_find_result;
-        AWS_ZERO_STRUCT(open_find_result);
-
-        while (parser->doc.len) {
-            if (!aws_byte_cursor_find_exact(&parser->doc, &to_find_open, &open_find_result)) {
-                if (open_find_result.ptr < close_find_result.ptr) {
-                    size_t skip_len = open_find_result.ptr - parser->doc.ptr;
-                    aws_byte_cursor_advance(&parser->doc, skip_len + 1);
-                    depth_count++;
-                    continue;
-                }
-            }
-            size_t skip_len = close_find_result.ptr - parser->doc.ptr;
-            aws_byte_cursor_advance(&parser->doc, skip_len + closing_cmp_buf.len);
-            depth_count--;
-            break;
-        }
-    } while (depth_count > 0);
-
-    size_t len = close_find_result.ptr - node->doc_at_body.ptr;
-
-    if (out_body) {
-        *out_body = aws_byte_cursor_from_array(node->doc_at_body.ptr, len);
-    }
-
-    return parser->error;
-}
-
-int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body) {
-    AWS_PRECONDITION(parser);
-    AWS_PRECONDITION(node);
-
-    node->processed = true;
-    return s_advance_to_closing_tag(parser, node, out_body);
-}
-
-int aws_xml_node_traverse(
-    struct aws_xml_parser *parser,
-    struct aws_xml_node *node,
-    aws_xml_parser_on_node_encountered_fn *on_node_encountered,
-    void *user_data) {
-    AWS_PRECONDITION(parser);
-    AWS_PRECONDITION(node);
-
-    if (on_node_encountered == NULL) {
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_XML_PARSER, "Callback 'on_node_encountered' for aws_xml_node_traverse is invalid.");
-        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
-        return AWS_OP_ERR;
-    }
-
-    node->processed = true;
-    struct cb_stack_data stack_data = {
-        .cb = on_node_encountered,
-        .user_data = user_data,
-    };
-
-    size_t doc_depth = aws_array_list_length(&parser->callback_stack);
-    if (doc_depth >= parser->max_depth) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        return AWS_OP_ERR;
-    }
-
-    if (aws_array_list_push_back(&parser->callback_stack, &stack_data)) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        return AWS_OP_ERR;
-    }
-
-    /* look for the next node at the current level. do this until we encounter the parent node's
-     * closing tag. */
-    while (!parser->stop_parsing && !parser->error) {
-        uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
-
-        if (!next_location) {
-            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        }
-
-        uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
-        if (!end_location) {
-            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-        }
-
-        bool parent_closed = false;
-
-        if (*(next_location + 1) == '/') {
-            parent_closed = true;
-        }
-
-        size_t node_name_len = end_location - next_location;
-
-        aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
-
-        if (parent_closed) {
-            break;
-        }
-
-        struct aws_byte_cursor decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
-
-        struct aws_xml_node next_node = {
-            .doc_at_body = parser->doc,
-            .processed = false,
-        };
-
-        if (s_load_node_decl(parser, &decl_body, &next_node)) {
-            return AWS_OP_ERR;
-        }
-
-        if (!on_node_encountered(parser, &next_node, user_data)) {
-            parser->stop_parsing = true;
-            return parser->error;
-        }
-
-        /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
-        if (!parser->stop_parsing && !next_node.processed) {
-            if (s_advance_to_closing_tag(parser, &next_node, NULL)) {
-                return AWS_OP_ERR;
-            }
-        }
-    }
-
-    if (parser->stop_parsing) {
-        return parser->error;
-    }
-
-    aws_array_list_pop_back(&parser->callback_stack);
-    return parser->error;
-}
-
-int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name) {
-    AWS_PRECONDITION(node);
-
-    if (out_name == NULL) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_name' argument for aws_xml_node_get_name is invalid.");
-        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
-        return AWS_OP_ERR;
-    }
-
-    *out_name = node->name;
-    return AWS_OP_SUCCESS;
-}
-
-size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) {
-    AWS_PRECONDITION(node);
-    return aws_array_list_length(&node->attributes);
-}
-
-int aws_xml_node_get_attribute(
-    const struct aws_xml_node *node,
-    size_t attribute_index,
-    struct aws_xml_attribute *out_attribute) {
-    AWS_PRECONDITION(node);
-
-    if (out_attribute == NULL) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_attribute' argument for aws_xml_node_get_attribute is invalid.");
-        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
-        return AWS_OP_ERR;
-    }
-
-    return aws_array_list_get_at(&node->attributes, out_attribute, attribute_index);
-}
-
-/* advance the parser to the next sibling node.*/
-int s_node_next_sibling(struct aws_xml_parser *parser) {
-    AWS_PRECONDITION(parser);
-
-    uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
-
-    if (!next_location) {
-        return parser->error;
-    }
-
-    aws_byte_cursor_advance(&parser->doc, next_location - parser->doc.ptr);
-    uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
-    if (!end_location) {
-        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
-        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
-    }
-
-    size_t node_name_len = end_location - next_location;
-    aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
-
-    struct aws_byte_cursor node_decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
-
-    struct aws_xml_node sibling_node = {
-        .doc_at_body = parser->doc,
-        .processed = false,
-    };
-
-    if (s_load_node_decl(parser, &node_decl_body, &sibling_node)) {
-        return AWS_OP_ERR;
-    }
-
-    struct cb_stack_data stack_data;
-    AWS_ZERO_STRUCT(stack_data);
-    aws_array_list_back(&parser->callback_stack, &stack_data);
-    AWS_FATAL_ASSERT(stack_data.cb);
-
-    parser->stop_parsing = !stack_data.cb(parser, &sibling_node, stack_data.user_data);
-
-    /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
-    if (!sibling_node.processed) {
-        if (s_advance_to_closing_tag(parser, &sibling_node, NULL)) {
-            return AWS_OP_ERR;
-        }
-    }
-
-    return parser->error;
-}
+/** 
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0. 
+ */ 
+ 
+#include <aws/common/array_list.h> 
+#include <aws/common/logging.h> 
+#include <aws/common/private/xml_parser_impl.h> 
+ 
+#ifdef _MSC_VER 
+/* allow non-constant declared initializers. */ 
+#    pragma warning(disable : 4204) 
+#endif 
+ 
+static const size_t s_max_document_depth = 20; 
+#define MAX_NAME_LEN ((size_t)256) 
+#define NODE_CLOSE_OVERHEAD ((size_t)3) 
+ 
+struct cb_stack_data { 
+    aws_xml_parser_on_node_encountered_fn *cb; 
+    void *user_data; 
+}; 
+ 
+struct aws_xml_parser *aws_xml_parser_new( 
+    struct aws_allocator *allocator, 
+    const struct aws_xml_parser_options *options) { 
+ 
+    AWS_PRECONDITION(allocator); 
+    AWS_PRECONDITION(options); 
+ 
+    struct aws_xml_parser *parser = aws_mem_calloc(allocator, 1, sizeof(struct aws_xml_parser)); 
+ 
+    if (parser == NULL) { 
+        return NULL; 
+    } 
+ 
+    parser->allocator = allocator; 
+    parser->doc = options->doc; 
+ 
+    parser->max_depth = s_max_document_depth; 
+    parser->error = AWS_OP_SUCCESS; 
+ 
+    if (options->max_depth) { 
+        parser->max_depth = options->max_depth; 
+    } 
+ 
+    if (aws_array_list_init_dynamic(&parser->callback_stack, allocator, 4, sizeof(struct cb_stack_data))) { 
+        aws_mem_release(allocator, parser); 
+        return NULL; 
+    } 
+ 
+    return parser; 
+} 
+ 
+void aws_xml_parser_destroy(struct aws_xml_parser *parser) { 
+    AWS_PRECONDITION(parser); 
+ 
+    aws_array_list_clean_up(&parser->callback_stack); 
+ 
+    aws_mem_release(parser->allocator, parser); 
+} 
+ 
+int s_node_next_sibling(struct aws_xml_parser *parser); 
+ 
+static bool s_double_quote_fn(uint8_t value) { 
+    return value == '"'; 
+} 
+ 
+/* load the node declaration line, parsing node name and attributes. 
+ * 
+ * something of the form: 
+ * <NodeName Attribute1=Value1 Attribute2=Value2 ...> 
+ * */ 
+static int s_load_node_decl( 
+    struct aws_xml_parser *parser, 
+    struct aws_byte_cursor *decl_body, 
+    struct aws_xml_node *node) { 
+    AWS_PRECONDITION(parser); 
+    AWS_PRECONDITION(decl_body); 
+    AWS_PRECONDITION(node); 
+ 
+    struct aws_array_list splits; 
+    AWS_ZERO_STRUCT(splits); 
+ 
+    AWS_ZERO_ARRAY(parser->split_scratch); 
+    aws_array_list_init_static( 
+        &splits, parser->split_scratch, AWS_ARRAY_SIZE(parser->split_scratch), sizeof(struct aws_byte_cursor)); 
+ 
+    /* split by space, first split will be the node name, everything after will be attribute=value pairs. For now 
+     * we limit to 10 attributes, if this is exceeded we consider it invalid document. */ 
+    if (aws_byte_cursor_split_on_char(decl_body, ' ', &splits)) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+    } 
+ 
+    size_t splits_count = aws_array_list_length(&splits); 
+ 
+    if (splits_count < 1) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+    } 
+ 
+    aws_array_list_get_at(&splits, &node->name, 0); 
+ 
+    AWS_ZERO_ARRAY(parser->attributes); 
+    if (splits.length > 1) { 
+        aws_array_list_init_static( 
+            &node->attributes, 
+            parser->attributes, 
+            AWS_ARRAY_SIZE(parser->attributes), 
+            sizeof(struct aws_xml_attribute)); 
+ 
+        for (size_t i = 1; i < splits.length; ++i) { 
+            struct aws_byte_cursor attribute_pair; 
+            AWS_ZERO_STRUCT(attribute_pair); 
+            aws_array_list_get_at(&splits, &attribute_pair, i); 
+ 
+            struct aws_byte_cursor att_val_pair[2]; 
+            AWS_ZERO_ARRAY(att_val_pair); 
+            struct aws_array_list att_val_pair_lst; 
+            AWS_ZERO_STRUCT(att_val_pair_lst); 
+            aws_array_list_init_static(&att_val_pair_lst, att_val_pair, 2, sizeof(struct aws_byte_cursor)); 
+ 
+            if (!aws_byte_cursor_split_on_char(&attribute_pair, '=', &att_val_pair_lst)) { 
+                struct aws_xml_attribute attribute = { 
+                    .name = att_val_pair[0], 
+                    .value = aws_byte_cursor_trim_pred(&att_val_pair[1], s_double_quote_fn), 
+                }; 
+                aws_array_list_push_back(&node->attributes, &attribute); 
+            } 
+        } 
+    } 
+ 
+    return AWS_OP_SUCCESS; 
+} 
+ 
+int aws_xml_parser_parse( 
+    struct aws_xml_parser *parser, 
+    aws_xml_parser_on_node_encountered_fn *on_node_encountered, 
+    void *user_data) { 
+ 
+    AWS_PRECONDITION(parser); 
+ 
+    if (on_node_encountered == NULL) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'on_node_encountered' argument for aws_xml_parser_parse is invalid."); 
+        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    aws_array_list_clear(&parser->callback_stack); 
+ 
+    /* burn everything that precedes the actual xml nodes. */ 
+    while (parser->doc.len) { 
+        uint8_t *start = memchr(parser->doc.ptr, '<', parser->doc.len); 
+        if (!start) { 
+            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        } 
+ 
+        uint8_t *location = memchr(parser->doc.ptr, '>', parser->doc.len); 
+ 
+        if (!location) { 
+            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        } 
+ 
+        aws_byte_cursor_advance(&parser->doc, start - parser->doc.ptr); 
+        /* if these are preamble statements, burn them. otherwise don't seek at all 
+         * and assume it's just the doc with no preamble statements. */ 
+        if (*(parser->doc.ptr + 1) == '?' || *(parser->doc.ptr + 1) == '!') { 
+            /* nobody cares about the preamble */ 
+            size_t advance = location - parser->doc.ptr + 1; 
+            aws_byte_cursor_advance(&parser->doc, advance); 
+        } else { 
+            break; 
+        } 
+    } 
+ 
+    /* now we should be at the start of the actual document. */ 
+    struct cb_stack_data stack_data = { 
+        .cb = on_node_encountered, 
+        .user_data = user_data, 
+    }; 
+ 
+    AWS_FATAL_ASSERT(!aws_array_list_push_back(&parser->callback_stack, &stack_data)); 
+    return s_node_next_sibling(parser); 
+} 
+ 
+int s_advance_to_closing_tag( 
+    struct aws_xml_parser *parser, 
+    struct aws_xml_node *node, 
+    struct aws_byte_cursor *out_body) { 
+    AWS_PRECONDITION(parser); 
+    AWS_PRECONDITION(node); 
+ 
+    /* currently the max node name is 256 characters. This is arbitrary, but should be enough 
+     * for our uses. If we ever generalize this, we'll have to come back and rethink this. */ 
+    uint8_t name_close[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0}; 
+    uint8_t name_open[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0}; 
+ 
+    struct aws_byte_buf closing_cmp_buf = aws_byte_buf_from_empty_array(name_close, sizeof(name_close)); 
+    struct aws_byte_buf open_cmp_buf = aws_byte_buf_from_empty_array(name_open, sizeof(name_open)); 
+ 
+    size_t closing_name_len = node->name.len + NODE_CLOSE_OVERHEAD; 
+ 
+    if (closing_name_len > node->doc_at_body.len) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    if (sizeof(name_close) < closing_name_len) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    struct aws_byte_cursor open_bracket = aws_byte_cursor_from_c_str("<"); 
+    struct aws_byte_cursor close_token = aws_byte_cursor_from_c_str("/"); 
+    struct aws_byte_cursor close_bracket = aws_byte_cursor_from_c_str(">"); 
+ 
+    aws_byte_buf_append(&open_cmp_buf, &open_bracket); 
+    aws_byte_buf_append(&open_cmp_buf, &node->name); 
+ 
+    aws_byte_buf_append(&closing_cmp_buf, &open_bracket); 
+    aws_byte_buf_append(&closing_cmp_buf, &close_token); 
+    aws_byte_buf_append(&closing_cmp_buf, &node->name); 
+    aws_byte_buf_append(&closing_cmp_buf, &close_bracket); 
+ 
+    size_t depth_count = 1; 
+    struct aws_byte_cursor to_find_open = aws_byte_cursor_from_buf(&open_cmp_buf); 
+    struct aws_byte_cursor to_find_close = aws_byte_cursor_from_buf(&closing_cmp_buf); 
+    struct aws_byte_cursor close_find_result; 
+    AWS_ZERO_STRUCT(close_find_result); 
+    do { 
+        if (aws_byte_cursor_find_exact(&parser->doc, &to_find_close, &close_find_result)) { 
+            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        } 
+ 
+        /* if we find an opening node with the same name, before the closing tag keep going. */ 
+        struct aws_byte_cursor open_find_result; 
+        AWS_ZERO_STRUCT(open_find_result); 
+ 
+        while (parser->doc.len) { 
+            if (!aws_byte_cursor_find_exact(&parser->doc, &to_find_open, &open_find_result)) { 
+                if (open_find_result.ptr < close_find_result.ptr) { 
+                    size_t skip_len = open_find_result.ptr - parser->doc.ptr; 
+                    aws_byte_cursor_advance(&parser->doc, skip_len + 1); 
+                    depth_count++; 
+                    continue; 
+                } 
+            } 
+            size_t skip_len = close_find_result.ptr - parser->doc.ptr; 
+            aws_byte_cursor_advance(&parser->doc, skip_len + closing_cmp_buf.len); 
+            depth_count--; 
+            break; 
+        } 
+    } while (depth_count > 0); 
+ 
+    size_t len = close_find_result.ptr - node->doc_at_body.ptr; 
+ 
+    if (out_body) { 
+        *out_body = aws_byte_cursor_from_array(node->doc_at_body.ptr, len); 
+    } 
+ 
+    return parser->error; 
+} 
+ 
+int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body) { 
+    AWS_PRECONDITION(parser); 
+    AWS_PRECONDITION(node); 
+ 
+    node->processed = true; 
+    return s_advance_to_closing_tag(parser, node, out_body); 
+} 
+ 
+int aws_xml_node_traverse( 
+    struct aws_xml_parser *parser, 
+    struct aws_xml_node *node, 
+    aws_xml_parser_on_node_encountered_fn *on_node_encountered, 
+    void *user_data) { 
+    AWS_PRECONDITION(parser); 
+    AWS_PRECONDITION(node); 
+ 
+    if (on_node_encountered == NULL) { 
+        AWS_LOGF_ERROR( 
+            AWS_LS_COMMON_XML_PARSER, "Callback 'on_node_encountered' for aws_xml_node_traverse is invalid."); 
+        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    node->processed = true; 
+    struct cb_stack_data stack_data = { 
+        .cb = on_node_encountered, 
+        .user_data = user_data, 
+    }; 
+ 
+    size_t doc_depth = aws_array_list_length(&parser->callback_stack); 
+    if (doc_depth >= parser->max_depth) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    if (aws_array_list_push_back(&parser->callback_stack, &stack_data)) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    /* look for the next node at the current level. do this until we encounter the parent node's 
+     * closing tag. */ 
+    while (!parser->stop_parsing && !parser->error) { 
+        uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len); 
+ 
+        if (!next_location) { 
+            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        } 
+ 
+        uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len); 
+ 
+        if (!end_location) { 
+            AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+            return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+        } 
+ 
+        bool parent_closed = false; 
+ 
+        if (*(next_location + 1) == '/') { 
+            parent_closed = true; 
+        } 
+ 
+        size_t node_name_len = end_location - next_location; 
+ 
+        aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1); 
+ 
+        if (parent_closed) { 
+            break; 
+        } 
+ 
+        struct aws_byte_cursor decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1); 
+ 
+        struct aws_xml_node next_node = { 
+            .doc_at_body = parser->doc, 
+            .processed = false, 
+        }; 
+ 
+        if (s_load_node_decl(parser, &decl_body, &next_node)) { 
+            return AWS_OP_ERR; 
+        } 
+ 
+        if (!on_node_encountered(parser, &next_node, user_data)) { 
+            parser->stop_parsing = true; 
+            return parser->error; 
+        } 
+ 
+        /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ 
+        if (!parser->stop_parsing && !next_node.processed) { 
+            if (s_advance_to_closing_tag(parser, &next_node, NULL)) { 
+                return AWS_OP_ERR; 
+            } 
+        } 
+    } 
+ 
+    if (parser->stop_parsing) { 
+        return parser->error; 
+    } 
+ 
+    aws_array_list_pop_back(&parser->callback_stack); 
+    return parser->error; 
+} 
+ 
+int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name) { 
+    AWS_PRECONDITION(node); 
+ 
+    if (out_name == NULL) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_name' argument for aws_xml_node_get_name is invalid."); 
+        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    *out_name = node->name; 
+    return AWS_OP_SUCCESS; 
+} 
+ 
+size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) { 
+    AWS_PRECONDITION(node); 
+    return aws_array_list_length(&node->attributes); 
+} 
+ 
+int aws_xml_node_get_attribute( 
+    const struct aws_xml_node *node, 
+    size_t attribute_index, 
+    struct aws_xml_attribute *out_attribute) { 
+    AWS_PRECONDITION(node); 
+ 
+    if (out_attribute == NULL) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_attribute' argument for aws_xml_node_get_attribute is invalid."); 
+        aws_raise_error(AWS_ERROR_INVALID_ARGUMENT); 
+        return AWS_OP_ERR; 
+    } 
+ 
+    return aws_array_list_get_at(&node->attributes, out_attribute, attribute_index); 
+} 
+ 
+/* advance the parser to the next sibling node.*/ 
+int s_node_next_sibling(struct aws_xml_parser *parser) { 
+    AWS_PRECONDITION(parser); 
+ 
+    uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len); 
+ 
+    if (!next_location) { 
+        return parser->error; 
+    } 
+ 
+    aws_byte_cursor_advance(&parser->doc, next_location - parser->doc.ptr); 
+    uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len); 
+ 
+    if (!end_location) { 
+        AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid."); 
+        return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING); 
+    } 
+ 
+    size_t node_name_len = end_location - next_location; 
+    aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1); 
+ 
+    struct aws_byte_cursor node_decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1); 
+ 
+    struct aws_xml_node sibling_node = { 
+        .doc_at_body = parser->doc, 
+        .processed = false, 
+    }; 
+ 
+    if (s_load_node_decl(parser, &node_decl_body, &sibling_node)) { 
+        return AWS_OP_ERR; 
+    } 
+ 
+    struct cb_stack_data stack_data; 
+    AWS_ZERO_STRUCT(stack_data); 
+    aws_array_list_back(&parser->callback_stack, &stack_data); 
+    AWS_FATAL_ASSERT(stack_data.cb); 
+ 
+    parser->stop_parsing = !stack_data.cb(parser, &sibling_node, stack_data.user_data); 
+ 
+    /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */ 
+    if (!sibling_node.processed) { 
+        if (s_advance_to_closing_tag(parser, &sibling_node, NULL)) { 
+            return AWS_OP_ERR; 
+        } 
+    } 
+ 
+    return parser->error; 
+}
author	orivej <orivej@yandex-team.ru>	2022-02-10 16:44:49 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:44:49 +0300
commit	718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree	46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/restricted/aws/aws-c-common/source/xml_parser.c
parent	e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download	ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz