aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/restricted/aws/aws-c-common/source/xml_parser.c
diff options
context:
space:
mode:
authororivej <orivej@yandex-team.ru>2022-02-10 16:44:49 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:49 +0300
commit718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/restricted/aws/aws-c-common/source/xml_parser.c
parente9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
downloadydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/restricted/aws/aws-c-common/source/xml_parser.c')
-rw-r--r--contrib/restricted/aws/aws-c-common/source/xml_parser.c910
1 files changed, 455 insertions, 455 deletions
diff --git a/contrib/restricted/aws/aws-c-common/source/xml_parser.c b/contrib/restricted/aws/aws-c-common/source/xml_parser.c
index 692324ac9a..7fa4da3461 100644
--- a/contrib/restricted/aws/aws-c-common/source/xml_parser.c
+++ b/contrib/restricted/aws/aws-c-common/source/xml_parser.c
@@ -1,455 +1,455 @@
-/**
- * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
- * SPDX-License-Identifier: Apache-2.0.
- */
-
-#include <aws/common/array_list.h>
-#include <aws/common/logging.h>
-#include <aws/common/private/xml_parser_impl.h>
-
-#ifdef _MSC_VER
-/* allow non-constant declared initializers. */
-# pragma warning(disable : 4204)
-#endif
-
-static const size_t s_max_document_depth = 20;
-#define MAX_NAME_LEN ((size_t)256)
-#define NODE_CLOSE_OVERHEAD ((size_t)3)
-
-struct cb_stack_data {
- aws_xml_parser_on_node_encountered_fn *cb;
- void *user_data;
-};
-
-struct aws_xml_parser *aws_xml_parser_new(
- struct aws_allocator *allocator,
- const struct aws_xml_parser_options *options) {
-
- AWS_PRECONDITION(allocator);
- AWS_PRECONDITION(options);
-
- struct aws_xml_parser *parser = aws_mem_calloc(allocator, 1, sizeof(struct aws_xml_parser));
-
- if (parser == NULL) {
- return NULL;
- }
-
- parser->allocator = allocator;
- parser->doc = options->doc;
-
- parser->max_depth = s_max_document_depth;
- parser->error = AWS_OP_SUCCESS;
-
- if (options->max_depth) {
- parser->max_depth = options->max_depth;
- }
-
- if (aws_array_list_init_dynamic(&parser->callback_stack, allocator, 4, sizeof(struct cb_stack_data))) {
- aws_mem_release(allocator, parser);
- return NULL;
- }
-
- return parser;
-}
-
-void aws_xml_parser_destroy(struct aws_xml_parser *parser) {
- AWS_PRECONDITION(parser);
-
- aws_array_list_clean_up(&parser->callback_stack);
-
- aws_mem_release(parser->allocator, parser);
-}
-
-int s_node_next_sibling(struct aws_xml_parser *parser);
-
-static bool s_double_quote_fn(uint8_t value) {
- return value == '"';
-}
-
-/* load the node declaration line, parsing node name and attributes.
- *
- * something of the form:
- * <NodeName Attribute1=Value1 Attribute2=Value2 ...>
- * */
-static int s_load_node_decl(
- struct aws_xml_parser *parser,
- struct aws_byte_cursor *decl_body,
- struct aws_xml_node *node) {
- AWS_PRECONDITION(parser);
- AWS_PRECONDITION(decl_body);
- AWS_PRECONDITION(node);
-
- struct aws_array_list splits;
- AWS_ZERO_STRUCT(splits);
-
- AWS_ZERO_ARRAY(parser->split_scratch);
- aws_array_list_init_static(
- &splits, parser->split_scratch, AWS_ARRAY_SIZE(parser->split_scratch), sizeof(struct aws_byte_cursor));
-
- /* split by space, first split will be the node name, everything after will be attribute=value pairs. For now
- * we limit to 10 attributes, if this is exceeded we consider it invalid document. */
- if (aws_byte_cursor_split_on_char(decl_body, ' ', &splits)) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- size_t splits_count = aws_array_list_length(&splits);
-
- if (splits_count < 1) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- aws_array_list_get_at(&splits, &node->name, 0);
-
- AWS_ZERO_ARRAY(parser->attributes);
- if (splits.length > 1) {
- aws_array_list_init_static(
- &node->attributes,
- parser->attributes,
- AWS_ARRAY_SIZE(parser->attributes),
- sizeof(struct aws_xml_attribute));
-
- for (size_t i = 1; i < splits.length; ++i) {
- struct aws_byte_cursor attribute_pair;
- AWS_ZERO_STRUCT(attribute_pair);
- aws_array_list_get_at(&splits, &attribute_pair, i);
-
- struct aws_byte_cursor att_val_pair[2];
- AWS_ZERO_ARRAY(att_val_pair);
- struct aws_array_list att_val_pair_lst;
- AWS_ZERO_STRUCT(att_val_pair_lst);
- aws_array_list_init_static(&att_val_pair_lst, att_val_pair, 2, sizeof(struct aws_byte_cursor));
-
- if (!aws_byte_cursor_split_on_char(&attribute_pair, '=', &att_val_pair_lst)) {
- struct aws_xml_attribute attribute = {
- .name = att_val_pair[0],
- .value = aws_byte_cursor_trim_pred(&att_val_pair[1], s_double_quote_fn),
- };
- aws_array_list_push_back(&node->attributes, &attribute);
- }
- }
- }
-
- return AWS_OP_SUCCESS;
-}
-
-int aws_xml_parser_parse(
- struct aws_xml_parser *parser,
- aws_xml_parser_on_node_encountered_fn *on_node_encountered,
- void *user_data) {
-
- AWS_PRECONDITION(parser);
-
- if (on_node_encountered == NULL) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'on_node_encountered' argument for aws_xml_parser_parse is invalid.");
- aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
- return AWS_OP_ERR;
- }
-
- aws_array_list_clear(&parser->callback_stack);
-
- /* burn everything that precedes the actual xml nodes. */
- while (parser->doc.len) {
- uint8_t *start = memchr(parser->doc.ptr, '<', parser->doc.len);
- if (!start) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- uint8_t *location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
- if (!location) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- aws_byte_cursor_advance(&parser->doc, start - parser->doc.ptr);
- /* if these are preamble statements, burn them. otherwise don't seek at all
- * and assume it's just the doc with no preamble statements. */
- if (*(parser->doc.ptr + 1) == '?' || *(parser->doc.ptr + 1) == '!') {
- /* nobody cares about the preamble */
- size_t advance = location - parser->doc.ptr + 1;
- aws_byte_cursor_advance(&parser->doc, advance);
- } else {
- break;
- }
- }
-
- /* now we should be at the start of the actual document. */
- struct cb_stack_data stack_data = {
- .cb = on_node_encountered,
- .user_data = user_data,
- };
-
- AWS_FATAL_ASSERT(!aws_array_list_push_back(&parser->callback_stack, &stack_data));
- return s_node_next_sibling(parser);
-}
-
-int s_advance_to_closing_tag(
- struct aws_xml_parser *parser,
- struct aws_xml_node *node,
- struct aws_byte_cursor *out_body) {
- AWS_PRECONDITION(parser);
- AWS_PRECONDITION(node);
-
- /* currently the max node name is 256 characters. This is arbitrary, but should be enough
- * for our uses. If we ever generalize this, we'll have to come back and rethink this. */
- uint8_t name_close[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
- uint8_t name_open[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
-
- struct aws_byte_buf closing_cmp_buf = aws_byte_buf_from_empty_array(name_close, sizeof(name_close));
- struct aws_byte_buf open_cmp_buf = aws_byte_buf_from_empty_array(name_open, sizeof(name_open));
-
- size_t closing_name_len = node->name.len + NODE_CLOSE_OVERHEAD;
-
- if (closing_name_len > node->doc_at_body.len) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- return AWS_OP_ERR;
- }
-
- if (sizeof(name_close) < closing_name_len) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- return AWS_OP_ERR;
- }
-
- struct aws_byte_cursor open_bracket = aws_byte_cursor_from_c_str("<");
- struct aws_byte_cursor close_token = aws_byte_cursor_from_c_str("/");
- struct aws_byte_cursor close_bracket = aws_byte_cursor_from_c_str(">");
-
- aws_byte_buf_append(&open_cmp_buf, &open_bracket);
- aws_byte_buf_append(&open_cmp_buf, &node->name);
-
- aws_byte_buf_append(&closing_cmp_buf, &open_bracket);
- aws_byte_buf_append(&closing_cmp_buf, &close_token);
- aws_byte_buf_append(&closing_cmp_buf, &node->name);
- aws_byte_buf_append(&closing_cmp_buf, &close_bracket);
-
- size_t depth_count = 1;
- struct aws_byte_cursor to_find_open = aws_byte_cursor_from_buf(&open_cmp_buf);
- struct aws_byte_cursor to_find_close = aws_byte_cursor_from_buf(&closing_cmp_buf);
- struct aws_byte_cursor close_find_result;
- AWS_ZERO_STRUCT(close_find_result);
- do {
- if (aws_byte_cursor_find_exact(&parser->doc, &to_find_close, &close_find_result)) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- /* if we find an opening node with the same name, before the closing tag keep going. */
- struct aws_byte_cursor open_find_result;
- AWS_ZERO_STRUCT(open_find_result);
-
- while (parser->doc.len) {
- if (!aws_byte_cursor_find_exact(&parser->doc, &to_find_open, &open_find_result)) {
- if (open_find_result.ptr < close_find_result.ptr) {
- size_t skip_len = open_find_result.ptr - parser->doc.ptr;
- aws_byte_cursor_advance(&parser->doc, skip_len + 1);
- depth_count++;
- continue;
- }
- }
- size_t skip_len = close_find_result.ptr - parser->doc.ptr;
- aws_byte_cursor_advance(&parser->doc, skip_len + closing_cmp_buf.len);
- depth_count--;
- break;
- }
- } while (depth_count > 0);
-
- size_t len = close_find_result.ptr - node->doc_at_body.ptr;
-
- if (out_body) {
- *out_body = aws_byte_cursor_from_array(node->doc_at_body.ptr, len);
- }
-
- return parser->error;
-}
-
-int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body) {
- AWS_PRECONDITION(parser);
- AWS_PRECONDITION(node);
-
- node->processed = true;
- return s_advance_to_closing_tag(parser, node, out_body);
-}
-
-int aws_xml_node_traverse(
- struct aws_xml_parser *parser,
- struct aws_xml_node *node,
- aws_xml_parser_on_node_encountered_fn *on_node_encountered,
- void *user_data) {
- AWS_PRECONDITION(parser);
- AWS_PRECONDITION(node);
-
- if (on_node_encountered == NULL) {
- AWS_LOGF_ERROR(
- AWS_LS_COMMON_XML_PARSER, "Callback 'on_node_encountered' for aws_xml_node_traverse is invalid.");
- aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
- return AWS_OP_ERR;
- }
-
- node->processed = true;
- struct cb_stack_data stack_data = {
- .cb = on_node_encountered,
- .user_data = user_data,
- };
-
- size_t doc_depth = aws_array_list_length(&parser->callback_stack);
- if (doc_depth >= parser->max_depth) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- return AWS_OP_ERR;
- }
-
- if (aws_array_list_push_back(&parser->callback_stack, &stack_data)) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- return AWS_OP_ERR;
- }
-
- /* look for the next node at the current level. do this until we encounter the parent node's
- * closing tag. */
- while (!parser->stop_parsing && !parser->error) {
- uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
-
- if (!next_location) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
- if (!end_location) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- bool parent_closed = false;
-
- if (*(next_location + 1) == '/') {
- parent_closed = true;
- }
-
- size_t node_name_len = end_location - next_location;
-
- aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
-
- if (parent_closed) {
- break;
- }
-
- struct aws_byte_cursor decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
-
- struct aws_xml_node next_node = {
- .doc_at_body = parser->doc,
- .processed = false,
- };
-
- if (s_load_node_decl(parser, &decl_body, &next_node)) {
- return AWS_OP_ERR;
- }
-
- if (!on_node_encountered(parser, &next_node, user_data)) {
- parser->stop_parsing = true;
- return parser->error;
- }
-
- /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
- if (!parser->stop_parsing && !next_node.processed) {
- if (s_advance_to_closing_tag(parser, &next_node, NULL)) {
- return AWS_OP_ERR;
- }
- }
- }
-
- if (parser->stop_parsing) {
- return parser->error;
- }
-
- aws_array_list_pop_back(&parser->callback_stack);
- return parser->error;
-}
-
-int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name) {
- AWS_PRECONDITION(node);
-
- if (out_name == NULL) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_name' argument for aws_xml_node_get_name is invalid.");
- aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
- return AWS_OP_ERR;
- }
-
- *out_name = node->name;
- return AWS_OP_SUCCESS;
-}
-
-size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) {
- AWS_PRECONDITION(node);
- return aws_array_list_length(&node->attributes);
-}
-
-int aws_xml_node_get_attribute(
- const struct aws_xml_node *node,
- size_t attribute_index,
- struct aws_xml_attribute *out_attribute) {
- AWS_PRECONDITION(node);
-
- if (out_attribute == NULL) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_attribute' argument for aws_xml_node_get_attribute is invalid.");
- aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
- return AWS_OP_ERR;
- }
-
- return aws_array_list_get_at(&node->attributes, out_attribute, attribute_index);
-}
-
-/* advance the parser to the next sibling node.*/
-int s_node_next_sibling(struct aws_xml_parser *parser) {
- AWS_PRECONDITION(parser);
-
- uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
-
- if (!next_location) {
- return parser->error;
- }
-
- aws_byte_cursor_advance(&parser->doc, next_location - parser->doc.ptr);
- uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
-
- if (!end_location) {
- AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
- return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
- }
-
- size_t node_name_len = end_location - next_location;
- aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
-
- struct aws_byte_cursor node_decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
-
- struct aws_xml_node sibling_node = {
- .doc_at_body = parser->doc,
- .processed = false,
- };
-
- if (s_load_node_decl(parser, &node_decl_body, &sibling_node)) {
- return AWS_OP_ERR;
- }
-
- struct cb_stack_data stack_data;
- AWS_ZERO_STRUCT(stack_data);
- aws_array_list_back(&parser->callback_stack, &stack_data);
- AWS_FATAL_ASSERT(stack_data.cb);
-
- parser->stop_parsing = !stack_data.cb(parser, &sibling_node, stack_data.user_data);
-
- /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
- if (!sibling_node.processed) {
- if (s_advance_to_closing_tag(parser, &sibling_node, NULL)) {
- return AWS_OP_ERR;
- }
- }
-
- return parser->error;
-}
+/**
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0.
+ */
+
+#include <aws/common/array_list.h>
+#include <aws/common/logging.h>
+#include <aws/common/private/xml_parser_impl.h>
+
+#ifdef _MSC_VER
+/* allow non-constant declared initializers. */
+# pragma warning(disable : 4204)
+#endif
+
+static const size_t s_max_document_depth = 20;
+#define MAX_NAME_LEN ((size_t)256)
+#define NODE_CLOSE_OVERHEAD ((size_t)3)
+
+struct cb_stack_data {
+ aws_xml_parser_on_node_encountered_fn *cb;
+ void *user_data;
+};
+
+struct aws_xml_parser *aws_xml_parser_new(
+ struct aws_allocator *allocator,
+ const struct aws_xml_parser_options *options) {
+
+ AWS_PRECONDITION(allocator);
+ AWS_PRECONDITION(options);
+
+ struct aws_xml_parser *parser = aws_mem_calloc(allocator, 1, sizeof(struct aws_xml_parser));
+
+ if (parser == NULL) {
+ return NULL;
+ }
+
+ parser->allocator = allocator;
+ parser->doc = options->doc;
+
+ parser->max_depth = s_max_document_depth;
+ parser->error = AWS_OP_SUCCESS;
+
+ if (options->max_depth) {
+ parser->max_depth = options->max_depth;
+ }
+
+ if (aws_array_list_init_dynamic(&parser->callback_stack, allocator, 4, sizeof(struct cb_stack_data))) {
+ aws_mem_release(allocator, parser);
+ return NULL;
+ }
+
+ return parser;
+}
+
+void aws_xml_parser_destroy(struct aws_xml_parser *parser) {
+ AWS_PRECONDITION(parser);
+
+ aws_array_list_clean_up(&parser->callback_stack);
+
+ aws_mem_release(parser->allocator, parser);
+}
+
+int s_node_next_sibling(struct aws_xml_parser *parser);
+
+static bool s_double_quote_fn(uint8_t value) {
+ return value == '"';
+}
+
+/* load the node declaration line, parsing node name and attributes.
+ *
+ * something of the form:
+ * <NodeName Attribute1=Value1 Attribute2=Value2 ...>
+ * */
+static int s_load_node_decl(
+ struct aws_xml_parser *parser,
+ struct aws_byte_cursor *decl_body,
+ struct aws_xml_node *node) {
+ AWS_PRECONDITION(parser);
+ AWS_PRECONDITION(decl_body);
+ AWS_PRECONDITION(node);
+
+ struct aws_array_list splits;
+ AWS_ZERO_STRUCT(splits);
+
+ AWS_ZERO_ARRAY(parser->split_scratch);
+ aws_array_list_init_static(
+ &splits, parser->split_scratch, AWS_ARRAY_SIZE(parser->split_scratch), sizeof(struct aws_byte_cursor));
+
+ /* split by space, first split will be the node name, everything after will be attribute=value pairs. For now
+ * we limit to 10 attributes, if this is exceeded we consider it invalid document. */
+ if (aws_byte_cursor_split_on_char(decl_body, ' ', &splits)) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ size_t splits_count = aws_array_list_length(&splits);
+
+ if (splits_count < 1) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ aws_array_list_get_at(&splits, &node->name, 0);
+
+ AWS_ZERO_ARRAY(parser->attributes);
+ if (splits.length > 1) {
+ aws_array_list_init_static(
+ &node->attributes,
+ parser->attributes,
+ AWS_ARRAY_SIZE(parser->attributes),
+ sizeof(struct aws_xml_attribute));
+
+ for (size_t i = 1; i < splits.length; ++i) {
+ struct aws_byte_cursor attribute_pair;
+ AWS_ZERO_STRUCT(attribute_pair);
+ aws_array_list_get_at(&splits, &attribute_pair, i);
+
+ struct aws_byte_cursor att_val_pair[2];
+ AWS_ZERO_ARRAY(att_val_pair);
+ struct aws_array_list att_val_pair_lst;
+ AWS_ZERO_STRUCT(att_val_pair_lst);
+ aws_array_list_init_static(&att_val_pair_lst, att_val_pair, 2, sizeof(struct aws_byte_cursor));
+
+ if (!aws_byte_cursor_split_on_char(&attribute_pair, '=', &att_val_pair_lst)) {
+ struct aws_xml_attribute attribute = {
+ .name = att_val_pair[0],
+ .value = aws_byte_cursor_trim_pred(&att_val_pair[1], s_double_quote_fn),
+ };
+ aws_array_list_push_back(&node->attributes, &attribute);
+ }
+ }
+ }
+
+ return AWS_OP_SUCCESS;
+}
+
+int aws_xml_parser_parse(
+ struct aws_xml_parser *parser,
+ aws_xml_parser_on_node_encountered_fn *on_node_encountered,
+ void *user_data) {
+
+ AWS_PRECONDITION(parser);
+
+ if (on_node_encountered == NULL) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'on_node_encountered' argument for aws_xml_parser_parse is invalid.");
+ aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
+ return AWS_OP_ERR;
+ }
+
+ aws_array_list_clear(&parser->callback_stack);
+
+ /* burn everything that precedes the actual xml nodes. */
+ while (parser->doc.len) {
+ uint8_t *start = memchr(parser->doc.ptr, '<', parser->doc.len);
+ if (!start) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ uint8_t *location = memchr(parser->doc.ptr, '>', parser->doc.len);
+
+ if (!location) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ aws_byte_cursor_advance(&parser->doc, start - parser->doc.ptr);
+ /* if these are preamble statements, burn them. otherwise don't seek at all
+ * and assume it's just the doc with no preamble statements. */
+ if (*(parser->doc.ptr + 1) == '?' || *(parser->doc.ptr + 1) == '!') {
+ /* nobody cares about the preamble */
+ size_t advance = location - parser->doc.ptr + 1;
+ aws_byte_cursor_advance(&parser->doc, advance);
+ } else {
+ break;
+ }
+ }
+
+ /* now we should be at the start of the actual document. */
+ struct cb_stack_data stack_data = {
+ .cb = on_node_encountered,
+ .user_data = user_data,
+ };
+
+ AWS_FATAL_ASSERT(!aws_array_list_push_back(&parser->callback_stack, &stack_data));
+ return s_node_next_sibling(parser);
+}
+
+int s_advance_to_closing_tag(
+ struct aws_xml_parser *parser,
+ struct aws_xml_node *node,
+ struct aws_byte_cursor *out_body) {
+ AWS_PRECONDITION(parser);
+ AWS_PRECONDITION(node);
+
+ /* currently the max node name is 256 characters. This is arbitrary, but should be enough
+ * for our uses. If we ever generalize this, we'll have to come back and rethink this. */
+ uint8_t name_close[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
+ uint8_t name_open[MAX_NAME_LEN + NODE_CLOSE_OVERHEAD] = {0};
+
+ struct aws_byte_buf closing_cmp_buf = aws_byte_buf_from_empty_array(name_close, sizeof(name_close));
+ struct aws_byte_buf open_cmp_buf = aws_byte_buf_from_empty_array(name_open, sizeof(name_open));
+
+ size_t closing_name_len = node->name.len + NODE_CLOSE_OVERHEAD;
+
+ if (closing_name_len > node->doc_at_body.len) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ return AWS_OP_ERR;
+ }
+
+ if (sizeof(name_close) < closing_name_len) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ return AWS_OP_ERR;
+ }
+
+ struct aws_byte_cursor open_bracket = aws_byte_cursor_from_c_str("<");
+ struct aws_byte_cursor close_token = aws_byte_cursor_from_c_str("/");
+ struct aws_byte_cursor close_bracket = aws_byte_cursor_from_c_str(">");
+
+ aws_byte_buf_append(&open_cmp_buf, &open_bracket);
+ aws_byte_buf_append(&open_cmp_buf, &node->name);
+
+ aws_byte_buf_append(&closing_cmp_buf, &open_bracket);
+ aws_byte_buf_append(&closing_cmp_buf, &close_token);
+ aws_byte_buf_append(&closing_cmp_buf, &node->name);
+ aws_byte_buf_append(&closing_cmp_buf, &close_bracket);
+
+ size_t depth_count = 1;
+ struct aws_byte_cursor to_find_open = aws_byte_cursor_from_buf(&open_cmp_buf);
+ struct aws_byte_cursor to_find_close = aws_byte_cursor_from_buf(&closing_cmp_buf);
+ struct aws_byte_cursor close_find_result;
+ AWS_ZERO_STRUCT(close_find_result);
+ do {
+ if (aws_byte_cursor_find_exact(&parser->doc, &to_find_close, &close_find_result)) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ /* if we find an opening node with the same name, before the closing tag keep going. */
+ struct aws_byte_cursor open_find_result;
+ AWS_ZERO_STRUCT(open_find_result);
+
+ while (parser->doc.len) {
+ if (!aws_byte_cursor_find_exact(&parser->doc, &to_find_open, &open_find_result)) {
+ if (open_find_result.ptr < close_find_result.ptr) {
+ size_t skip_len = open_find_result.ptr - parser->doc.ptr;
+ aws_byte_cursor_advance(&parser->doc, skip_len + 1);
+ depth_count++;
+ continue;
+ }
+ }
+ size_t skip_len = close_find_result.ptr - parser->doc.ptr;
+ aws_byte_cursor_advance(&parser->doc, skip_len + closing_cmp_buf.len);
+ depth_count--;
+ break;
+ }
+ } while (depth_count > 0);
+
+ size_t len = close_find_result.ptr - node->doc_at_body.ptr;
+
+ if (out_body) {
+ *out_body = aws_byte_cursor_from_array(node->doc_at_body.ptr, len);
+ }
+
+ return parser->error;
+}
+
+int aws_xml_node_as_body(struct aws_xml_parser *parser, struct aws_xml_node *node, struct aws_byte_cursor *out_body) {
+ AWS_PRECONDITION(parser);
+ AWS_PRECONDITION(node);
+
+ node->processed = true;
+ return s_advance_to_closing_tag(parser, node, out_body);
+}
+
+int aws_xml_node_traverse(
+ struct aws_xml_parser *parser,
+ struct aws_xml_node *node,
+ aws_xml_parser_on_node_encountered_fn *on_node_encountered,
+ void *user_data) {
+ AWS_PRECONDITION(parser);
+ AWS_PRECONDITION(node);
+
+ if (on_node_encountered == NULL) {
+ AWS_LOGF_ERROR(
+ AWS_LS_COMMON_XML_PARSER, "Callback 'on_node_encountered' for aws_xml_node_traverse is invalid.");
+ aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
+ return AWS_OP_ERR;
+ }
+
+ node->processed = true;
+ struct cb_stack_data stack_data = {
+ .cb = on_node_encountered,
+ .user_data = user_data,
+ };
+
+ size_t doc_depth = aws_array_list_length(&parser->callback_stack);
+ if (doc_depth >= parser->max_depth) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ return AWS_OP_ERR;
+ }
+
+ if (aws_array_list_push_back(&parser->callback_stack, &stack_data)) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ parser->error = aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ return AWS_OP_ERR;
+ }
+
+ /* look for the next node at the current level. do this until we encounter the parent node's
+ * closing tag. */
+ while (!parser->stop_parsing && !parser->error) {
+ uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
+
+ if (!next_location) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
+
+ if (!end_location) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ bool parent_closed = false;
+
+ if (*(next_location + 1) == '/') {
+ parent_closed = true;
+ }
+
+ size_t node_name_len = end_location - next_location;
+
+ aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
+
+ if (parent_closed) {
+ break;
+ }
+
+ struct aws_byte_cursor decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
+
+ struct aws_xml_node next_node = {
+ .doc_at_body = parser->doc,
+ .processed = false,
+ };
+
+ if (s_load_node_decl(parser, &decl_body, &next_node)) {
+ return AWS_OP_ERR;
+ }
+
+ if (!on_node_encountered(parser, &next_node, user_data)) {
+ parser->stop_parsing = true;
+ return parser->error;
+ }
+
+ /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
+ if (!parser->stop_parsing && !next_node.processed) {
+ if (s_advance_to_closing_tag(parser, &next_node, NULL)) {
+ return AWS_OP_ERR;
+ }
+ }
+ }
+
+ if (parser->stop_parsing) {
+ return parser->error;
+ }
+
+ aws_array_list_pop_back(&parser->callback_stack);
+ return parser->error;
+}
+
+int aws_xml_node_get_name(const struct aws_xml_node *node, struct aws_byte_cursor *out_name) {
+ AWS_PRECONDITION(node);
+
+ if (out_name == NULL) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_name' argument for aws_xml_node_get_name is invalid.");
+ aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
+ return AWS_OP_ERR;
+ }
+
+ *out_name = node->name;
+ return AWS_OP_SUCCESS;
+}
+
+size_t aws_xml_node_get_num_attributes(const struct aws_xml_node *node) {
+ AWS_PRECONDITION(node);
+ return aws_array_list_length(&node->attributes);
+}
+
+int aws_xml_node_get_attribute(
+ const struct aws_xml_node *node,
+ size_t attribute_index,
+ struct aws_xml_attribute *out_attribute) {
+ AWS_PRECONDITION(node);
+
+ if (out_attribute == NULL) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "'out_attribute' argument for aws_xml_node_get_attribute is invalid.");
+ aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
+ return AWS_OP_ERR;
+ }
+
+ return aws_array_list_get_at(&node->attributes, out_attribute, attribute_index);
+}
+
+/* advance the parser to the next sibling node.*/
+int s_node_next_sibling(struct aws_xml_parser *parser) {
+ AWS_PRECONDITION(parser);
+
+ uint8_t *next_location = memchr(parser->doc.ptr, '<', parser->doc.len);
+
+ if (!next_location) {
+ return parser->error;
+ }
+
+ aws_byte_cursor_advance(&parser->doc, next_location - parser->doc.ptr);
+ uint8_t *end_location = memchr(parser->doc.ptr, '>', parser->doc.len);
+
+ if (!end_location) {
+ AWS_LOGF_ERROR(AWS_LS_COMMON_XML_PARSER, "XML document is invalid.");
+ return aws_raise_error(AWS_ERROR_MALFORMED_INPUT_STRING);
+ }
+
+ size_t node_name_len = end_location - next_location;
+ aws_byte_cursor_advance(&parser->doc, end_location - parser->doc.ptr + 1);
+
+ struct aws_byte_cursor node_decl_body = aws_byte_cursor_from_array(next_location + 1, node_name_len - 1);
+
+ struct aws_xml_node sibling_node = {
+ .doc_at_body = parser->doc,
+ .processed = false,
+ };
+
+ if (s_load_node_decl(parser, &node_decl_body, &sibling_node)) {
+ return AWS_OP_ERR;
+ }
+
+ struct cb_stack_data stack_data;
+ AWS_ZERO_STRUCT(stack_data);
+ aws_array_list_back(&parser->callback_stack, &stack_data);
+ AWS_FATAL_ASSERT(stack_data.cb);
+
+ parser->stop_parsing = !stack_data.cb(parser, &sibling_node, stack_data.user_data);
+
+ /* if the user simply returned while skipping the node altogether, go ahead and do the skip over. */
+ if (!sibling_node.processed) {
+ if (s_advance_to_closing_tag(parser, &sibling_node, NULL)) {
+ return AWS_OP_ERR;
+ }
+ }
+
+ return parser->error;
+}