summaryrefslogtreecommitdiffstats
path: root/contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c
diff options
context:
space:
mode:
authorthegeorg <[email protected]>2025-05-12 15:51:24 +0300
committerthegeorg <[email protected]>2025-05-12 16:06:27 +0300
commitd629bb70c8773d2c0c43f5088ddbb5a86d8c37ea (patch)
tree4f678e0d65ad08c800db21c657d3b0f71fafed06 /contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c
parent92c4b696d7a1c03d54e13aff7a7c20a078d90dd7 (diff)
Update contrib/restricted/aws libraries to nixpkgs 24.05
commit_hash:f8083acb039e6005e820cdee77b84e0a6b6c6d6d
Diffstat (limited to 'contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c')
-rw-r--r--contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c545
1 files changed, 401 insertions, 144 deletions
diff --git a/contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c b/contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c
index 75689aaa424..a71d418e387 100644
--- a/contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c
+++ b/contrib/restricted/aws/aws-c-s3/source/s3_auto_ranged_get.c
@@ -11,14 +11,13 @@
#include <aws/common/string.h>
#include <inttypes.h>
-#ifdef _MSC_VER
-/* sscanf warning (not currently scanning for strings) */
-# pragma warning(disable : 4996)
-#endif
-
+/* Dont use buffer pool when we know response size, and its below this number,
+ * i.e. when user provides explicit range that is small, ex. range = 1-100.
+ * Instead of going through the pool in that case, we just use a dynamic buffer
+ * for response (pre-mempool behavior). */
+const uint64_t s_min_size_response_for_pooling = 1 * 1024 * 1024;
const uint32_t s_conservative_max_requests_in_flight = 8;
const struct aws_byte_cursor g_application_xml_value = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("application/xml");
-const struct aws_byte_cursor g_object_size_value = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("ActualObjectSize");
static void s_s3_meta_request_auto_ranged_get_destroy(struct aws_s3_meta_request *meta_request);
@@ -27,9 +26,7 @@ static bool s_s3_auto_ranged_get_update(
uint32_t flags,
struct aws_s3_request **out_request);
-static int s_s3_auto_ranged_get_prepare_request(
- struct aws_s3_meta_request *meta_request,
- struct aws_s3_request *request);
+static struct aws_future_void *s_s3_auto_ranged_get_prepare_request(struct aws_s3_request *request);
static void s_s3_auto_ranged_get_request_finished(
struct aws_s3_meta_request *meta_request,
@@ -54,10 +51,10 @@ static int s_s3_auto_ranged_get_success_status(struct aws_s3_meta_request *meta_
AWS_PRECONDITION(auto_ranged_get);
if (auto_ranged_get->initial_message_has_range_header) {
- return AWS_S3_RESPONSE_STATUS_RANGE_SUCCESS;
+ return AWS_HTTP_STATUS_CODE_206_PARTIAL_CONTENT;
}
- return AWS_S3_RESPONSE_STATUS_SUCCESS;
+ return AWS_HTTP_STATUS_CODE_200_OK;
}
/* Allocate a new auto-ranged-get meta request. */
@@ -96,13 +93,36 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_get_new(
struct aws_http_headers *headers = aws_http_message_get_headers(auto_ranged_get->base.initial_request_message);
AWS_ASSERT(headers != NULL);
- auto_ranged_get->initial_message_has_range_header = aws_http_headers_has(headers, g_range_header_name);
+ if (aws_http_headers_has(headers, g_range_header_name)) {
+ auto_ranged_get->initial_message_has_range_header = true;
+ if (aws_s3_parse_request_range_header(
+ headers,
+ &auto_ranged_get->initial_message_has_start_range,
+ &auto_ranged_get->initial_message_has_end_range,
+ &auto_ranged_get->initial_range_start,
+ &auto_ranged_get->initial_range_end)) {
+ AWS_LOGF_ERROR(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p Could not parse Range header for Auto-Ranged-Get Meta Request.",
+ (void *)auto_ranged_get);
+ goto on_error;
+ }
+ }
auto_ranged_get->initial_message_has_if_match_header = aws_http_headers_has(headers, g_if_match_header_name);
-
+ auto_ranged_get->synced_data.first_part_size = auto_ranged_get->base.part_size;
+ if (options->object_size_hint != NULL) {
+ auto_ranged_get->object_size_hint_available = true;
+ auto_ranged_get->object_size_hint = *options->object_size_hint;
+ }
AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST, "id=%p Created new Auto-Ranged Get Meta Request.", (void *)&auto_ranged_get->base);
return &auto_ranged_get->base;
+
+on_error:
+ /* This will also clean up the auto_ranged_get */
+ aws_s3_meta_request_release(&(auto_ranged_get->base));
+ return NULL;
}
static void s_s3_meta_request_auto_ranged_get_destroy(struct aws_s3_meta_request *meta_request) {
@@ -114,6 +134,56 @@ static void s_s3_meta_request_auto_ranged_get_destroy(struct aws_s3_meta_request
aws_mem_release(meta_request->allocator, auto_ranged_get);
}
+/*
+ * This function returns the type of first request which we will also use to discover overall object size.
+ */
+static enum aws_s3_auto_ranged_get_request_type s_s3_get_request_type_for_discovering_object_size(
+ const struct aws_s3_meta_request *meta_request) {
+ AWS_PRECONDITION(meta_request);
+ struct aws_s3_auto_ranged_get *auto_ranged_get = meta_request->impl;
+ AWS_ASSERT(auto_ranged_get);
+
+ /*
+ * When we attempt to download an empty file using the `AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE`
+ * request type, the request fails with an empty file error. We then reset `object_range_known`
+ * (`object_range_empty` is set to true) and try to download the file again with
+ * `AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1`. We send another request, even though there is
+ * no body, to provide successful response headers to the user. If the file is still empty, successful response
+ * headers will be provided to the users. Otherwise, the newer version of the file will be downloaded.
+ */
+ if (auto_ranged_get->synced_data.object_range_empty != 0) {
+ auto_ranged_get->synced_data.object_range_empty = 0;
+ return AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1;
+ }
+
+ /*
+ * If a range header exists but has no start-range (i.e. Range: bytes=-100), we perform a HeadRequest. If the
+ * start-range is unknown, we could potentially execute a request from the end-range and keep that request around
+ * until the meta request finishes. However, this approach involves the complexity of managing backpressure. For
+ * simplicity, we execute a HeadRequest if the start-range is not specified.
+ */
+ if (auto_ranged_get->initial_message_has_range_header != 0) {
+ return auto_ranged_get->initial_message_has_start_range
+ ? AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE
+ : AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT;
+ }
+
+ /* If we don't need checksum validation, then discover the size of the object while trying to get the first part. */
+ if (!meta_request->checksum_config.validate_response_checksum) {
+ return AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE;
+ }
+
+ /* If the object_size_hint indicates that it is a small one part file, then try to get the file directly
+ * TODO: Bypass memory limiter so that we don't overallocate memory for small files
+ */
+ if (auto_ranged_get->object_size_hint_available && auto_ranged_get->object_size_hint <= meta_request->part_size) {
+ return AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1;
+ }
+
+ /* Otherwise, do a headObject so that we can validate checksum if the file was uploaded as a single part */
+ return AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT;
+}
+
static bool s_s3_auto_ranged_get_update(
struct aws_s3_meta_request *meta_request,
uint32_t flags,
@@ -142,7 +212,7 @@ static bool s_s3_auto_ranged_get_update(
/* auto-ranged-gets make use of body streaming, which will hold onto response bodies if parts earlier in
* the file haven't arrived yet. This can potentially create a lot of backed up requests, causing us to
* hit our global request limit. To help mitigate this, when the "conservative" flag is passed in, we
- * only allow the total amount of requests being sent/streamed to be inside of a set limit. */
+ * only allow the total amount of requests being sent/streamed to be inside a set limit. */
if (num_requests_in_flight > s_conservative_max_requests_in_flight) {
goto has_work_remaining;
}
@@ -151,67 +221,110 @@ static bool s_s3_auto_ranged_get_update(
/* If the overall range of the object that we are trying to retrieve isn't known yet, then we need to send a
* request to figure that out. */
if (!auto_ranged_get->synced_data.object_range_known) {
-
- /* If there exists a range header or we require validation of the response checksum, we currently always
- * do a head request first.
- * S3 returns the checksum of the entire object from the HEAD response
- *
- * For the range header value could be parsed client-side, doing so presents a number of
- * complications. For example, the given range could be an unsatisfiable range, and might not even
- * specify a complete range. To keep things simple, we are currently relying on the service to handle
- * turning the Range header into a Content-Range response header.*/
- bool head_object_required = auto_ranged_get->initial_message_has_range_header != 0 ||
- meta_request->checksum_config.validate_response_checksum;
-
- if (head_object_required) {
- /* If the head object request hasn't been sent yet, then send it now. */
- if (!auto_ranged_get->synced_data.head_object_sent) {
+ if (auto_ranged_get->synced_data.head_object_sent ||
+ auto_ranged_get->synced_data.num_parts_requested > 0) {
+ goto has_work_remaining;
+ }
+ struct aws_s3_buffer_pool_ticket *ticket = NULL;
+ switch (s_s3_get_request_type_for_discovering_object_size(meta_request)) {
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT:
+ AWS_LOGF_INFO(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p: Doing a HeadObject to discover the size of the object",
+ (void *)meta_request);
request = aws_s3_request_new(
meta_request,
AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT,
- 0,
- AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS | AWS_S3_REQUEST_FLAG_PART_SIZE_RESPONSE_BODY);
-
- request->discovers_object_size = true;
-
+ AWS_S3_REQUEST_TYPE_HEAD_OBJECT,
+ 0 /*part_number*/,
+ AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS);
auto_ranged_get->synced_data.head_object_sent = true;
- }
- } else if (auto_ranged_get->synced_data.num_parts_requested == 0) {
- /* If we aren't using a head object, then discover the size of the object while trying to get the
- * first part. */
- request = aws_s3_request_new(
- meta_request,
- AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART,
- 1,
- AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS | AWS_S3_REQUEST_FLAG_PART_SIZE_RESPONSE_BODY);
-
- request->part_range_start = 0;
- request->part_range_end = meta_request->part_size - 1;
- request->discovers_object_size = true;
-
- ++auto_ranged_get->synced_data.num_parts_requested;
- }
+ break;
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1:
+ AWS_LOGF_INFO(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p: Doing a 'GET_OBJECT_WITH_PART_NUMBER_1' to discover the size of the object and get "
+ "the first part",
+ (void *)meta_request);
+ ticket = aws_s3_buffer_pool_reserve(meta_request->client->buffer_pool, meta_request->part_size);
+
+ if (ticket == NULL) {
+ goto has_work_remaining;
+ }
- goto has_work_remaining;
- }
+ request = aws_s3_request_new(
+ meta_request,
+ AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1,
+ AWS_S3_REQUEST_TYPE_GET_OBJECT,
+ 1 /*part_number*/,
+ AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS | AWS_S3_REQUEST_FLAG_PART_SIZE_RESPONSE_BODY);
+ request->ticket = ticket;
+ ++auto_ranged_get->synced_data.num_parts_requested;
+
+ break;
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE:
+ AWS_LOGF_INFO(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p: Doing a 'GET_OBJECT_WITH_RANGE' to discover the size of the object and get the "
+ "first part",
+ (void *)meta_request);
+
+ uint64_t part_range_start = 0;
+ uint64_t first_part_size = meta_request->part_size;
+ if (auto_ranged_get->initial_message_has_range_header) {
+ /*
+ * Currently, we only discover the size of the object when the initial range header includes
+ * a start-range. If we ever implement skipping the HeadRequest for a Range request without
+ * a start-range, this will need to update.
+ */
+ AWS_ASSERT(auto_ranged_get->initial_message_has_start_range);
+ part_range_start = auto_ranged_get->initial_range_start;
+
+ if (auto_ranged_get->initial_message_has_end_range) {
+ first_part_size = aws_min_u64(
+ first_part_size,
+ auto_ranged_get->initial_range_end - auto_ranged_get->initial_range_start + 1);
+ }
+
+ auto_ranged_get->synced_data.first_part_size = first_part_size;
+ }
+ AWS_LOGF_INFO(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p: Doing a ranged get to discover the size of the object and get the first part",
+ (void *)meta_request);
+
+ if (first_part_size >= s_min_size_response_for_pooling) {
+ /* Note: explicitly reserving the whole part size
+ * even if expect to receive less data. Pool will
+ * reserve the whole part size for it anyways, so no
+ * reason getting a smaller chunk. */
+ ticket = aws_s3_buffer_pool_reserve(
+ meta_request->client->buffer_pool, (size_t)meta_request->part_size);
+
+ if (ticket == NULL) {
+ goto has_work_remaining;
+ }
+ } else {
+ ticket = NULL;
+ }
- /* If the object range is known and that range is empty, then we have an empty file to request. */
- if (auto_ranged_get->synced_data.object_range_start == 0 &&
- auto_ranged_get->synced_data.object_range_end == 0) {
- if (auto_ranged_get->synced_data.get_without_range_sent) {
- if (auto_ranged_get->synced_data.get_without_range_completed) {
- goto no_work_remaining;
- } else {
- goto has_work_remaining;
- }
+ request = aws_s3_request_new(
+ meta_request,
+ AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE,
+ AWS_S3_REQUEST_TYPE_GET_OBJECT,
+ 1 /*part_number*/,
+ AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS | AWS_S3_REQUEST_FLAG_PART_SIZE_RESPONSE_BODY);
+ request->ticket = ticket;
+ request->part_range_start = part_range_start;
+ request->part_range_end = part_range_start + first_part_size - 1; /* range-end is inclusive */
+ ++auto_ranged_get->synced_data.num_parts_requested;
+ break;
+ default:
+ AWS_FATAL_ASSERT(
+ 0 && "s_s3_get_request_type_for_discovering_object_size returned unexpected discover "
+ "object size request type");
}
- request = aws_s3_request_new(
- meta_request,
- AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_INITIAL_MESSAGE,
- 0,
- AWS_S3_REQUEST_FLAG_RECORD_RESPONSE_HEADERS);
-
- auto_ranged_get->synced_data.get_without_range_sent = true;
+ request->discovers_object_size = true;
goto has_work_remaining;
}
@@ -247,16 +360,27 @@ static bool s_s3_auto_ranged_get_update(
auto_ranged_get->synced_data.read_window_warning_issued = 0;
}
+ struct aws_s3_buffer_pool_ticket *ticket =
+ aws_s3_buffer_pool_reserve(meta_request->client->buffer_pool, meta_request->part_size);
+
+ if (ticket == NULL) {
+ goto has_work_remaining;
+ }
+
request = aws_s3_request_new(
meta_request,
- AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART,
- auto_ranged_get->synced_data.num_parts_requested + 1,
+ AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE,
+ AWS_S3_REQUEST_TYPE_GET_OBJECT,
+ auto_ranged_get->synced_data.num_parts_requested + 1 /*part_number*/,
AWS_S3_REQUEST_FLAG_PART_SIZE_RESPONSE_BODY);
- aws_s3_get_part_range(
+ request->ticket = ticket;
+
+ aws_s3_calculate_auto_ranged_get_part_range(
auto_ranged_get->synced_data.object_range_start,
auto_ranged_get->synced_data.object_range_end,
meta_request->part_size,
+ auto_ranged_get->synced_data.first_part_size,
request->part_number,
&request->part_range_start,
&request->part_range_end);
@@ -283,11 +407,6 @@ static bool s_s3_auto_ranged_get_update(
goto has_work_remaining;
}
- if (auto_ranged_get->synced_data.get_without_range_sent &&
- !auto_ranged_get->synced_data.get_without_range_completed) {
- goto has_work_remaining;
- }
-
/* If some parts are still being delivered to the caller, then wait for those to finish. */
if (meta_request->synced_data.num_parts_delivery_completed <
meta_request->synced_data.num_parts_delivery_sent) {
@@ -311,12 +430,16 @@ static bool s_s3_auto_ranged_get_update(
}
no_work_remaining:
+ /* If some events are still being delivered to caller, then wait for those to finish */
+ if (!work_remaining && aws_s3_meta_request_are_events_out_for_delivery_synced(meta_request)) {
+ work_remaining = true;
+ }
if (!work_remaining) {
aws_s3_meta_request_set_success_synced(meta_request, s_s3_auto_ranged_get_success_status(meta_request));
if (auto_ranged_get->synced_data.num_parts_checksum_validated ==
auto_ranged_get->synced_data.num_parts_requested) {
- /* If we have validated the checksum for every parts, we set the meta request level checksum validation
+ /* If we have validated the checksum for every part, we set the meta request level checksum validation
* result.*/
meta_request->synced_data.finish_result.did_validate = true;
meta_request->synced_data.finish_result.validation_algorithm = auto_ranged_get->validation_algorithm;
@@ -337,17 +460,18 @@ static bool s_s3_auto_ranged_get_update(
return work_remaining;
}
-/* Given a request, prepare it for sending based on its description. */
-static int s_s3_auto_ranged_get_prepare_request(
- struct aws_s3_meta_request *meta_request,
- struct aws_s3_request *request) {
- AWS_PRECONDITION(meta_request);
+/* Given a request, prepare it for sending based on its description.
+ * Currently, this is actually synchronous. */
+static struct aws_future_void *s_s3_auto_ranged_get_prepare_request(struct aws_s3_request *request) {
AWS_PRECONDITION(request);
+ struct aws_s3_meta_request *meta_request = request->meta_request;
/* Generate a new ranged get request based on the original message. */
struct aws_http_message *message = NULL;
struct aws_s3_auto_ranged_get *auto_ranged_get = meta_request->impl;
+ bool success = false;
+
switch (request->request_tag) {
case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_HEAD_OBJECT:
/* A head object will be a copy of the original headers but with a HEAD request method. */
@@ -357,16 +481,20 @@ static int s_s3_auto_ranged_get_prepare_request(
aws_http_message_set_request_method(message, g_head_method);
}
break;
- case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART:
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE:
message = aws_s3_ranged_get_object_message_new(
meta_request->allocator,
meta_request->initial_request_message,
request->part_range_start,
request->part_range_end);
break;
- case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_INITIAL_MESSAGE:
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1:
message = aws_s3_message_util_copy_http_message_no_body_all_headers(
meta_request->allocator, meta_request->initial_request_message);
+ if (message) {
+ aws_s3_message_util_set_multipart_request_path(
+ meta_request->allocator, NULL, request->part_number, false, message);
+ }
break;
}
@@ -376,7 +504,7 @@ static int s_s3_auto_ranged_get_prepare_request(
"id=%p Could not create message for request with tag %d for auto-ranged-get meta request.",
(void *)meta_request,
request->request_tag);
- goto message_alloc_failed;
+ goto finish;
}
if (meta_request->checksum_config.validate_response_checksum) {
aws_http_headers_set(aws_http_message_get_headers(message), g_request_validation_mode, g_enabled);
@@ -398,21 +526,30 @@ static int s_s3_auto_ranged_get_prepare_request(
aws_s3_request_setup_send_data(request, message);
aws_http_message_release(message);
+ /* Success! */
AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
- "id=%p: Created request %p for part %d",
+ "id=%p: Created request %p for part %d part sized %d",
(void *)meta_request,
(void *)request,
- request->part_number);
+ request->part_number,
+ request->has_part_size_response_body);
- return AWS_OP_SUCCESS;
+ success = true;
-message_alloc_failed:
-
- return AWS_OP_ERR;
+finish:;
+ struct aws_future_void *future = aws_future_void_new(meta_request->allocator);
+ if (success) {
+ aws_future_void_set_result(future);
+ } else {
+ aws_future_void_set_error(future, aws_last_error_or_unknown());
+ }
+ return future;
}
-/* Check the finish result of meta request, in case of the request failed because of downloading an empty file */
+/* Check the finish result of meta request.
+ * Return true if the request failed because it downloaded an empty file.
+ * Return false if the request failed for any other reason */
static bool s_check_empty_file_download_error(struct aws_s3_request *failed_request) {
struct aws_http_headers *failed_headers = failed_request->send_data.response_headers;
struct aws_byte_buf failed_body = failed_request->send_data.response_body;
@@ -423,12 +560,11 @@ static bool s_check_empty_file_download_error(struct aws_s3_request *failed_requ
/* Content type found */
if (aws_byte_cursor_eq_ignore_case(&content_type, &g_application_xml_value)) {
/* XML response */
- struct aws_byte_cursor body_cursor = aws_byte_cursor_from_buf(&failed_body);
- struct aws_string *size =
- aws_xml_get_top_level_tag(failed_request->allocator, &g_object_size_value, &body_cursor);
- bool check_size = aws_string_eq_c_str(size, "0");
- aws_string_destroy(size);
- if (check_size) {
+ struct aws_byte_cursor xml_doc = aws_byte_cursor_from_buf(&failed_body);
+ const char *path_to_size[] = {"Error", "ActualObjectSize", NULL};
+ struct aws_byte_cursor size = {0};
+ aws_xml_get_body_at_path(failed_request->allocator, xml_doc, path_to_size, &size);
+ if (aws_byte_cursor_eq_c_str(&size, "0")) {
return true;
}
}
@@ -437,22 +573,28 @@ static bool s_check_empty_file_download_error(struct aws_s3_request *failed_requ
return false;
}
-static int s_discover_object_range_and_content_length(
+static int s_discover_object_range_and_size(
struct aws_s3_meta_request *meta_request,
struct aws_s3_request *request,
int error_code,
- uint64_t *out_total_content_length,
uint64_t *out_object_range_start,
- uint64_t *out_object_range_end) {
- AWS_PRECONDITION(out_total_content_length);
+ uint64_t *out_object_range_end,
+ uint64_t *out_object_size,
+ uint64_t *out_first_part_size,
+ bool *out_empty_file_error) {
+
+ AWS_PRECONDITION(out_object_size);
AWS_PRECONDITION(out_object_range_start);
AWS_PRECONDITION(out_object_range_end);
+ AWS_PRECONDITION(out_first_part_size);
int result = AWS_OP_ERR;
- uint64_t total_content_length = 0;
+ uint64_t content_length = 0;
+ uint64_t object_size = 0;
uint64_t object_range_start = 0;
uint64_t object_range_end = 0;
+ uint64_t first_part_size = 0;
AWS_ASSERT(request->discovers_object_size);
struct aws_s3_auto_ranged_get *auto_ranged_get = meta_request->impl;
@@ -466,7 +608,7 @@ static int s_discover_object_range_and_content_length(
/* There should be a Content-Length header that indicates the total size of the range.*/
if (aws_s3_parse_content_length_response_header(
- meta_request->allocator, request->send_data.response_headers, &total_content_length)) {
+ meta_request->allocator, request->send_data.response_headers, &content_length)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
@@ -477,16 +619,19 @@ static int s_discover_object_range_and_content_length(
}
/* if the inital message had a ranged header, there should also be a Content-Range header that specifies the
- * object range and total object size. Otherwise the size and range should be equal to the
+ * object range and total object size. Otherwise, the size and range should be equal to the
* total_content_length. */
if (!auto_ranged_get->initial_message_has_range_header) {
- object_range_end = total_content_length - 1;
+ object_size = content_length;
+ if (content_length > 0) {
+ object_range_end = content_length - 1; /* range-end is inclusive */
+ }
} else if (aws_s3_parse_content_range_response_header(
meta_request->allocator,
request->send_data.response_headers,
&object_range_start,
&object_range_end,
- NULL)) {
+ &object_size)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
@@ -498,22 +643,58 @@ static int s_discover_object_range_and_content_length(
result = AWS_OP_SUCCESS;
break;
- case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART:
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1:
+ AWS_ASSERT(request->part_number == 1);
+ AWS_ASSERT(request->send_data.response_headers != NULL);
+ /* There should be a Content-Length header that indicates the size of first part. */
+ if (aws_s3_parse_content_length_response_header(
+ meta_request->allocator, request->send_data.response_headers, &content_length)) {
+
+ AWS_LOGF_ERROR(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p Could not find content-length header for request %p",
+ (void *)meta_request,
+ (void *)request);
+ break;
+ }
+ first_part_size = content_length;
+
+ if (first_part_size > 0) {
+ /* Parse the object size from the part response. */
+ if (aws_s3_parse_content_range_response_header(
+ meta_request->allocator, request->send_data.response_headers, NULL, NULL, &object_size)) {
+
+ AWS_LOGF_ERROR(
+ AWS_LS_S3_META_REQUEST,
+ "id=%p Could not find content-range header for request %p",
+ (void *)meta_request,
+ (void *)request);
+ break;
+ }
+ /* When discovering the object size via GET_OBJECT_WITH_PART_NUMBER_1, the object range is the entire
+ * object. */
+ object_range_start = 0;
+ object_range_end = object_size - 1; /* range-end is inclusive */
+ }
+
+ result = AWS_OP_SUCCESS;
+ break;
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE:
AWS_ASSERT(request->part_number == 1);
if (error_code != AWS_ERROR_SUCCESS) {
/* If we hit an empty file while trying to discover the object-size via part, then this request
failure
* is as designed. */
- if (s_check_empty_file_download_error(request)) {
+ if (!auto_ranged_get->initial_message_has_range_header && s_check_empty_file_download_error(request)) {
AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
"id=%p Detected empty file with request %p. Sending new request without range header.",
(void *)meta_request,
(void *)request);
- total_content_length = 0ULL;
-
+ object_size = 0ULL;
+ *out_empty_file_error = true;
result = AWS_OP_SUCCESS;
} else {
/* Otherwise, resurface the error code. */
@@ -526,7 +707,11 @@ static int s_discover_object_range_and_content_length(
/* Parse the object size from the part response. */
if (aws_s3_parse_content_range_response_header(
- meta_request->allocator, request->send_data.response_headers, NULL, NULL, &total_content_length)) {
+ meta_request->allocator,
+ request->send_data.response_headers,
+ &object_range_start,
+ &object_range_end,
+ &object_size)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
@@ -536,11 +721,17 @@ static int s_discover_object_range_and_content_length(
break;
}
-
- /* When discovering the object size via first-part, the object range is the entire object. */
- object_range_start = 0;
- object_range_end = total_content_length - 1;
-
+ if (auto_ranged_get->initial_message_has_range_header) {
+ if (auto_ranged_get->initial_message_has_end_range) {
+ object_range_end = aws_min_u64(object_size - 1, auto_ranged_get->initial_range_end);
+ } else {
+ object_range_end = object_size - 1;
+ }
+ } else {
+ /* When discovering the object size via GET_OBJECT_WITH_RANGE, the object range is the entire object. */
+ object_range_start = 0;
+ object_range_end = object_size - 1; /* range-end is inclusive */
+ }
result = AWS_OP_SUCCESS;
break;
default:
@@ -549,9 +740,10 @@ static int s_discover_object_range_and_content_length(
}
if (result == AWS_OP_SUCCESS) {
- *out_total_content_length = total_content_length;
+ *out_object_size = object_size;
*out_object_range_start = object_range_start;
*out_object_range_end = object_range_end;
+ *out_first_part_size = first_part_size;
}
return result;
@@ -568,25 +760,33 @@ static void s_s3_auto_ranged_get_request_finished(
struct aws_s3_auto_ranged_get *auto_ranged_get = meta_request->impl;
AWS_PRECONDITION(auto_ranged_get);
- uint64_t total_content_length = 0ULL;
uint64_t object_range_start = 0ULL;
uint64_t object_range_end = 0ULL;
+ uint64_t object_size = 0ULL;
+ uint64_t first_part_size = 0ULL;
bool found_object_size = false;
bool request_failed = error_code != AWS_ERROR_SUCCESS;
+ bool first_part_size_mismatch = (error_code == AWS_ERROR_S3_INTERNAL_PART_SIZE_MISMATCH_RETRYING_WITH_RANGE);
+ bool empty_file_error = false;
if (request->discovers_object_size) {
-
- /* Try to discover the object-range and content length.*/
- if (s_discover_object_range_and_content_length(
- meta_request, request, error_code, &total_content_length, &object_range_start, &object_range_end)) {
+ /* Try to discover the object-range and object-size.*/
+ if (s_discover_object_range_and_size(
+ meta_request,
+ request,
+ error_code,
+ &object_range_start,
+ &object_range_end,
+ &object_size,
+ &first_part_size,
+ &empty_file_error)) {
error_code = aws_last_error_or_unknown();
goto update_synced_data;
}
-
- if (!request_failed && !auto_ranged_get->initial_message_has_if_match_header) {
+ if ((!request_failed || first_part_size_mismatch) && !auto_ranged_get->initial_message_has_if_match_header) {
AWS_ASSERT(auto_ranged_get->etag == NULL);
struct aws_byte_cursor etag_header_value;
@@ -609,22 +809,37 @@ static void s_s3_auto_ranged_get_request_finished(
error_code = AWS_ERROR_SUCCESS;
found_object_size = true;
- if (meta_request->headers_callback != NULL) {
+ if (!empty_file_error && meta_request->headers_callback != NULL) {
struct aws_http_headers *response_headers = aws_http_headers_new(meta_request->allocator);
copy_http_headers(request->send_data.response_headers, response_headers);
- /* If this request is a part, then the content range isn't applicable. */
- if (request->request_tag == AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART) {
- /* For now, we can assume that discovery of size via the first part of the object does not apply to
- * breaking up a ranged request. If it ever does, then we will need to repopulate this header. */
- AWS_ASSERT(!auto_ranged_get->initial_message_has_range_header);
-
- aws_http_headers_erase(response_headers, g_content_range_header_name);
+ if (request->request_tag == AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE ||
+ request->request_tag == AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1) {
+
+ if (auto_ranged_get->initial_message_has_range_header) {
+ /* Populate the header with object_range */
+ char content_range_buffer[64] = "";
+ snprintf(
+ content_range_buffer,
+ sizeof(content_range_buffer),
+ "bytes %" PRIu64 "-%" PRIu64 "/%" PRIu64,
+ object_range_start,
+ object_range_end,
+ object_size);
+ aws_http_headers_set(
+ response_headers,
+ g_content_range_header_name,
+ aws_byte_cursor_from_c_str(content_range_buffer));
+ } else {
+ /* content range isn't applicable. */
+ aws_http_headers_erase(response_headers, g_content_range_header_name);
+ }
}
+ uint64_t content_length = object_size ? object_range_end - object_range_start + 1 : 0;
char content_length_buffer[64] = "";
- snprintf(content_length_buffer, sizeof(content_length_buffer), "%" PRIu64, total_content_length);
+ snprintf(content_length_buffer, sizeof(content_length_buffer), "%" PRIu64, content_length);
aws_http_headers_set(
response_headers, g_content_length_header_name, aws_byte_cursor_from_c_str(content_length_buffer));
@@ -647,16 +862,25 @@ update_synced_data:
/* BEGIN CRITICAL SECTION */
{
aws_s3_meta_request_lock_synced_data(meta_request);
+ bool finishing_metrics = true;
/* If the object range was found, then record it. */
if (found_object_size) {
AWS_ASSERT(!auto_ranged_get->synced_data.object_range_known);
-
auto_ranged_get->synced_data.object_range_known = true;
+ auto_ranged_get->synced_data.object_range_empty = (object_size == 0);
auto_ranged_get->synced_data.object_range_start = object_range_start;
auto_ranged_get->synced_data.object_range_end = object_range_end;
- auto_ranged_get->synced_data.total_num_parts =
- aws_s3_get_num_parts(meta_request->part_size, object_range_start, object_range_end);
+ if (!first_part_size_mismatch && first_part_size) {
+ auto_ranged_get->synced_data.first_part_size = first_part_size;
+ }
+ if (auto_ranged_get->synced_data.object_range_empty == 0) {
+ auto_ranged_get->synced_data.total_num_parts = aws_s3_calculate_auto_ranged_get_num_parts(
+ meta_request->part_size,
+ auto_ranged_get->synced_data.first_part_size,
+ object_range_start,
+ object_range_end);
+ }
}
switch (request->request_tag) {
@@ -664,7 +888,27 @@ update_synced_data:
auto_ranged_get->synced_data.head_object_completed = true;
AWS_LOGF_DEBUG(AWS_LS_S3_META_REQUEST, "id=%p Head object completed.", (void *)meta_request);
break;
- case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_PART:
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_PART_NUMBER_1:
+ AWS_LOGF_DEBUG(AWS_LS_S3_META_REQUEST, "id=%p Get Part Number completed.", (void *)meta_request);
+ if (first_part_size_mismatch && found_object_size) {
+ /* We canceled GET_OBJECT_WITH_PART_NUMBER_1 request because the Content-Length was bigger than
+ * part_size. Try to fetch the first part again as a ranged get */
+ auto_ranged_get->synced_data.num_parts_requested = 0;
+ break;
+ }
+ /* fall through */
+ case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_GET_OBJECT_WITH_RANGE:
+ if (empty_file_error) {
+ /*
+ * Try to download the object again using GET_OBJECT_WITH_PART_NUMBER_1. If the file is still
+ * empty, successful response headers will be provided to users. If not, the newer version of the
+ * file will be downloaded.
+ */
+ auto_ranged_get->synced_data.num_parts_requested = 0;
+ auto_ranged_get->synced_data.object_range_known = 0;
+ break;
+ }
+
++auto_ranged_get->synced_data.num_parts_completed;
if (!request_failed) {
@@ -680,7 +924,23 @@ update_synced_data:
}
++auto_ranged_get->synced_data.num_parts_successful;
+ /* Send progress_callback for delivery on io_event_loop thread */
+ if (meta_request->progress_callback != NULL) {
+ struct aws_s3_meta_request_event event = {.type = AWS_S3_META_REQUEST_EVENT_PROGRESS};
+ event.u.progress.info.bytes_transferred = request->send_data.response_body.len;
+ if (auto_ranged_get->synced_data.object_range_empty) {
+ event.u.progress.info.content_length = 0;
+ } else {
+ /* Note that range-end is inclusive */
+ event.u.progress.info.content_length = auto_ranged_get->synced_data.object_range_end + 1 -
+ auto_ranged_get->synced_data.object_range_start;
+ }
+ aws_s3_meta_request_add_event_for_delivery_synced(meta_request, &event);
+ }
+
aws_s3_meta_request_stream_response_body_synced(meta_request, request);
+ /* The body of the request is queued to be streamed, don't finish the metrics yet. */
+ finishing_metrics = false;
AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
@@ -693,11 +953,6 @@ update_synced_data:
++auto_ranged_get->synced_data.num_parts_failed;
}
break;
- case AWS_S3_AUTO_RANGE_GET_REQUEST_TYPE_INITIAL_MESSAGE:
- AWS_LOGF_DEBUG(
- AWS_LS_S3_META_REQUEST, "id=%p Get of file using initial message completed.", (void *)meta_request);
- auto_ranged_get->synced_data.get_without_range_completed = true;
- break;
}
if (error_code != AWS_ERROR_SUCCESS) {
@@ -715,7 +970,9 @@ update_synced_data:
meta_request->synced_data.finish_result.validation_algorithm = request->validation_algorithm;
}
}
-
+ if (finishing_metrics) {
+ aws_s3_request_finish_up_metrics_synced(request, meta_request);
+ }
aws_s3_meta_request_unlock_synced_data(meta_request);
}
/* END CRITICAL SECTION */