author    | gebetix <gebetix@yandex-team.ru>              | 2022-02-10 16:48:54 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru>  | 2022-02-10 16:48:54 +0300
commit    | e14861bee6e628671fd6da83422252ca41bd2e6d (patch)
tree      | 5d5cb817648f650d76cf1076100726fd9b8448e8 /contrib
parent    | 41f5af039e2324748594194babdd070299be3a3b (diff)
download  | ydb-e14861bee6e628671fd6da83422252ca41bd2e6d.tar.gz
Restoring authorship annotation for <gebetix@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib')
31 files changed, 10323 insertions, 10323 deletions
diff --git a/contrib/python/boto3/ya.make b/contrib/python/boto3/ya.make index 711e1f16ac..bd12ae12b7 100644 --- a/contrib/python/boto3/ya.make +++ b/contrib/python/boto3/ya.make @@ -9,7 +9,7 @@ LICENSE(Apache-2.0) PEERDIR( contrib/python/botocore contrib/python/jmespath - contrib/python/s3transfer + contrib/python/s3transfer ) NO_LINT() diff --git a/contrib/python/s3transfer/py2/s3transfer/__init__.py b/contrib/python/s3transfer/py2/s3transfer/__init__.py index d109318a0a..63735d66b1 100644 --- a/contrib/python/s3transfer/py2/s3transfer/__init__.py +++ b/contrib/python/s3transfer/py2/s3transfer/__init__.py @@ -1,741 +1,741 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""Abstractions over S3's upload/download operations. - -This module provides high level abstractions for efficient -uploads/downloads. It handles several things for the user: - -* Automatically switching to multipart transfers when - a file is over a specific size threshold -* Uploading/downloading a file in parallel -* Throttling based on max bandwidth -* Progress callbacks to monitor transfers -* Retries. While botocore handles retries for streaming uploads, - it is not possible for it to handle retries for streaming - downloads. This module handles retries for both cases so - you don't need to implement any retry logic yourself. - -This module has a reasonable set of defaults. It also allows you -to configure many aspects of the transfer process including: - -* Multipart threshold size -* Max parallel downloads -* Max bandwidth -* Socket timeouts -* Retry amounts - -There is no support for s3->s3 multipart copies at this -time. - - -.. _ref_s3transfer_usage: - -Usage -===== - -The simplest way to use this module is: - -.. code-block:: python - - client = boto3.client('s3', 'us-west-2') - transfer = S3Transfer(client) - # Upload /tmp/myfile to s3://bucket/key - transfer.upload_file('/tmp/myfile', 'bucket', 'key') - - # Download s3://bucket/key to /tmp/myfile - transfer.download_file('bucket', 'key', '/tmp/myfile') - -The ``upload_file`` and ``download_file`` methods also accept -``**kwargs``, which will be forwarded through to the corresponding -client operation. Here are a few examples using ``upload_file``:: - - # Making the object public - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - extra_args={'ACL': 'public-read'}) - - # Setting metadata - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - extra_args={'Metadata': {'a': 'b', 'c': 'd'}}) - - # Setting content type - transfer.upload_file('/tmp/myfile.json', 'bucket', 'key', - extra_args={'ContentType': "application/json"}) - - -The ``S3Transfer`` clas also supports progress callbacks so you can -provide transfer progress to users. Both the ``upload_file`` and -``download_file`` methods take an optional ``callback`` parameter. -Here's an example of how to print a simple progress percentage -to the user: - -.. 
code-block:: python - - class ProgressPercentage(object): - def __init__(self, filename): - self._filename = filename - self._size = float(os.path.getsize(filename)) - self._seen_so_far = 0 - self._lock = threading.Lock() - - def __call__(self, bytes_amount): - # To simplify we'll assume this is hooked up - # to a single filename. - with self._lock: - self._seen_so_far += bytes_amount - percentage = (self._seen_so_far / self._size) * 100 - sys.stdout.write( - "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far, - self._size, percentage)) - sys.stdout.flush() - - - transfer = S3Transfer(boto3.client('s3', 'us-west-2')) - # Upload /tmp/myfile to s3://bucket/key and print upload progress. - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - callback=ProgressPercentage('/tmp/myfile')) - - - -You can also provide a TransferConfig object to the S3Transfer -object that gives you more fine grained control over the -transfer. For example: - -.. code-block:: python - - client = boto3.client('s3', 'us-west-2') - config = TransferConfig( - multipart_threshold=8 * 1024 * 1024, - max_concurrency=10, - num_download_attempts=10, - ) - transfer = S3Transfer(client, config) - transfer.upload_file('/tmp/foo', 'bucket', 'key') - - -""" -import os -import math -import functools -import logging -import socket -import threading -import random -import string -import concurrent.futures - -from botocore.compat import six +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Abstractions over S3's upload/download operations. + +This module provides high level abstractions for efficient +uploads/downloads. It handles several things for the user: + +* Automatically switching to multipart transfers when + a file is over a specific size threshold +* Uploading/downloading a file in parallel +* Throttling based on max bandwidth +* Progress callbacks to monitor transfers +* Retries. While botocore handles retries for streaming uploads, + it is not possible for it to handle retries for streaming + downloads. This module handles retries for both cases so + you don't need to implement any retry logic yourself. + +This module has a reasonable set of defaults. It also allows you +to configure many aspects of the transfer process including: + +* Multipart threshold size +* Max parallel downloads +* Max bandwidth +* Socket timeouts +* Retry amounts + +There is no support for s3->s3 multipart copies at this +time. + + +.. _ref_s3transfer_usage: + +Usage +===== + +The simplest way to use this module is: + +.. code-block:: python + + client = boto3.client('s3', 'us-west-2') + transfer = S3Transfer(client) + # Upload /tmp/myfile to s3://bucket/key + transfer.upload_file('/tmp/myfile', 'bucket', 'key') + + # Download s3://bucket/key to /tmp/myfile + transfer.download_file('bucket', 'key', '/tmp/myfile') + +The ``upload_file`` and ``download_file`` methods also accept +``**kwargs``, which will be forwarded through to the corresponding +client operation. 
Here are a few examples using ``upload_file``:: + + # Making the object public + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + extra_args={'ACL': 'public-read'}) + + # Setting metadata + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + extra_args={'Metadata': {'a': 'b', 'c': 'd'}}) + + # Setting content type + transfer.upload_file('/tmp/myfile.json', 'bucket', 'key', + extra_args={'ContentType': "application/json"}) + + +The ``S3Transfer`` clas also supports progress callbacks so you can +provide transfer progress to users. Both the ``upload_file`` and +``download_file`` methods take an optional ``callback`` parameter. +Here's an example of how to print a simple progress percentage +to the user: + +.. code-block:: python + + class ProgressPercentage(object): + def __init__(self, filename): + self._filename = filename + self._size = float(os.path.getsize(filename)) + self._seen_so_far = 0 + self._lock = threading.Lock() + + def __call__(self, bytes_amount): + # To simplify we'll assume this is hooked up + # to a single filename. + with self._lock: + self._seen_so_far += bytes_amount + percentage = (self._seen_so_far / self._size) * 100 + sys.stdout.write( + "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far, + self._size, percentage)) + sys.stdout.flush() + + + transfer = S3Transfer(boto3.client('s3', 'us-west-2')) + # Upload /tmp/myfile to s3://bucket/key and print upload progress. + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + callback=ProgressPercentage('/tmp/myfile')) + + + +You can also provide a TransferConfig object to the S3Transfer +object that gives you more fine grained control over the +transfer. For example: + +.. code-block:: python + + client = boto3.client('s3', 'us-west-2') + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, + max_concurrency=10, + num_download_attempts=10, + ) + transfer = S3Transfer(client, config) + transfer.upload_file('/tmp/foo', 'bucket', 'key') + + +""" +import os +import math +import functools +import logging +import socket +import threading +import random +import string +import concurrent.futures + +from botocore.compat import six from botocore.vendored.requests.packages.urllib3.exceptions import \ - ReadTimeoutError -from botocore.exceptions import IncompleteReadError - -import s3transfer.compat -from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError - - -__author__ = 'Amazon Web Services' + ReadTimeoutError +from botocore.exceptions import IncompleteReadError + +import s3transfer.compat +from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError + + +__author__ = 'Amazon Web Services' __version__ = '0.4.2' - - -class NullHandler(logging.Handler): - def emit(self, record): - pass - - -logger = logging.getLogger(__name__) -logger.addHandler(NullHandler()) - -queue = six.moves.queue - -MB = 1024 * 1024 -SHUTDOWN_SENTINEL = object() - - -def random_file_extension(num_digits=8): - return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) - - -def disable_upload_callbacks(request, operation_name, **kwargs): - if operation_name in ['PutObject', 'UploadPart'] and \ - hasattr(request.body, 'disable_callback'): - request.body.disable_callback() - - -def enable_upload_callbacks(request, operation_name, **kwargs): - if operation_name in ['PutObject', 'UploadPart'] and \ - hasattr(request.body, 'enable_callback'): - request.body.enable_callback() - - -class QueueShutdownError(Exception): - pass - - -class ReadFileChunk(object): - def __init__(self, fileobj, 
start_byte, chunk_size, full_file_size, - callback=None, enable_callback=True): - """ - - Given a file object shown below: - - |___________________________________________________| - 0 | | full_file_size - |----chunk_size---| - start_byte - - :type fileobj: file - :param fileobj: File like object - - :type start_byte: int - :param start_byte: The first byte from which to start reading. - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callback: function(amount_read) - :param callback: Called whenever data is read from this object. - - """ - self._fileobj = fileobj - self._start_byte = start_byte - self._size = self._calculate_file_size( - self._fileobj, requested_size=chunk_size, - start_byte=start_byte, actual_file_size=full_file_size) - self._fileobj.seek(self._start_byte) - self._amount_read = 0 - self._callback = callback - self._callback_enabled = enable_callback - - @classmethod - def from_filename(cls, filename, start_byte, chunk_size, callback=None, - enable_callback=True): - """Convenience factory function to create from a filename. - - :type start_byte: int - :param start_byte: The first byte from which to start reading. - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callback: function(amount_read) - :param callback: Called whenever data is read from this object. - - :type enable_callback: bool - :param enable_callback: Indicate whether to invoke callback - during read() calls. - - :rtype: ``ReadFileChunk`` - :return: A new instance of ``ReadFileChunk`` - - """ - f = open(filename, 'rb') - file_size = os.fstat(f.fileno()).st_size - return cls(f, start_byte, chunk_size, file_size, callback, - enable_callback) - - def _calculate_file_size(self, fileobj, requested_size, start_byte, - actual_file_size): - max_chunk_size = actual_file_size - start_byte - return min(max_chunk_size, requested_size) - - def read(self, amount=None): - if amount is None: - amount_to_read = self._size - self._amount_read - else: - amount_to_read = min(self._size - self._amount_read, amount) - data = self._fileobj.read(amount_to_read) - self._amount_read += len(data) - if self._callback is not None and self._callback_enabled: - self._callback(len(data)) - return data - - def enable_callback(self): - self._callback_enabled = True - - def disable_callback(self): - self._callback_enabled = False - - def seek(self, where): - self._fileobj.seek(self._start_byte + where) - if self._callback is not None and self._callback_enabled: - # To also rewind the callback() for an accurate progress report - self._callback(where - self._amount_read) - self._amount_read = where - - def close(self): - self._fileobj.close() - - def tell(self): - return self._amount_read - - def __len__(self): - # __len__ is defined because requests will try to determine the length - # of the stream to set a content length. In the normal case - # of the file it will just stat the file, but we need to change that - # behavior. By providing a __len__, requests will use that instead - # of stat'ing the file. 
- return self._size - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - def __iter__(self): - # This is a workaround for http://bugs.python.org/issue17575 - # Basically httplib will try to iterate over the contents, even - # if its a file like object. This wasn't noticed because we've - # already exhausted the stream so iterating over the file immediately - # stops, which is what we're simulating here. - return iter([]) - - -class StreamReaderProgress(object): - """Wrapper for a read only stream that adds progress callbacks.""" - def __init__(self, stream, callback=None): - self._stream = stream - self._callback = callback - - def read(self, *args, **kwargs): - value = self._stream.read(*args, **kwargs) - if self._callback is not None: - self._callback(len(value)) - return value - - -class OSUtils(object): - def get_file_size(self, filename): - return os.path.getsize(filename) - - def open_file_chunk_reader(self, filename, start_byte, size, callback): - return ReadFileChunk.from_filename(filename, start_byte, - size, callback, - enable_callback=False) - - def open(self, filename, mode): - return open(filename, mode) - - def remove_file(self, filename): - """Remove a file, noop if file does not exist.""" - # Unlike os.remove, if the file does not exist, - # then this method does nothing. - try: - os.remove(filename) - except OSError: - pass - - def rename_file(self, current_filename, new_filename): - s3transfer.compat.rename_file(current_filename, new_filename) - - -class MultipartUploader(object): - # These are the extra_args that need to be forwarded onto - # subsequent upload_parts. - UPLOAD_PART_ARGS = [ - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', - ] - - def __init__(self, client, config, osutil, - executor_cls=concurrent.futures.ThreadPoolExecutor): - self._client = client - self._config = config - self._os = osutil - self._executor_cls = executor_cls - - def _extra_upload_part_args(self, extra_args): - # Only the args in UPLOAD_PART_ARGS actually need to be passed - # onto the upload_part calls. 
- upload_parts_args = {} - for key, value in extra_args.items(): - if key in self.UPLOAD_PART_ARGS: - upload_parts_args[key] = value - return upload_parts_args - - def upload_file(self, filename, bucket, key, callback, extra_args): - response = self._client.create_multipart_upload(Bucket=bucket, - Key=key, **extra_args) - upload_id = response['UploadId'] - try: - parts = self._upload_parts(upload_id, filename, bucket, key, - callback, extra_args) - except Exception as e: - logger.debug("Exception raised while uploading parts, " - "aborting multipart upload.", exc_info=True) - self._client.abort_multipart_upload( - Bucket=bucket, Key=key, UploadId=upload_id) - raise S3UploadFailedError( - "Failed to upload %s to %s: %s" % ( - filename, '/'.join([bucket, key]), e)) - self._client.complete_multipart_upload( - Bucket=bucket, Key=key, UploadId=upload_id, - MultipartUpload={'Parts': parts}) - - def _upload_parts(self, upload_id, filename, bucket, key, callback, - extra_args): - upload_parts_extra_args = self._extra_upload_part_args(extra_args) - parts = [] - part_size = self._config.multipart_chunksize - num_parts = int( - math.ceil(self._os.get_file_size(filename) / float(part_size))) - max_workers = self._config.max_concurrency - with self._executor_cls(max_workers=max_workers) as executor: - upload_partial = functools.partial( - self._upload_one_part, filename, bucket, key, upload_id, - part_size, upload_parts_extra_args, callback) - for part in executor.map(upload_partial, range(1, num_parts + 1)): - parts.append(part) - return parts - - def _upload_one_part(self, filename, bucket, key, - upload_id, part_size, extra_args, - callback, part_number): - open_chunk_reader = self._os.open_file_chunk_reader - with open_chunk_reader(filename, part_size * (part_number - 1), - part_size, callback) as body: - response = self._client.upload_part( - Bucket=bucket, Key=key, - UploadId=upload_id, PartNumber=part_number, Body=body, - **extra_args) - etag = response['ETag'] - return {'ETag': etag, 'PartNumber': part_number} - - -class ShutdownQueue(queue.Queue): - """A queue implementation that can be shutdown. - - Shutting down a queue means that this class adds a - trigger_shutdown method that will trigger all subsequent - calls to put() to fail with a ``QueueShutdownError``. - - It purposefully deviates from queue.Queue, and is *not* meant - to be a drop in replacement for ``queue.Queue``. - - """ - def _init(self, maxsize): - self._shutdown = False - self._shutdown_lock = threading.Lock() - # queue.Queue is an old style class so we don't use super(). - return queue.Queue._init(self, maxsize) - - def trigger_shutdown(self): - with self._shutdown_lock: - self._shutdown = True - logger.debug("The IO queue is now shutdown.") - - def put(self, item): - # Note: this is not sufficient, it's still possible to deadlock! - # Need to hook into the condition vars used by this class. 
- with self._shutdown_lock: - if self._shutdown: - raise QueueShutdownError("Cannot put item to queue when " - "queue has been shutdown.") - return queue.Queue.put(self, item) - - -class MultipartDownloader(object): - def __init__(self, client, config, osutil, - executor_cls=concurrent.futures.ThreadPoolExecutor): - self._client = client - self._config = config - self._os = osutil - self._executor_cls = executor_cls - self._ioqueue = ShutdownQueue(self._config.max_io_queue) - - def download_file(self, bucket, key, filename, object_size, - extra_args, callback=None): - with self._executor_cls(max_workers=2) as controller: - # 1 thread for the future that manages the uploading of files - # 1 thread for the future that manages IO writes. - download_parts_handler = functools.partial( - self._download_file_as_future, - bucket, key, filename, object_size, callback) - parts_future = controller.submit(download_parts_handler) - - io_writes_handler = functools.partial( - self._perform_io_writes, filename) - io_future = controller.submit(io_writes_handler) - results = concurrent.futures.wait( - [parts_future, io_future], - return_when=concurrent.futures.FIRST_EXCEPTION) - self._process_future_results(results) - - def _process_future_results(self, futures): - finished, unfinished = futures - for future in finished: - future.result() - - def _download_file_as_future(self, bucket, key, filename, object_size, - callback): - part_size = self._config.multipart_chunksize - num_parts = int(math.ceil(object_size / float(part_size))) - max_workers = self._config.max_concurrency - download_partial = functools.partial( - self._download_range, bucket, key, filename, - part_size, num_parts, callback) - try: - with self._executor_cls(max_workers=max_workers) as executor: - list(executor.map(download_partial, range(num_parts))) - finally: - self._ioqueue.put(SHUTDOWN_SENTINEL) - - def _calculate_range_param(self, part_size, part_index, num_parts): - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - else: - end_range = start_range + part_size - 1 - range_param = 'bytes=%s-%s' % (start_range, end_range) - return range_param - - def _download_range(self, bucket, key, filename, - part_size, num_parts, callback, part_index): - try: - range_param = self._calculate_range_param( - part_size, part_index, num_parts) - - max_attempts = self._config.num_download_attempts - last_exception = None - for i in range(max_attempts): - try: - logger.debug("Making get_object call.") - response = self._client.get_object( - Bucket=bucket, Key=key, Range=range_param) - streaming_body = StreamReaderProgress( - response['Body'], callback) - buffer_size = 1024 * 16 - current_index = part_size * part_index - for chunk in iter(lambda: streaming_body.read(buffer_size), - b''): - self._ioqueue.put((current_index, chunk)) - current_index += len(chunk) - return - except (socket.timeout, socket.error, - ReadTimeoutError, IncompleteReadError) as e: - logger.debug("Retrying exception caught (%s), " - "retrying request, (attempt %s / %s)", e, i, - max_attempts, exc_info=True) - last_exception = e - continue - raise RetriesExceededError(last_exception) - finally: - logger.debug("EXITING _download_range for part: %s", part_index) - - def _perform_io_writes(self, filename): - with self._os.open(filename, 'wb') as f: - while True: - task = self._ioqueue.get() - if task is SHUTDOWN_SENTINEL: - logger.debug("Shutdown sentinel received in IO handler, " - "shutting down IO handler.") - return - else: - try: - offset, 
data = task - f.seek(offset) - f.write(data) - except Exception as e: - logger.debug("Caught exception in IO thread: %s", - e, exc_info=True) - self._ioqueue.trigger_shutdown() - raise - - -class TransferConfig(object): - def __init__(self, - multipart_threshold=8 * MB, - max_concurrency=10, - multipart_chunksize=8 * MB, - num_download_attempts=5, - max_io_queue=100): - self.multipart_threshold = multipart_threshold - self.max_concurrency = max_concurrency - self.multipart_chunksize = multipart_chunksize - self.num_download_attempts = num_download_attempts - self.max_io_queue = max_io_queue - - -class S3Transfer(object): - - ALLOWED_DOWNLOAD_ARGS = [ - 'VersionId', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'RequestPayer', - ] - - ALLOWED_UPLOAD_ARGS = [ - 'ACL', - 'CacheControl', - 'ContentDisposition', - 'ContentEncoding', - 'ContentLanguage', - 'ContentType', - 'Expires', - 'GrantFullControl', - 'GrantRead', - 'GrantReadACP', - 'GrantWriteACL', - 'Metadata', - 'RequestPayer', - 'ServerSideEncryption', - 'StorageClass', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'SSEKMSKeyId', + + +class NullHandler(logging.Handler): + def emit(self, record): + pass + + +logger = logging.getLogger(__name__) +logger.addHandler(NullHandler()) + +queue = six.moves.queue + +MB = 1024 * 1024 +SHUTDOWN_SENTINEL = object() + + +def random_file_extension(num_digits=8): + return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) + + +def disable_upload_callbacks(request, operation_name, **kwargs): + if operation_name in ['PutObject', 'UploadPart'] and \ + hasattr(request.body, 'disable_callback'): + request.body.disable_callback() + + +def enable_upload_callbacks(request, operation_name, **kwargs): + if operation_name in ['PutObject', 'UploadPart'] and \ + hasattr(request.body, 'enable_callback'): + request.body.enable_callback() + + +class QueueShutdownError(Exception): + pass + + +class ReadFileChunk(object): + def __init__(self, fileobj, start_byte, chunk_size, full_file_size, + callback=None, enable_callback=True): + """ + + Given a file object shown below: + + |___________________________________________________| + 0 | | full_file_size + |----chunk_size---| + start_byte + + :type fileobj: file + :param fileobj: File like object + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callback: function(amount_read) + :param callback: Called whenever data is read from this object. + + """ + self._fileobj = fileobj + self._start_byte = start_byte + self._size = self._calculate_file_size( + self._fileobj, requested_size=chunk_size, + start_byte=start_byte, actual_file_size=full_file_size) + self._fileobj.seek(self._start_byte) + self._amount_read = 0 + self._callback = callback + self._callback_enabled = enable_callback + + @classmethod + def from_filename(cls, filename, start_byte, chunk_size, callback=None, + enable_callback=True): + """Convenience factory function to create from a filename. + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. 
Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callback: function(amount_read) + :param callback: Called whenever data is read from this object. + + :type enable_callback: bool + :param enable_callback: Indicate whether to invoke callback + during read() calls. + + :rtype: ``ReadFileChunk`` + :return: A new instance of ``ReadFileChunk`` + + """ + f = open(filename, 'rb') + file_size = os.fstat(f.fileno()).st_size + return cls(f, start_byte, chunk_size, file_size, callback, + enable_callback) + + def _calculate_file_size(self, fileobj, requested_size, start_byte, + actual_file_size): + max_chunk_size = actual_file_size - start_byte + return min(max_chunk_size, requested_size) + + def read(self, amount=None): + if amount is None: + amount_to_read = self._size - self._amount_read + else: + amount_to_read = min(self._size - self._amount_read, amount) + data = self._fileobj.read(amount_to_read) + self._amount_read += len(data) + if self._callback is not None and self._callback_enabled: + self._callback(len(data)) + return data + + def enable_callback(self): + self._callback_enabled = True + + def disable_callback(self): + self._callback_enabled = False + + def seek(self, where): + self._fileobj.seek(self._start_byte + where) + if self._callback is not None and self._callback_enabled: + # To also rewind the callback() for an accurate progress report + self._callback(where - self._amount_read) + self._amount_read = where + + def close(self): + self._fileobj.close() + + def tell(self): + return self._amount_read + + def __len__(self): + # __len__ is defined because requests will try to determine the length + # of the stream to set a content length. In the normal case + # of the file it will just stat the file, but we need to change that + # behavior. By providing a __len__, requests will use that instead + # of stat'ing the file. + return self._size + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def __iter__(self): + # This is a workaround for http://bugs.python.org/issue17575 + # Basically httplib will try to iterate over the contents, even + # if its a file like object. This wasn't noticed because we've + # already exhausted the stream so iterating over the file immediately + # stops, which is what we're simulating here. + return iter([]) + + +class StreamReaderProgress(object): + """Wrapper for a read only stream that adds progress callbacks.""" + def __init__(self, stream, callback=None): + self._stream = stream + self._callback = callback + + def read(self, *args, **kwargs): + value = self._stream.read(*args, **kwargs) + if self._callback is not None: + self._callback(len(value)) + return value + + +class OSUtils(object): + def get_file_size(self, filename): + return os.path.getsize(filename) + + def open_file_chunk_reader(self, filename, start_byte, size, callback): + return ReadFileChunk.from_filename(filename, start_byte, + size, callback, + enable_callback=False) + + def open(self, filename, mode): + return open(filename, mode) + + def remove_file(self, filename): + """Remove a file, noop if file does not exist.""" + # Unlike os.remove, if the file does not exist, + # then this method does nothing. 
+ try: + os.remove(filename) + except OSError: + pass + + def rename_file(self, current_filename, new_filename): + s3transfer.compat.rename_file(current_filename, new_filename) + + +class MultipartUploader(object): + # These are the extra_args that need to be forwarded onto + # subsequent upload_parts. + UPLOAD_PART_ARGS = [ + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', + ] + + def __init__(self, client, config, osutil, + executor_cls=concurrent.futures.ThreadPoolExecutor): + self._client = client + self._config = config + self._os = osutil + self._executor_cls = executor_cls + + def _extra_upload_part_args(self, extra_args): + # Only the args in UPLOAD_PART_ARGS actually need to be passed + # onto the upload_part calls. + upload_parts_args = {} + for key, value in extra_args.items(): + if key in self.UPLOAD_PART_ARGS: + upload_parts_args[key] = value + return upload_parts_args + + def upload_file(self, filename, bucket, key, callback, extra_args): + response = self._client.create_multipart_upload(Bucket=bucket, + Key=key, **extra_args) + upload_id = response['UploadId'] + try: + parts = self._upload_parts(upload_id, filename, bucket, key, + callback, extra_args) + except Exception as e: + logger.debug("Exception raised while uploading parts, " + "aborting multipart upload.", exc_info=True) + self._client.abort_multipart_upload( + Bucket=bucket, Key=key, UploadId=upload_id) + raise S3UploadFailedError( + "Failed to upload %s to %s: %s" % ( + filename, '/'.join([bucket, key]), e)) + self._client.complete_multipart_upload( + Bucket=bucket, Key=key, UploadId=upload_id, + MultipartUpload={'Parts': parts}) + + def _upload_parts(self, upload_id, filename, bucket, key, callback, + extra_args): + upload_parts_extra_args = self._extra_upload_part_args(extra_args) + parts = [] + part_size = self._config.multipart_chunksize + num_parts = int( + math.ceil(self._os.get_file_size(filename) / float(part_size))) + max_workers = self._config.max_concurrency + with self._executor_cls(max_workers=max_workers) as executor: + upload_partial = functools.partial( + self._upload_one_part, filename, bucket, key, upload_id, + part_size, upload_parts_extra_args, callback) + for part in executor.map(upload_partial, range(1, num_parts + 1)): + parts.append(part) + return parts + + def _upload_one_part(self, filename, bucket, key, + upload_id, part_size, extra_args, + callback, part_number): + open_chunk_reader = self._os.open_file_chunk_reader + with open_chunk_reader(filename, part_size * (part_number - 1), + part_size, callback) as body: + response = self._client.upload_part( + Bucket=bucket, Key=key, + UploadId=upload_id, PartNumber=part_number, Body=body, + **extra_args) + etag = response['ETag'] + return {'ETag': etag, 'PartNumber': part_number} + + +class ShutdownQueue(queue.Queue): + """A queue implementation that can be shutdown. + + Shutting down a queue means that this class adds a + trigger_shutdown method that will trigger all subsequent + calls to put() to fail with a ``QueueShutdownError``. + + It purposefully deviates from queue.Queue, and is *not* meant + to be a drop in replacement for ``queue.Queue``. + + """ + def _init(self, maxsize): + self._shutdown = False + self._shutdown_lock = threading.Lock() + # queue.Queue is an old style class so we don't use super(). 
+ return queue.Queue._init(self, maxsize) + + def trigger_shutdown(self): + with self._shutdown_lock: + self._shutdown = True + logger.debug("The IO queue is now shutdown.") + + def put(self, item): + # Note: this is not sufficient, it's still possible to deadlock! + # Need to hook into the condition vars used by this class. + with self._shutdown_lock: + if self._shutdown: + raise QueueShutdownError("Cannot put item to queue when " + "queue has been shutdown.") + return queue.Queue.put(self, item) + + +class MultipartDownloader(object): + def __init__(self, client, config, osutil, + executor_cls=concurrent.futures.ThreadPoolExecutor): + self._client = client + self._config = config + self._os = osutil + self._executor_cls = executor_cls + self._ioqueue = ShutdownQueue(self._config.max_io_queue) + + def download_file(self, bucket, key, filename, object_size, + extra_args, callback=None): + with self._executor_cls(max_workers=2) as controller: + # 1 thread for the future that manages the uploading of files + # 1 thread for the future that manages IO writes. + download_parts_handler = functools.partial( + self._download_file_as_future, + bucket, key, filename, object_size, callback) + parts_future = controller.submit(download_parts_handler) + + io_writes_handler = functools.partial( + self._perform_io_writes, filename) + io_future = controller.submit(io_writes_handler) + results = concurrent.futures.wait( + [parts_future, io_future], + return_when=concurrent.futures.FIRST_EXCEPTION) + self._process_future_results(results) + + def _process_future_results(self, futures): + finished, unfinished = futures + for future in finished: + future.result() + + def _download_file_as_future(self, bucket, key, filename, object_size, + callback): + part_size = self._config.multipart_chunksize + num_parts = int(math.ceil(object_size / float(part_size))) + max_workers = self._config.max_concurrency + download_partial = functools.partial( + self._download_range, bucket, key, filename, + part_size, num_parts, callback) + try: + with self._executor_cls(max_workers=max_workers) as executor: + list(executor.map(download_partial, range(num_parts))) + finally: + self._ioqueue.put(SHUTDOWN_SENTINEL) + + def _calculate_range_param(self, part_size, part_index, num_parts): + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + else: + end_range = start_range + part_size - 1 + range_param = 'bytes=%s-%s' % (start_range, end_range) + return range_param + + def _download_range(self, bucket, key, filename, + part_size, num_parts, callback, part_index): + try: + range_param = self._calculate_range_param( + part_size, part_index, num_parts) + + max_attempts = self._config.num_download_attempts + last_exception = None + for i in range(max_attempts): + try: + logger.debug("Making get_object call.") + response = self._client.get_object( + Bucket=bucket, Key=key, Range=range_param) + streaming_body = StreamReaderProgress( + response['Body'], callback) + buffer_size = 1024 * 16 + current_index = part_size * part_index + for chunk in iter(lambda: streaming_body.read(buffer_size), + b''): + self._ioqueue.put((current_index, chunk)) + current_index += len(chunk) + return + except (socket.timeout, socket.error, + ReadTimeoutError, IncompleteReadError) as e: + logger.debug("Retrying exception caught (%s), " + "retrying request, (attempt %s / %s)", e, i, + max_attempts, exc_info=True) + last_exception = e + continue + raise RetriesExceededError(last_exception) + finally: + logger.debug("EXITING 
_download_range for part: %s", part_index) + + def _perform_io_writes(self, filename): + with self._os.open(filename, 'wb') as f: + while True: + task = self._ioqueue.get() + if task is SHUTDOWN_SENTINEL: + logger.debug("Shutdown sentinel received in IO handler, " + "shutting down IO handler.") + return + else: + try: + offset, data = task + f.seek(offset) + f.write(data) + except Exception as e: + logger.debug("Caught exception in IO thread: %s", + e, exc_info=True) + self._ioqueue.trigger_shutdown() + raise + + +class TransferConfig(object): + def __init__(self, + multipart_threshold=8 * MB, + max_concurrency=10, + multipart_chunksize=8 * MB, + num_download_attempts=5, + max_io_queue=100): + self.multipart_threshold = multipart_threshold + self.max_concurrency = max_concurrency + self.multipart_chunksize = multipart_chunksize + self.num_download_attempts = num_download_attempts + self.max_io_queue = max_io_queue + + +class S3Transfer(object): + + ALLOWED_DOWNLOAD_ARGS = [ + 'VersionId', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'RequestPayer', + ] + + ALLOWED_UPLOAD_ARGS = [ + 'ACL', + 'CacheControl', + 'ContentDisposition', + 'ContentEncoding', + 'ContentLanguage', + 'ContentType', + 'Expires', + 'GrantFullControl', + 'GrantRead', + 'GrantReadACP', + 'GrantWriteACL', + 'Metadata', + 'RequestPayer', + 'ServerSideEncryption', + 'StorageClass', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'SSEKMSKeyId', 'SSEKMSEncryptionContext', 'Tagging', - ] - - def __init__(self, client, config=None, osutil=None): - self._client = client - if config is None: - config = TransferConfig() - self._config = config - if osutil is None: - osutil = OSUtils() - self._osutil = osutil - - def upload_file(self, filename, bucket, key, - callback=None, extra_args=None): - """Upload a file to an S3 object. - - Variants have also been injected into S3 client, Bucket and Object. - You don't have to use S3Transfer.upload_file() directly. - """ - if extra_args is None: - extra_args = {} - self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) - events = self._client.meta.events - events.register_first('request-created.s3', - disable_upload_callbacks, - unique_id='s3upload-callback-disable') - events.register_last('request-created.s3', - enable_upload_callbacks, - unique_id='s3upload-callback-enable') - if self._osutil.get_file_size(filename) >= \ - self._config.multipart_threshold: - self._multipart_upload(filename, bucket, key, callback, extra_args) - else: - self._put_object(filename, bucket, key, callback, extra_args) - - def _put_object(self, filename, bucket, key, callback, extra_args): - # We're using open_file_chunk_reader so we can take advantage of the - # progress callback functionality. - open_chunk_reader = self._osutil.open_file_chunk_reader - with open_chunk_reader(filename, 0, - self._osutil.get_file_size(filename), - callback=callback) as body: - self._client.put_object(Bucket=bucket, Key=key, Body=body, - **extra_args) - - def download_file(self, bucket, key, filename, extra_args=None, - callback=None): - """Download an S3 object to a file. - - Variants have also been injected into S3 client, Bucket and Object. - You don't have to use S3Transfer.download_file() directly. - """ - # This method will issue a ``head_object`` request to determine - # the size of the S3 object. This is used to determine if the - # object is downloaded in parallel. 
- if extra_args is None: - extra_args = {} - self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) - object_size = self._object_size(bucket, key, extra_args) - temp_filename = filename + os.extsep + random_file_extension() - try: - self._download_file(bucket, key, temp_filename, object_size, - extra_args, callback) - except Exception: - logger.debug("Exception caught in download_file, removing partial " - "file: %s", temp_filename, exc_info=True) - self._osutil.remove_file(temp_filename) - raise - else: - self._osutil.rename_file(temp_filename, filename) - - def _download_file(self, bucket, key, filename, object_size, - extra_args, callback): - if object_size >= self._config.multipart_threshold: - self._ranged_download(bucket, key, filename, object_size, - extra_args, callback) - else: - self._get_object(bucket, key, filename, extra_args, callback) - - def _validate_all_known_args(self, actual, allowed): - for kwarg in actual: - if kwarg not in allowed: - raise ValueError( - "Invalid extra_args key '%s', " - "must be one of: %s" % ( - kwarg, ', '.join(allowed))) - - def _ranged_download(self, bucket, key, filename, object_size, - extra_args, callback): - downloader = MultipartDownloader(self._client, self._config, - self._osutil) - downloader.download_file(bucket, key, filename, object_size, - extra_args, callback) - - def _get_object(self, bucket, key, filename, extra_args, callback): - # precondition: num_download_attempts > 0 - max_attempts = self._config.num_download_attempts - last_exception = None - for i in range(max_attempts): - try: - return self._do_get_object(bucket, key, filename, - extra_args, callback) - except (socket.timeout, socket.error, - ReadTimeoutError, IncompleteReadError) as e: - # TODO: we need a way to reset the callback if the - # download failed. - logger.debug("Retrying exception caught (%s), " - "retrying request, (attempt %s / %s)", e, i, - max_attempts, exc_info=True) - last_exception = e - continue - raise RetriesExceededError(last_exception) - - def _do_get_object(self, bucket, key, filename, extra_args, callback): - response = self._client.get_object(Bucket=bucket, Key=key, - **extra_args) - streaming_body = StreamReaderProgress( - response['Body'], callback) - with self._osutil.open(filename, 'wb') as f: - for chunk in iter(lambda: streaming_body.read(8192), b''): - f.write(chunk) - - def _object_size(self, bucket, key, extra_args): - return self._client.head_object( - Bucket=bucket, Key=key, **extra_args)['ContentLength'] - - def _multipart_upload(self, filename, bucket, key, callback, extra_args): - uploader = MultipartUploader(self._client, self._config, self._osutil) - uploader.upload_file(filename, bucket, key, callback, extra_args) + ] + + def __init__(self, client, config=None, osutil=None): + self._client = client + if config is None: + config = TransferConfig() + self._config = config + if osutil is None: + osutil = OSUtils() + self._osutil = osutil + + def upload_file(self, filename, bucket, key, + callback=None, extra_args=None): + """Upload a file to an S3 object. + + Variants have also been injected into S3 client, Bucket and Object. + You don't have to use S3Transfer.upload_file() directly. 
+ """ + if extra_args is None: + extra_args = {} + self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) + events = self._client.meta.events + events.register_first('request-created.s3', + disable_upload_callbacks, + unique_id='s3upload-callback-disable') + events.register_last('request-created.s3', + enable_upload_callbacks, + unique_id='s3upload-callback-enable') + if self._osutil.get_file_size(filename) >= \ + self._config.multipart_threshold: + self._multipart_upload(filename, bucket, key, callback, extra_args) + else: + self._put_object(filename, bucket, key, callback, extra_args) + + def _put_object(self, filename, bucket, key, callback, extra_args): + # We're using open_file_chunk_reader so we can take advantage of the + # progress callback functionality. + open_chunk_reader = self._osutil.open_file_chunk_reader + with open_chunk_reader(filename, 0, + self._osutil.get_file_size(filename), + callback=callback) as body: + self._client.put_object(Bucket=bucket, Key=key, Body=body, + **extra_args) + + def download_file(self, bucket, key, filename, extra_args=None, + callback=None): + """Download an S3 object to a file. + + Variants have also been injected into S3 client, Bucket and Object. + You don't have to use S3Transfer.download_file() directly. + """ + # This method will issue a ``head_object`` request to determine + # the size of the S3 object. This is used to determine if the + # object is downloaded in parallel. + if extra_args is None: + extra_args = {} + self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) + object_size = self._object_size(bucket, key, extra_args) + temp_filename = filename + os.extsep + random_file_extension() + try: + self._download_file(bucket, key, temp_filename, object_size, + extra_args, callback) + except Exception: + logger.debug("Exception caught in download_file, removing partial " + "file: %s", temp_filename, exc_info=True) + self._osutil.remove_file(temp_filename) + raise + else: + self._osutil.rename_file(temp_filename, filename) + + def _download_file(self, bucket, key, filename, object_size, + extra_args, callback): + if object_size >= self._config.multipart_threshold: + self._ranged_download(bucket, key, filename, object_size, + extra_args, callback) + else: + self._get_object(bucket, key, filename, extra_args, callback) + + def _validate_all_known_args(self, actual, allowed): + for kwarg in actual: + if kwarg not in allowed: + raise ValueError( + "Invalid extra_args key '%s', " + "must be one of: %s" % ( + kwarg, ', '.join(allowed))) + + def _ranged_download(self, bucket, key, filename, object_size, + extra_args, callback): + downloader = MultipartDownloader(self._client, self._config, + self._osutil) + downloader.download_file(bucket, key, filename, object_size, + extra_args, callback) + + def _get_object(self, bucket, key, filename, extra_args, callback): + # precondition: num_download_attempts > 0 + max_attempts = self._config.num_download_attempts + last_exception = None + for i in range(max_attempts): + try: + return self._do_get_object(bucket, key, filename, + extra_args, callback) + except (socket.timeout, socket.error, + ReadTimeoutError, IncompleteReadError) as e: + # TODO: we need a way to reset the callback if the + # download failed. 
+ logger.debug("Retrying exception caught (%s), " + "retrying request, (attempt %s / %s)", e, i, + max_attempts, exc_info=True) + last_exception = e + continue + raise RetriesExceededError(last_exception) + + def _do_get_object(self, bucket, key, filename, extra_args, callback): + response = self._client.get_object(Bucket=bucket, Key=key, + **extra_args) + streaming_body = StreamReaderProgress( + response['Body'], callback) + with self._osutil.open(filename, 'wb') as f: + for chunk in iter(lambda: streaming_body.read(8192), b''): + f.write(chunk) + + def _object_size(self, bucket, key, extra_args): + return self._client.head_object( + Bucket=bucket, Key=key, **extra_args)['ContentLength'] + + def _multipart_upload(self, filename, bucket, key, callback, extra_args): + uploader = MultipartUploader(self._client, self._config, self._osutil) + uploader.upload_file(filename, bucket, key, callback, extra_args) diff --git a/contrib/python/s3transfer/py2/s3transfer/bandwidth.py b/contrib/python/s3transfer/py2/s3transfer/bandwidth.py index 2f964c6b4d..07f5096369 100644 --- a/contrib/python/s3transfer/py2/s3transfer/bandwidth.py +++ b/contrib/python/s3transfer/py2/s3transfer/bandwidth.py @@ -1,416 +1,416 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import time -import threading - - -class RequestExceededException(Exception): - def __init__(self, requested_amt, retry_time): - """Error when requested amount exceeds what is allowed - - The request that raised this error should be retried after waiting - the time specified by ``retry_time``. - - :type requested_amt: int - :param requested_amt: The originally requested byte amount - - :type retry_time: float - :param retry_time: The length in time to wait to retry for the - requested amount - """ - self.requested_amt = requested_amt - self.retry_time = retry_time - msg = ( - 'Request amount %s exceeded the amount available. Retry in %s' % ( - requested_amt, retry_time) - ) - super(RequestExceededException, self).__init__(msg) - - -class RequestToken(object): - """A token to pass as an identifier when consuming from the LeakyBucket""" - pass - - -class TimeUtils(object): - def time(self): - """Get the current time back - - :rtype: float - :returns: The current time in seconds - """ - return time.time() - - def sleep(self, value): - """Sleep for a designated time - - :type value: float - :param value: The time to sleep for in seconds - """ - return time.sleep(value) - - -class BandwidthLimiter(object): - def __init__(self, leaky_bucket, time_utils=None): - """Limits bandwidth for shared S3 transfers - - :type leaky_bucket: LeakyBucket - :param leaky_bucket: The leaky bucket to use limit bandwidth - - :type time_utils: TimeUtils - :param time_utils: Time utility to use for interacting with time. 
- """ - self._leaky_bucket = leaky_bucket - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - - def get_bandwith_limited_stream(self, fileobj, transfer_coordinator, - enabled=True): - """Wraps a fileobj in a bandwidth limited stream wrapper - - :type fileobj: file-like obj - :param fileobj: The file-like obj to wrap - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - param transfer_coordinator: The coordinator for the general transfer - that the wrapped stream is a part of - - :type enabled: boolean - :param enabled: Whether bandwidth limiting should be enabled to start - """ - stream = BandwidthLimitedStream( - fileobj, self._leaky_bucket, transfer_coordinator, - self._time_utils) - if not enabled: - stream.disable_bandwidth_limiting() - return stream - - -class BandwidthLimitedStream(object): - def __init__(self, fileobj, leaky_bucket, transfer_coordinator, - time_utils=None, bytes_threshold=256 * 1024): - """Limits bandwidth for reads on a wrapped stream - - :type fileobj: file-like object - :param fileobj: The file like object to wrap - - :type leaky_bucket: LeakyBucket - :param leaky_bucket: The leaky bucket to use to throttle reads on - the stream - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - param transfer_coordinator: The coordinator for the general transfer - that the wrapped stream is a part of - - :type time_utils: TimeUtils - :param time_utils: The time utility to use for interacting with time - """ - self._fileobj = fileobj - self._leaky_bucket = leaky_bucket - self._transfer_coordinator = transfer_coordinator - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - self._bandwidth_limiting_enabled = True - self._request_token = RequestToken() - self._bytes_seen = 0 - self._bytes_threshold = bytes_threshold - - def enable_bandwidth_limiting(self): - """Enable bandwidth limiting on reads to the stream""" - self._bandwidth_limiting_enabled = True - - def disable_bandwidth_limiting(self): - """Disable bandwidth limiting on reads to the stream""" - self._bandwidth_limiting_enabled = False - - def read(self, amount): - """Read a specified amount - - Reads will only be throttled if bandwidth limiting is enabled. - """ - if not self._bandwidth_limiting_enabled: - return self._fileobj.read(amount) - - # We do not want to be calling consume on every read as the read - # amounts can be small causing the lock of the leaky bucket to - # introduce noticeable overhead. So instead we keep track of - # how many bytes we have seen and only call consume once we pass a - # certain threshold. - self._bytes_seen += amount - if self._bytes_seen < self._bytes_threshold: - return self._fileobj.read(amount) - - self._consume_through_leaky_bucket() - return self._fileobj.read(amount) - - def _consume_through_leaky_bucket(self): - # NOTE: If the read amonut on the stream are high, it will result - # in large bursty behavior as there is not an interface for partial - # reads. However given the read's on this abstraction are at most 256KB - # (via downloads), it reduces the burstiness to be small KB bursts at - # worst. 
- while not self._transfer_coordinator.exception: - try: - self._leaky_bucket.consume( - self._bytes_seen, self._request_token) - self._bytes_seen = 0 - return - except RequestExceededException as e: - self._time_utils.sleep(e.retry_time) - else: - raise self._transfer_coordinator.exception - - def signal_transferring(self): - """Signal that data being read is being transferred to S3""" - self.enable_bandwidth_limiting() - - def signal_not_transferring(self): - """Signal that data being read is not being transferred to S3""" - self.disable_bandwidth_limiting() - +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import time +import threading + + +class RequestExceededException(Exception): + def __init__(self, requested_amt, retry_time): + """Error when requested amount exceeds what is allowed + + The request that raised this error should be retried after waiting + the time specified by ``retry_time``. + + :type requested_amt: int + :param requested_amt: The originally requested byte amount + + :type retry_time: float + :param retry_time: The length in time to wait to retry for the + requested amount + """ + self.requested_amt = requested_amt + self.retry_time = retry_time + msg = ( + 'Request amount %s exceeded the amount available. Retry in %s' % ( + requested_amt, retry_time) + ) + super(RequestExceededException, self).__init__(msg) + + +class RequestToken(object): + """A token to pass as an identifier when consuming from the LeakyBucket""" + pass + + +class TimeUtils(object): + def time(self): + """Get the current time back + + :rtype: float + :returns: The current time in seconds + """ + return time.time() + + def sleep(self, value): + """Sleep for a designated time + + :type value: float + :param value: The time to sleep for in seconds + """ + return time.sleep(value) + + +class BandwidthLimiter(object): + def __init__(self, leaky_bucket, time_utils=None): + """Limits bandwidth for shared S3 transfers + + :type leaky_bucket: LeakyBucket + :param leaky_bucket: The leaky bucket to use limit bandwidth + + :type time_utils: TimeUtils + :param time_utils: Time utility to use for interacting with time. 
+ """ + self._leaky_bucket = leaky_bucket + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + + def get_bandwith_limited_stream(self, fileobj, transfer_coordinator, + enabled=True): + """Wraps a fileobj in a bandwidth limited stream wrapper + + :type fileobj: file-like obj + :param fileobj: The file-like obj to wrap + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + param transfer_coordinator: The coordinator for the general transfer + that the wrapped stream is a part of + + :type enabled: boolean + :param enabled: Whether bandwidth limiting should be enabled to start + """ + stream = BandwidthLimitedStream( + fileobj, self._leaky_bucket, transfer_coordinator, + self._time_utils) + if not enabled: + stream.disable_bandwidth_limiting() + return stream + + +class BandwidthLimitedStream(object): + def __init__(self, fileobj, leaky_bucket, transfer_coordinator, + time_utils=None, bytes_threshold=256 * 1024): + """Limits bandwidth for reads on a wrapped stream + + :type fileobj: file-like object + :param fileobj: The file like object to wrap + + :type leaky_bucket: LeakyBucket + :param leaky_bucket: The leaky bucket to use to throttle reads on + the stream + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + param transfer_coordinator: The coordinator for the general transfer + that the wrapped stream is a part of + + :type time_utils: TimeUtils + :param time_utils: The time utility to use for interacting with time + """ + self._fileobj = fileobj + self._leaky_bucket = leaky_bucket + self._transfer_coordinator = transfer_coordinator + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + self._bandwidth_limiting_enabled = True + self._request_token = RequestToken() + self._bytes_seen = 0 + self._bytes_threshold = bytes_threshold + + def enable_bandwidth_limiting(self): + """Enable bandwidth limiting on reads to the stream""" + self._bandwidth_limiting_enabled = True + + def disable_bandwidth_limiting(self): + """Disable bandwidth limiting on reads to the stream""" + self._bandwidth_limiting_enabled = False + + def read(self, amount): + """Read a specified amount + + Reads will only be throttled if bandwidth limiting is enabled. + """ + if not self._bandwidth_limiting_enabled: + return self._fileobj.read(amount) + + # We do not want to be calling consume on every read as the read + # amounts can be small causing the lock of the leaky bucket to + # introduce noticeable overhead. So instead we keep track of + # how many bytes we have seen and only call consume once we pass a + # certain threshold. + self._bytes_seen += amount + if self._bytes_seen < self._bytes_threshold: + return self._fileobj.read(amount) + + self._consume_through_leaky_bucket() + return self._fileobj.read(amount) + + def _consume_through_leaky_bucket(self): + # NOTE: If the read amonut on the stream are high, it will result + # in large bursty behavior as there is not an interface for partial + # reads. However given the read's on this abstraction are at most 256KB + # (via downloads), it reduces the burstiness to be small KB bursts at + # worst. 
+ while not self._transfer_coordinator.exception: + try: + self._leaky_bucket.consume( + self._bytes_seen, self._request_token) + self._bytes_seen = 0 + return + except RequestExceededException as e: + self._time_utils.sleep(e.retry_time) + else: + raise self._transfer_coordinator.exception + + def signal_transferring(self): + """Signal that data being read is being transferred to S3""" + self.enable_bandwidth_limiting() + + def signal_not_transferring(self): + """Signal that data being read is not being transferred to S3""" + self.disable_bandwidth_limiting() + def seek(self, where, whence=0): self._fileobj.seek(where, whence) - - def tell(self): - return self._fileobj.tell() - - def close(self): - if self._bandwidth_limiting_enabled and self._bytes_seen: - # This handles the case where the file is small enough to never - # trigger the threshold and thus is never subjugated to the - # leaky bucket on read(). This specifically happens for small - # uploads. So instead to account for those bytes, have - # it go through the leaky bucket when the file gets closed. - self._consume_through_leaky_bucket() - self._fileobj.close() - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - -class LeakyBucket(object): - def __init__(self, max_rate, time_utils=None, rate_tracker=None, - consumption_scheduler=None): - """A leaky bucket abstraction to limit bandwidth consumption - - :type rate: int - :type rate: The maximum rate to allow. This rate is in terms of - bytes per second. - - :type time_utils: TimeUtils - :param time_utils: The time utility to use for interacting with time - - :type rate_tracker: BandwidthRateTracker - :param rate_tracker: Tracks bandwidth consumption - - :type consumption_scheduler: ConsumptionScheduler - :param consumption_scheduler: Schedules consumption retries when - necessary - """ - self._max_rate = float(max_rate) - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - self._lock = threading.Lock() - self._rate_tracker = rate_tracker - if rate_tracker is None: - self._rate_tracker = BandwidthRateTracker() - self._consumption_scheduler = consumption_scheduler - if consumption_scheduler is None: - self._consumption_scheduler = ConsumptionScheduler() - - def consume(self, amt, request_token): - """Consume an a requested amount - - :type amt: int - :param amt: The amount of bytes to request to consume - - :type request_token: RequestToken - :param request_token: The token associated to the consumption - request that is used to identify the request. So if a - RequestExceededException is raised the token should be used - in subsequent retry consume() request. 
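A self-contained sketch of the consume-and-retry pattern that the loop above implements, using only names defined in this module; the 256 KiB request mirrors the stream's default bytes_threshold, and the 1 MiB/s cap is an invented value.

    import time

    from s3transfer.bandwidth import (
        LeakyBucket, RequestExceededException, RequestToken)

    bucket = LeakyBucket(max_rate=1024 * 1024)   # ~1 MiB/s
    token = RequestToken()                       # identifies this consumer across retries

    while True:
        try:
            # Raises RequestExceededException if the projected rate is too high
            bucket.consume(256 * 1024, token)
            break
        except RequestExceededException as e:
            # Wait for the slot the ConsumptionScheduler allotted, then retry
            time.sleep(e.retry_time)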
- - :raises RequestExceededException: If the consumption amount would - exceed the maximum allocated bandwidth - - :rtype: int - :returns: The amount consumed - """ - with self._lock: - time_now = self._time_utils.time() - if self._consumption_scheduler.is_scheduled(request_token): - return self._release_requested_amt_for_scheduled_request( - amt, request_token, time_now) - elif self._projected_to_exceed_max_rate(amt, time_now): - self._raise_request_exceeded_exception( - amt, request_token, time_now) - else: - return self._release_requested_amt(amt, time_now) - - def _projected_to_exceed_max_rate(self, amt, time_now): - projected_rate = self._rate_tracker.get_projected_rate(amt, time_now) - return projected_rate > self._max_rate - - def _release_requested_amt_for_scheduled_request(self, amt, request_token, - time_now): - self._consumption_scheduler.process_scheduled_consumption( - request_token) - return self._release_requested_amt(amt, time_now) - - def _raise_request_exceeded_exception(self, amt, request_token, time_now): - allocated_time = amt/float(self._max_rate) - retry_time = self._consumption_scheduler.schedule_consumption( - amt, request_token, allocated_time) - raise RequestExceededException( - requested_amt=amt, retry_time=retry_time) - - def _release_requested_amt(self, amt, time_now): - self._rate_tracker.record_consumption_rate(amt, time_now) - return amt - - -class ConsumptionScheduler(object): - def __init__(self): - """Schedules when to consume a desired amount""" - self._tokens_to_scheduled_consumption = {} - self._total_wait = 0 - - def is_scheduled(self, token): - """Indicates if a consumption request has been scheduled - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - """ - return token in self._tokens_to_scheduled_consumption - - def schedule_consumption(self, amt, token, time_to_consume): - """Schedules a wait time to be able to consume an amount - - :type amt: int - :param amt: The amount of bytes scheduled to be consumed - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - - :type time_to_consume: float - :param time_to_consume: The desired time it should take for that - specific request amount to be consumed in regardless of previously - scheduled consumption requests - - :rtype: float - :returns: The amount of time to wait for the specific request before - actually consuming the specified amount. - """ - self._total_wait += time_to_consume - self._tokens_to_scheduled_consumption[token] = { - 'wait_duration': self._total_wait, - 'time_to_consume': time_to_consume, - } - return self._total_wait - - def process_scheduled_consumption(self, token): - """Processes a scheduled consumption request that has completed - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - """ - scheduled_retry = self._tokens_to_scheduled_consumption.pop(token) - self._total_wait = max( - self._total_wait - scheduled_retry['time_to_consume'], 0) - - -class BandwidthRateTracker(object): - def __init__(self, alpha=0.8): - """Tracks the rate of bandwidth consumption - - :type a: float - :param a: The constant to use in calculating the exponentional moving - average of the bandwidth rate. 
Specifically it is used in the - following calculation: - - current_rate = alpha * new_rate + (1 - alpha) * current_rate - - This value of this constant should be between 0 and 1. - """ - self._alpha = alpha - self._last_time = None - self._current_rate = None - - @property - def current_rate(self): - """The current transfer rate - - :rtype: float - :returns: The current tracked transfer rate - """ - if self._last_time is None: - return 0.0 - return self._current_rate - - def get_projected_rate(self, amt, time_at_consumption): - """Get the projected rate using a provided amount and time - - :type amt: int - :param amt: The proposed amount to consume - - :type time_at_consumption: float - :param time_at_consumption: The proposed time to consume at - - :rtype: float - :returns: The consumption rate if that amt and time were consumed - """ - if self._last_time is None: - return 0.0 - return self._calculate_exponential_moving_average_rate( - amt, time_at_consumption) - - def record_consumption_rate(self, amt, time_at_consumption): - """Record the consumption rate based off amount and time point - - :type amt: int - :param amt: The amount that got consumed - - :type time_at_consumption: float - :param time_at_consumption: The time at which the amount was consumed - """ - if self._last_time is None: - self._last_time = time_at_consumption - self._current_rate = 0.0 - return - self._current_rate = self._calculate_exponential_moving_average_rate( - amt, time_at_consumption) - self._last_time = time_at_consumption - - def _calculate_rate(self, amt, time_at_consumption): - time_delta = time_at_consumption - self._last_time - if time_delta <= 0: + + def tell(self): + return self._fileobj.tell() + + def close(self): + if self._bandwidth_limiting_enabled and self._bytes_seen: + # This handles the case where the file is small enough to never + # trigger the threshold and thus is never subjugated to the + # leaky bucket on read(). This specifically happens for small + # uploads. So instead to account for those bytes, have + # it go through the leaky bucket when the file gets closed. + self._consume_through_leaky_bucket() + self._fileobj.close() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + +class LeakyBucket(object): + def __init__(self, max_rate, time_utils=None, rate_tracker=None, + consumption_scheduler=None): + """A leaky bucket abstraction to limit bandwidth consumption + + :type rate: int + :type rate: The maximum rate to allow. This rate is in terms of + bytes per second. 
+ + :type time_utils: TimeUtils + :param time_utils: The time utility to use for interacting with time + + :type rate_tracker: BandwidthRateTracker + :param rate_tracker: Tracks bandwidth consumption + + :type consumption_scheduler: ConsumptionScheduler + :param consumption_scheduler: Schedules consumption retries when + necessary + """ + self._max_rate = float(max_rate) + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + self._lock = threading.Lock() + self._rate_tracker = rate_tracker + if rate_tracker is None: + self._rate_tracker = BandwidthRateTracker() + self._consumption_scheduler = consumption_scheduler + if consumption_scheduler is None: + self._consumption_scheduler = ConsumptionScheduler() + + def consume(self, amt, request_token): + """Consume an a requested amount + + :type amt: int + :param amt: The amount of bytes to request to consume + + :type request_token: RequestToken + :param request_token: The token associated to the consumption + request that is used to identify the request. So if a + RequestExceededException is raised the token should be used + in subsequent retry consume() request. + + :raises RequestExceededException: If the consumption amount would + exceed the maximum allocated bandwidth + + :rtype: int + :returns: The amount consumed + """ + with self._lock: + time_now = self._time_utils.time() + if self._consumption_scheduler.is_scheduled(request_token): + return self._release_requested_amt_for_scheduled_request( + amt, request_token, time_now) + elif self._projected_to_exceed_max_rate(amt, time_now): + self._raise_request_exceeded_exception( + amt, request_token, time_now) + else: + return self._release_requested_amt(amt, time_now) + + def _projected_to_exceed_max_rate(self, amt, time_now): + projected_rate = self._rate_tracker.get_projected_rate(amt, time_now) + return projected_rate > self._max_rate + + def _release_requested_amt_for_scheduled_request(self, amt, request_token, + time_now): + self._consumption_scheduler.process_scheduled_consumption( + request_token) + return self._release_requested_amt(amt, time_now) + + def _raise_request_exceeded_exception(self, amt, request_token, time_now): + allocated_time = amt/float(self._max_rate) + retry_time = self._consumption_scheduler.schedule_consumption( + amt, request_token, allocated_time) + raise RequestExceededException( + requested_amt=amt, retry_time=retry_time) + + def _release_requested_amt(self, amt, time_now): + self._rate_tracker.record_consumption_rate(amt, time_now) + return amt + + +class ConsumptionScheduler(object): + def __init__(self): + """Schedules when to consume a desired amount""" + self._tokens_to_scheduled_consumption = {} + self._total_wait = 0 + + def is_scheduled(self, token): + """Indicates if a consumption request has been scheduled + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. + """ + return token in self._tokens_to_scheduled_consumption + + def schedule_consumption(self, amt, token, time_to_consume): + """Schedules a wait time to be able to consume an amount + + :type amt: int + :param amt: The amount of bytes scheduled to be consumed + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. 
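Illustrative numbers for how the scheduler stacks waits (the byte amounts and the two-second windows are invented):

    from s3transfer.bandwidth import ConsumptionScheduler, RequestToken

    scheduler = ConsumptionScheduler()
    first, second = RequestToken(), RequestToken()

    scheduler.schedule_consumption(256 * 1024, first, 2.0)    # returns 2.0
    scheduler.schedule_consumption(256 * 1024, second, 2.0)   # returns 4.0: waits accumulate
    scheduler.process_scheduled_consumption(first)            # total wait drops back to 2.0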
+ + :type time_to_consume: float + :param time_to_consume: The desired time it should take for that + specific request amount to be consumed in regardless of previously + scheduled consumption requests + + :rtype: float + :returns: The amount of time to wait for the specific request before + actually consuming the specified amount. + """ + self._total_wait += time_to_consume + self._tokens_to_scheduled_consumption[token] = { + 'wait_duration': self._total_wait, + 'time_to_consume': time_to_consume, + } + return self._total_wait + + def process_scheduled_consumption(self, token): + """Processes a scheduled consumption request that has completed + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. + """ + scheduled_retry = self._tokens_to_scheduled_consumption.pop(token) + self._total_wait = max( + self._total_wait - scheduled_retry['time_to_consume'], 0) + + +class BandwidthRateTracker(object): + def __init__(self, alpha=0.8): + """Tracks the rate of bandwidth consumption + + :type a: float + :param a: The constant to use in calculating the exponentional moving + average of the bandwidth rate. Specifically it is used in the + following calculation: + + current_rate = alpha * new_rate + (1 - alpha) * current_rate + + This value of this constant should be between 0 and 1. + """ + self._alpha = alpha + self._last_time = None + self._current_rate = None + + @property + def current_rate(self): + """The current transfer rate + + :rtype: float + :returns: The current tracked transfer rate + """ + if self._last_time is None: + return 0.0 + return self._current_rate + + def get_projected_rate(self, amt, time_at_consumption): + """Get the projected rate using a provided amount and time + + :type amt: int + :param amt: The proposed amount to consume + + :type time_at_consumption: float + :param time_at_consumption: The proposed time to consume at + + :rtype: float + :returns: The consumption rate if that amt and time were consumed + """ + if self._last_time is None: + return 0.0 + return self._calculate_exponential_moving_average_rate( + amt, time_at_consumption) + + def record_consumption_rate(self, amt, time_at_consumption): + """Record the consumption rate based off amount and time point + + :type amt: int + :param amt: The amount that got consumed + + :type time_at_consumption: float + :param time_at_consumption: The time at which the amount was consumed + """ + if self._last_time is None: + self._last_time = time_at_consumption + self._current_rate = 0.0 + return + self._current_rate = self._calculate_exponential_moving_average_rate( + amt, time_at_consumption) + self._last_time = time_at_consumption + + def _calculate_rate(self, amt, time_at_consumption): + time_delta = time_at_consumption - self._last_time + if time_delta <= 0: # While it is really unlikely to see this in an actual transfer, - # we do not want to be returning back a negative rate or try to - # divide the amount by zero. So instead return back an infinite - # rate as the time delta is infinitesimally small. - return float('inf') - return amt / (time_delta) - - def _calculate_exponential_moving_average_rate(self, amt, - time_at_consumption): - new_rate = self._calculate_rate(amt, time_at_consumption) - return self._alpha * new_rate + (1 - self._alpha) * self._current_rate + # we do not want to be returning back a negative rate or try to + # divide the amount by zero. 
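A worked instance of the moving-average formula documented above, with invented rates:

    alpha = 0.8
    current_rate = 1000000.0   # previously tracked rate, bytes/s
    new_rate = 500000.0        # rate implied by the latest consumption
    # 0.8 * 500000 + 0.2 * 1000000 = 600000.0 bytes/s
    current_rate = alpha * new_rate + (1 - alpha) * current_rate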
So instead return back an infinite + # rate as the time delta is infinitesimally small. + return float('inf') + return amt / (time_delta) + + def _calculate_exponential_moving_average_rate(self, amt, + time_at_consumption): + new_rate = self._calculate_rate(amt, time_at_consumption) + return self._alpha * new_rate + (1 - self._alpha) * self._current_rate diff --git a/contrib/python/s3transfer/py2/s3transfer/compat.py b/contrib/python/s3transfer/py2/s3transfer/compat.py index 76543b9812..4909f1aeaa 100644 --- a/contrib/python/s3transfer/py2/s3transfer/compat.py +++ b/contrib/python/s3transfer/py2/s3transfer/compat.py @@ -1,94 +1,94 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import inspect -import sys -import os -import errno -import socket - -from botocore.compat import six - - -if sys.platform.startswith('win'): - def rename_file(current_filename, new_filename): - try: - os.remove(new_filename) - except OSError as e: - if not e.errno == errno.ENOENT: - # We only want to a ignore trying to remove - # a file that does not exist. If it fails - # for any other reason we should be propagating - # that exception. - raise - os.rename(current_filename, new_filename) -else: - rename_file = os.rename - -if six.PY3: - def accepts_kwargs(func): - # In python3.4.1, there's backwards incompatible - # changes when using getargspec with functools.partials. - return inspect.getfullargspec(func)[2] - - # In python3, socket.error is OSError, which is too general - # for what we want (i.e FileNotFoundError is a subclass of OSError). - # In py3 all the socket related errors are in a newly created - # ConnectionError - SOCKET_ERROR = ConnectionError - MAXINT = None -else: - def accepts_kwargs(func): - return inspect.getargspec(func)[2] - - SOCKET_ERROR = socket.error - MAXINT = sys.maxint - - -def seekable(fileobj): - """Backwards compat function to determine if a fileobj is seekable - - :param fileobj: The file-like object to determine if seekable - - :returns: True, if seekable. False, otherwise. - """ - # If the fileobj has a seekable attr, try calling the seekable() - # method on it. - if hasattr(fileobj, 'seekable'): - return fileobj.seekable() - # If there is no seekable attr, check if the object can be seeked - # or telled. If it can, try to seek to the current position. - elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'): - try: - fileobj.seek(0, 1) - return True - except (OSError, IOError): - # If an io related error was thrown then it is not seekable. - return False - # Else, the fileobj is not seekable - return False - - -def readable(fileobj): - """Determines whether or not a file-like object is readable. - - :param fileobj: The file-like object to determine if readable - - :returns: True, if readable. False otherwise. - """ - if hasattr(fileobj, 'readable'): - return fileobj.readable() - - return hasattr(fileobj, 'read') +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). 
You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import inspect +import sys +import os +import errno +import socket + +from botocore.compat import six + + +if sys.platform.startswith('win'): + def rename_file(current_filename, new_filename): + try: + os.remove(new_filename) + except OSError as e: + if not e.errno == errno.ENOENT: + # We only want to a ignore trying to remove + # a file that does not exist. If it fails + # for any other reason we should be propagating + # that exception. + raise + os.rename(current_filename, new_filename) +else: + rename_file = os.rename + +if six.PY3: + def accepts_kwargs(func): + # In python3.4.1, there's backwards incompatible + # changes when using getargspec with functools.partials. + return inspect.getfullargspec(func)[2] + + # In python3, socket.error is OSError, which is too general + # for what we want (i.e FileNotFoundError is a subclass of OSError). + # In py3 all the socket related errors are in a newly created + # ConnectionError + SOCKET_ERROR = ConnectionError + MAXINT = None +else: + def accepts_kwargs(func): + return inspect.getargspec(func)[2] + + SOCKET_ERROR = socket.error + MAXINT = sys.maxint + + +def seekable(fileobj): + """Backwards compat function to determine if a fileobj is seekable + + :param fileobj: The file-like object to determine if seekable + + :returns: True, if seekable. False, otherwise. + """ + # If the fileobj has a seekable attr, try calling the seekable() + # method on it. + if hasattr(fileobj, 'seekable'): + return fileobj.seekable() + # If there is no seekable attr, check if the object can be seeked + # or telled. If it can, try to seek to the current position. + elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'): + try: + fileobj.seek(0, 1) + return True + except (OSError, IOError): + # If an io related error was thrown then it is not seekable. + return False + # Else, the fileobj is not seekable + return False + + +def readable(fileobj): + """Determines whether or not a file-like object is readable. + + :param fileobj: The file-like object to determine if readable + + :returns: True, if readable. False otherwise. + """ + if hasattr(fileobj, 'readable'): + return fileobj.readable() + + return hasattr(fileobj, 'read') def fallocate(fileobj, size): diff --git a/contrib/python/s3transfer/py2/s3transfer/copies.py b/contrib/python/s3transfer/py2/s3transfer/copies.py index e9c2140407..4b4086c24e 100644 --- a/contrib/python/s3transfer/py2/s3transfer/copies.py +++ b/contrib/python/s3transfer/py2/s3transfer/copies.py @@ -1,327 +1,327 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
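Returning to the seekable()/readable() helpers above, a small sketch of what they report for a couple of common objects (the stand-in class is invented for illustration):

    import io

    from s3transfer.compat import readable, seekable

    class ReadOnlyBody(object):
        # A minimal object exposing read() but neither seek() nor tell()
        def read(self, amt=None):
            return b''

    seekable(io.BytesIO(b'abc'))   # True  -- BytesIO exposes .seekable()
    seekable(ReadOnlyBody())       # False -- no seek()/tell() to fall back on
    readable(ReadOnlyBody())       # True  -- a .read attribute is enough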
-import copy -import math - -from s3transfer.tasks import Task -from s3transfer.tasks import SubmissionTask -from s3transfer.tasks import CreateMultipartUploadTask -from s3transfer.tasks import CompleteMultipartUploadTask -from s3transfer.utils import get_callbacks -from s3transfer.utils import calculate_range_parameter -from s3transfer.utils import get_filtered_dict -from s3transfer.utils import ChunksizeAdjuster - - -class CopySubmissionTask(SubmissionTask): - """Task for submitting tasks to execute a copy""" - - EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = { - 'CopySourceIfMatch': 'IfMatch', - 'CopySourceIfModifiedSince': 'IfModifiedSince', - 'CopySourceIfNoneMatch': 'IfNoneMatch', - 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince', - 'CopySourceSSECustomerKey': 'SSECustomerKey', - 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5', +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import copy +import math + +from s3transfer.tasks import Task +from s3transfer.tasks import SubmissionTask +from s3transfer.tasks import CreateMultipartUploadTask +from s3transfer.tasks import CompleteMultipartUploadTask +from s3transfer.utils import get_callbacks +from s3transfer.utils import calculate_range_parameter +from s3transfer.utils import get_filtered_dict +from s3transfer.utils import ChunksizeAdjuster + + +class CopySubmissionTask(SubmissionTask): + """Task for submitting tasks to execute a copy""" + + EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = { + 'CopySourceIfMatch': 'IfMatch', + 'CopySourceIfModifiedSince': 'IfModifiedSince', + 'CopySourceIfNoneMatch': 'IfNoneMatch', + 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince', + 'CopySourceSSECustomerKey': 'SSECustomerKey', + 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5', 'RequestPayer': 'RequestPayer', 'ExpectedBucketOwner': 'ExpectedBucketOwner' - } - - UPLOAD_PART_COPY_ARGS = [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5', - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', + } + + UPLOAD_PART_COPY_ARGS = [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 'CopySourceSSECustomerKey', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5', + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', 'ExpectedBucketOwner' - ] - - CREATE_MULTIPART_ARGS_BLACKLIST = [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5', + ] + + CREATE_MULTIPART_ARGS_BLACKLIST = [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 
'CopySourceSSECustomerKey', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5', 'MetadataDirective', 'TaggingDirective', - ] - - COMPLETE_MULTIPART_ARGS = [ + ] + + COMPLETE_MULTIPART_ARGS = [ 'RequestPayer', 'ExpectedBucketOwner' - ] - - def _submit(self, client, config, osutil, request_executor, - transfer_future): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - # Determine the size if it was not provided - if transfer_future.meta.size is None: - # If a size was not provided figure out the size for the - # user. Note that we will only use the client provided to - # the TransferManager. If the object is outside of the region - # of the client, they may have to provide the file size themselves - # with a completely new client. - call_args = transfer_future.meta.call_args - head_object_request = \ - self._get_head_object_request_from_copy_source( - call_args.copy_source) - extra_args = call_args.extra_args - - # Map any values that may be used in the head object that is - # used in the copy object - for param, value in extra_args.items(): - if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING: - head_object_request[ - self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]] = value - - response = call_args.source_client.head_object( - **head_object_request) - transfer_future.meta.provide_transfer_size( - response['ContentLength']) - - # If it is greater than threshold do a multipart copy, otherwise - # do a regular copy object. - if transfer_future.meta.size < config.multipart_threshold: - self._submit_copy_request( - client, config, osutil, request_executor, transfer_future) - else: - self._submit_multipart_request( - client, config, osutil, request_executor, transfer_future) - - def _submit_copy_request(self, client, config, osutil, request_executor, - transfer_future): - call_args = transfer_future.meta.call_args - - # Get the needed progress callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Submit the request of a single copy. - self._transfer_coordinator.submit( - request_executor, - CopyObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'copy_source': call_args.copy_source, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, - 'callbacks': progress_callbacks, - 'size': transfer_future.meta.size - }, - is_final=True - ) - ) - - def _submit_multipart_request(self, client, config, osutil, - request_executor, transfer_future): - call_args = transfer_future.meta.call_args - - # Submit the request to create a multipart upload and make sure it - # does not include any of the arguments used for copy part. 
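For illustration (argument values invented), the filtering described here keeps only the extra arguments that remain valid for CreateMultipartUpload:

    from s3transfer.copies import CopySubmissionTask

    extra_args = {'ACL': 'public-read', 'CopySourceIfMatch': '"etag-abc"'}
    create_multipart_extra_args = {}
    for param, val in extra_args.items():
        if param not in CopySubmissionTask.CREATE_MULTIPART_ARGS_BLACKLIST:
            create_multipart_extra_args[param] = val
    # -> {'ACL': 'public-read'}; CopySourceIfMatch is reserved for the part copies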
- create_multipart_extra_args = {} - for param, val in call_args.extra_args.items(): - if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST: - create_multipart_extra_args[param] = val - - create_multipart_future = self._transfer_coordinator.submit( - request_executor, - CreateMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': create_multipart_extra_args, - } - ) - ) - - # Determine how many parts are needed based on filesize and - # desired chunksize. - part_size = config.multipart_chunksize - adjuster = ChunksizeAdjuster() - part_size = adjuster.adjust_chunksize( - part_size, transfer_future.meta.size) - num_parts = int( - math.ceil(transfer_future.meta.size / float(part_size))) - - # Submit requests to upload the parts of the file. - part_futures = [] - progress_callbacks = get_callbacks(transfer_future, 'progress') - - for part_number in range(1, num_parts + 1): - extra_part_args = self._extra_upload_part_args( - call_args.extra_args) - # The part number for upload part starts at 1 while the - # range parameter starts at zero, so just subtract 1 off of - # the part number - extra_part_args['CopySourceRange'] = calculate_range_parameter( - part_size, part_number-1, num_parts, transfer_future.meta.size) - # Get the size of the part copy as well for the progress - # callbacks. - size = self._get_transfer_size( - part_size, part_number-1, num_parts, transfer_future.meta.size - ) - part_futures.append( - self._transfer_coordinator.submit( - request_executor, - CopyPartTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'copy_source': call_args.copy_source, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'part_number': part_number, - 'extra_args': extra_part_args, - 'callbacks': progress_callbacks, - 'size': size - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future - } - ) - ) - ) - - complete_multipart_extra_args = self._extra_complete_multipart_args( - call_args.extra_args) - # Submit the request to complete the multipart upload. - self._transfer_coordinator.submit( - request_executor, - CompleteMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': complete_multipart_extra_args, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future, - 'parts': part_futures - }, - is_final=True - ) - ) - - def _get_head_object_request_from_copy_source(self, copy_source): - if isinstance(copy_source, dict): - return copy.copy(copy_source) - else: - raise TypeError( - 'Expecting dictionary formatted: ' - '{"Bucket": bucket_name, "Key": key} ' - 'but got %s or type %s.' - % (copy_source, type(copy_source)) - ) - - def _extra_upload_part_args(self, extra_args): - # Only the args in COPY_PART_ARGS actually need to be passed - # onto the upload_part_copy calls. - return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS) - - def _extra_complete_multipart_args(self, extra_args): - return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) - - def _get_transfer_size(self, part_size, part_index, num_parts, - total_transfer_size): - if part_index == num_parts - 1: - # The last part may be different in size then the rest of the - # parts. 
- return total_transfer_size - (part_index * part_size) - return part_size - - -class CopyObjectTask(Task): - """Task to do a nonmultipart copy""" - def _main(self, client, copy_source, bucket, key, extra_args, callbacks, - size): - """ - :param client: The client to use when calling PutObject - :param copy_source: The CopySource parameter to use - :param bucket: The name of the bucket to copy to - :param key: The name of the key to copy to - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - :param callbacks: List of callbacks to call after copy - :param size: The size of the transfer. This value is passed into - the callbacks - - """ - client.copy_object( - CopySource=copy_source, Bucket=bucket, Key=key, **extra_args) - for callback in callbacks: - callback(bytes_transferred=size) - - -class CopyPartTask(Task): - """Task to upload a part in a multipart copy""" - def _main(self, client, copy_source, bucket, key, upload_id, part_number, - extra_args, callbacks, size): - """ - :param client: The client to use when calling PutObject - :param copy_source: The CopySource parameter to use - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param part_number: The number representing the part of the multipart - upload - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - :param callbacks: List of callbacks to call after copy part - :param size: The size of the transfer. This value is passed into - the callbacks - - :rtype: dict - :returns: A dictionary representing a part:: - - {'Etag': etag_value, 'PartNumber': part_number} - - This value can be appended to a list to be used to complete - the multipart upload. - """ - response = client.upload_part_copy( - CopySource=copy_source, Bucket=bucket, Key=key, - UploadId=upload_id, PartNumber=part_number, **extra_args) - for callback in callbacks: - callback(bytes_transferred=size) - etag = response['CopyPartResult']['ETag'] - return {'ETag': etag, 'PartNumber': part_number} + ] + + def _submit(self, client, config, osutil, request_executor, + transfer_future): + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + # Determine the size if it was not provided + if transfer_future.meta.size is None: + # If a size was not provided figure out the size for the + # user. Note that we will only use the client provided to + # the TransferManager. If the object is outside of the region + # of the client, they may have to provide the file size themselves + # with a completely new client. 
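A sketch of that size lookup with invented values, showing how EXTRA_ARGS_TO_HEAD_ARGS_MAPPING translates copy-source conditions into HeadObject parameters:

    from s3transfer.copies import CopySubmissionTask

    copy_source = {'Bucket': 'source-bucket', 'Key': 'source-key'}
    extra_args = {'CopySourceIfMatch': '"etag-123"', 'ACL': 'public-read'}

    head_object_request = dict(copy_source)
    for param, value in extra_args.items():
        if param in CopySubmissionTask.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING:
            mapped = CopySubmissionTask.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]
            head_object_request[mapped] = value
    # head_object_request == {'Bucket': 'source-bucket', 'Key': 'source-key',
    #                         'IfMatch': '"etag-123"'}
    # source_client.head_object(**head_object_request)['ContentLength'] then
    # supplies the transfer size.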
+ call_args = transfer_future.meta.call_args + head_object_request = \ + self._get_head_object_request_from_copy_source( + call_args.copy_source) + extra_args = call_args.extra_args + + # Map any values that may be used in the head object that is + # used in the copy object + for param, value in extra_args.items(): + if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING: + head_object_request[ + self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]] = value + + response = call_args.source_client.head_object( + **head_object_request) + transfer_future.meta.provide_transfer_size( + response['ContentLength']) + + # If it is greater than threshold do a multipart copy, otherwise + # do a regular copy object. + if transfer_future.meta.size < config.multipart_threshold: + self._submit_copy_request( + client, config, osutil, request_executor, transfer_future) + else: + self._submit_multipart_request( + client, config, osutil, request_executor, transfer_future) + + def _submit_copy_request(self, client, config, osutil, request_executor, + transfer_future): + call_args = transfer_future.meta.call_args + + # Get the needed progress callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Submit the request of a single copy. + self._transfer_coordinator.submit( + request_executor, + CopyObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'copy_source': call_args.copy_source, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, + 'callbacks': progress_callbacks, + 'size': transfer_future.meta.size + }, + is_final=True + ) + ) + + def _submit_multipart_request(self, client, config, osutil, + request_executor, transfer_future): + call_args = transfer_future.meta.call_args + + # Submit the request to create a multipart upload and make sure it + # does not include any of the arguments used for copy part. + create_multipart_extra_args = {} + for param, val in call_args.extra_args.items(): + if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST: + create_multipart_extra_args[param] = val + + create_multipart_future = self._transfer_coordinator.submit( + request_executor, + CreateMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': create_multipart_extra_args, + } + ) + ) + + # Determine how many parts are needed based on filesize and + # desired chunksize. + part_size = config.multipart_chunksize + adjuster = ChunksizeAdjuster() + part_size = adjuster.adjust_chunksize( + part_size, transfer_future.meta.size) + num_parts = int( + math.ceil(transfer_future.meta.size / float(part_size))) + + # Submit requests to upload the parts of the file. + part_futures = [] + progress_callbacks = get_callbacks(transfer_future, 'progress') + + for part_number in range(1, num_parts + 1): + extra_part_args = self._extra_upload_part_args( + call_args.extra_args) + # The part number for upload part starts at 1 while the + # range parameter starts at zero, so just subtract 1 off of + # the part number + extra_part_args['CopySourceRange'] = calculate_range_parameter( + part_size, part_number-1, num_parts, transfer_future.meta.size) + # Get the size of the part copy as well for the progress + # callbacks. 
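Worked numbers (invented) for the chunking math used in this loop, assuming calculate_range_parameter renders an inclusive 'bytes=start-end' HTTP range:

    import math

    part_size = 8 * 1024 * 1024      # e.g. after ChunksizeAdjuster
    total_size = 100 * 1024 * 1024   # a 100 MiB source object
    num_parts = int(math.ceil(total_size / float(part_size)))    # 13

    # Part 1 (index 0) would copy 'bytes=0-8388607'; the final part is smaller:
    last_part_size = total_size - (num_parts - 1) * part_size    # 4 MiB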
+ size = self._get_transfer_size( + part_size, part_number-1, num_parts, transfer_future.meta.size + ) + part_futures.append( + self._transfer_coordinator.submit( + request_executor, + CopyPartTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'copy_source': call_args.copy_source, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'part_number': part_number, + 'extra_args': extra_part_args, + 'callbacks': progress_callbacks, + 'size': size + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future + } + ) + ) + ) + + complete_multipart_extra_args = self._extra_complete_multipart_args( + call_args.extra_args) + # Submit the request to complete the multipart upload. + self._transfer_coordinator.submit( + request_executor, + CompleteMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': complete_multipart_extra_args, + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future, + 'parts': part_futures + }, + is_final=True + ) + ) + + def _get_head_object_request_from_copy_source(self, copy_source): + if isinstance(copy_source, dict): + return copy.copy(copy_source) + else: + raise TypeError( + 'Expecting dictionary formatted: ' + '{"Bucket": bucket_name, "Key": key} ' + 'but got %s or type %s.' + % (copy_source, type(copy_source)) + ) + + def _extra_upload_part_args(self, extra_args): + # Only the args in COPY_PART_ARGS actually need to be passed + # onto the upload_part_copy calls. + return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS) + + def _extra_complete_multipart_args(self, extra_args): + return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) + + def _get_transfer_size(self, part_size, part_index, num_parts, + total_transfer_size): + if part_index == num_parts - 1: + # The last part may be different in size then the rest of the + # parts. + return total_transfer_size - (part_index * part_size) + return part_size + + +class CopyObjectTask(Task): + """Task to do a nonmultipart copy""" + def _main(self, client, copy_source, bucket, key, extra_args, callbacks, + size): + """ + :param client: The client to use when calling PutObject + :param copy_source: The CopySource parameter to use + :param bucket: The name of the bucket to copy to + :param key: The name of the key to copy to + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + :param callbacks: List of callbacks to call after copy + :param size: The size of the transfer. This value is passed into + the callbacks + + """ + client.copy_object( + CopySource=copy_source, Bucket=bucket, Key=key, **extra_args) + for callback in callbacks: + callback(bytes_transferred=size) + + +class CopyPartTask(Task): + """Task to upload a part in a multipart copy""" + def _main(self, client, copy_source, bucket, key, upload_id, part_number, + extra_args, callbacks, size): + """ + :param client: The client to use when calling PutObject + :param copy_source: The CopySource parameter to use + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param part_number: The number representing the part of the multipart + upload + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + :param callbacks: List of callbacks to call after copy part + :param size: The size of the transfer. 
This value is passed into + the callbacks + + :rtype: dict + :returns: A dictionary representing a part:: + + {'Etag': etag_value, 'PartNumber': part_number} + + This value can be appended to a list to be used to complete + the multipart upload. + """ + response = client.upload_part_copy( + CopySource=copy_source, Bucket=bucket, Key=key, + UploadId=upload_id, PartNumber=part_number, **extra_args) + for callback in callbacks: + callback(bytes_transferred=size) + etag = response['CopyPartResult']['ETag'] + return {'ETag': etag, 'PartNumber': part_number} diff --git a/contrib/python/s3transfer/py2/s3transfer/delete.py b/contrib/python/s3transfer/py2/s3transfer/delete.py index 35b18a1139..ad9210b373 100644 --- a/contrib/python/s3transfer/py2/s3transfer/delete.py +++ b/contrib/python/s3transfer/py2/s3transfer/delete.py @@ -1,72 +1,72 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from s3transfer.tasks import Task -from s3transfer.tasks import SubmissionTask - - -class DeleteSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute an object deletion.""" - - def _submit(self, client, request_executor, transfer_future, **kwargs): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - call_args = transfer_future.meta.call_args - - self._transfer_coordinator.submit( - request_executor, - DeleteObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, - }, - is_final=True - ) - ) - - -class DeleteObjectTask(Task): - def _main(self, client, bucket, key, extra_args): - """ - - :param client: The S3 client to use when calling DeleteObject - - :type bucket: str - :param bucket: The name of the bucket. - - :type key: str - :param key: The name of the object to delete. - - :type extra_args: dict - :param extra_args: Extra arguments to pass to the DeleteObject call. - - """ - client.delete_object(Bucket=bucket, Key=key, **extra_args) +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from s3transfer.tasks import Task +from s3transfer.tasks import SubmissionTask + + +class DeleteSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute an object deletion.""" + + def _submit(self, client, request_executor, transfer_future, **kwargs): + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + call_args = transfer_future.meta.call_args + + self._transfer_coordinator.submit( + request_executor, + DeleteObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, + }, + is_final=True + ) + ) + + +class DeleteObjectTask(Task): + def _main(self, client, bucket, key, extra_args): + """ + + :param client: The S3 client to use when calling DeleteObject + + :type bucket: str + :param bucket: The name of the bucket. + + :type key: str + :param key: The name of the object to delete. + + :type extra_args: dict + :param extra_args: Extra arguments to pass to the DeleteObject call. + + """ + client.delete_object(Bucket=bucket, Key=key, **extra_args) diff --git a/contrib/python/s3transfer/py2/s3transfer/download.py b/contrib/python/s3transfer/py2/s3transfer/download.py index ffac23d683..0da00c4541 100644 --- a/contrib/python/s3transfer/py2/s3transfer/download.py +++ b/contrib/python/s3transfer/py2/s3transfer/download.py @@ -1,710 +1,710 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import logging -import threading +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
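Looking back at DeleteObjectTask above: the task reduces to a single DeleteObject call, for example with an invented bucket, key and version (client being an S3 client):

    extra_args = {'VersionId': 'example-version-id'}
    client.delete_object(Bucket='my-bucket', Key='logs/2021-01-01.gz', **extra_args)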
+import logging +import threading import heapq - - + + from botocore.compat import six -from s3transfer.compat import seekable -from s3transfer.exceptions import RetriesExceededError -from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG +from s3transfer.compat import seekable +from s3transfer.exceptions import RetriesExceededError +from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.utils import S3_RETRYABLE_DOWNLOAD_ERRORS -from s3transfer.utils import get_callbacks -from s3transfer.utils import invoke_progress_callbacks +from s3transfer.utils import get_callbacks +from s3transfer.utils import invoke_progress_callbacks from s3transfer.utils import calculate_num_parts -from s3transfer.utils import calculate_range_parameter -from s3transfer.utils import FunctionContainer -from s3transfer.utils import CountCallbackInvoker -from s3transfer.utils import StreamReaderProgress -from s3transfer.utils import DeferredOpenFile -from s3transfer.tasks import Task -from s3transfer.tasks import SubmissionTask - - -logger = logging.getLogger(__name__) - - -class DownloadOutputManager(object): - """Base manager class for handling various types of files for downloads - - This class is typically used for the DownloadSubmissionTask class to help - determine the following: - - * Provides the fileobj to write to downloads to - * Get a task to complete once everything downloaded has been written - - The answers/implementations differ for the various types of file outputs - that may be accepted. All implementations must subclass and override - public methods from this class. - """ - def __init__(self, osutil, transfer_coordinator, io_executor): - self._osutil = osutil - self._transfer_coordinator = transfer_coordinator - self._io_executor = io_executor - - @classmethod - def is_compatible(cls, download_target, osutil): - """Determines if the target for the download is compatible with manager - - :param download_target: The target for which the upload will write - data to. - - :param osutil: The os utility to be used for the transfer - - :returns: True if the manager can handle the type of target specified - otherwise returns False. - """ - raise NotImplementedError('must implement is_compatible()') - - def get_download_task_tag(self): - """Get the tag (if any) to associate all GetObjectTasks - - :rtype: s3transfer.futures.TaskTag - :returns: The tag to associate all GetObjectTasks with - """ - return None - - def get_fileobj_for_io_writes(self, transfer_future): - """Get file-like object to use for io writes in the io executor - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - returns: A file-like object to write to - """ - raise NotImplementedError('must implement get_fileobj_for_io_writes()') - - def queue_file_io_task(self, fileobj, data, offset): - """Queue IO write for submission to the IO executor. - - This method accepts an IO executor and information about the - downloaded data, and handles submitting this to the IO executor. - - This method may defer submission to the IO executor if necessary. 
- - """ - self._transfer_coordinator.submit( - self._io_executor, - self.get_io_write_task(fileobj, data, offset) +from s3transfer.utils import calculate_range_parameter +from s3transfer.utils import FunctionContainer +from s3transfer.utils import CountCallbackInvoker +from s3transfer.utils import StreamReaderProgress +from s3transfer.utils import DeferredOpenFile +from s3transfer.tasks import Task +from s3transfer.tasks import SubmissionTask + + +logger = logging.getLogger(__name__) + + +class DownloadOutputManager(object): + """Base manager class for handling various types of files for downloads + + This class is typically used for the DownloadSubmissionTask class to help + determine the following: + + * Provides the fileobj to write to downloads to + * Get a task to complete once everything downloaded has been written + + The answers/implementations differ for the various types of file outputs + that may be accepted. All implementations must subclass and override + public methods from this class. + """ + def __init__(self, osutil, transfer_coordinator, io_executor): + self._osutil = osutil + self._transfer_coordinator = transfer_coordinator + self._io_executor = io_executor + + @classmethod + def is_compatible(cls, download_target, osutil): + """Determines if the target for the download is compatible with manager + + :param download_target: The target for which the upload will write + data to. + + :param osutil: The os utility to be used for the transfer + + :returns: True if the manager can handle the type of target specified + otherwise returns False. + """ + raise NotImplementedError('must implement is_compatible()') + + def get_download_task_tag(self): + """Get the tag (if any) to associate all GetObjectTasks + + :rtype: s3transfer.futures.TaskTag + :returns: The tag to associate all GetObjectTasks with + """ + return None + + def get_fileobj_for_io_writes(self, transfer_future): + """Get file-like object to use for io writes in the io executor + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + returns: A file-like object to write to + """ + raise NotImplementedError('must implement get_fileobj_for_io_writes()') + + def queue_file_io_task(self, fileobj, data, offset): + """Queue IO write for submission to the IO executor. + + This method accepts an IO executor and information about the + downloaded data, and handles submitting this to the IO executor. + + This method may defer submission to the IO executor if necessary. + + """ + self._transfer_coordinator.submit( + self._io_executor, + self.get_io_write_task(fileobj, data, offset) ) - - def get_io_write_task(self, fileobj, data, offset): - """Get an IO write task for the requested set of data - - This task can be ran immediately or be submitted to the IO executor - for it to run. 
- - :type fileobj: file-like object - :param fileobj: The file-like object to write to - - :type data: bytes - :param data: The data to write out - - :type offset: integer - :param offset: The offset to write the data to in the file-like object - - :returns: An IO task to be used to write data to a file-like object - """ - return IOWriteTask( - self._transfer_coordinator, - main_kwargs={ - 'fileobj': fileobj, - 'data': data, - 'offset': offset, - } - ) - - def get_final_io_task(self): - """Get the final io task to complete the download - - This is needed because based on the architecture of the TransferManager - the final tasks will be sent to the IO executor, but the executor - needs a final task for it to signal that the transfer is done and - all done callbacks can be run. - - :rtype: s3transfer.tasks.Task - :returns: A final task to completed in the io executor - """ - raise NotImplementedError( - 'must implement get_final_io_task()') - - def _get_fileobj_from_filename(self, filename): - f = DeferredOpenFile( - filename, mode='wb', open_function=self._osutil.open) - # Make sure the file gets closed and we remove the temporary file - # if anything goes wrong during the process. - self._transfer_coordinator.add_failure_cleanup(f.close) - return f - - -class DownloadFilenameOutputManager(DownloadOutputManager): - def __init__(self, osutil, transfer_coordinator, io_executor): - super(DownloadFilenameOutputManager, self).__init__( - osutil, transfer_coordinator, io_executor) - self._final_filename = None - self._temp_filename = None - self._temp_fileobj = None - - @classmethod - def is_compatible(cls, download_target, osutil): - return isinstance(download_target, six.string_types) - - def get_fileobj_for_io_writes(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - self._final_filename = fileobj + + def get_io_write_task(self, fileobj, data, offset): + """Get an IO write task for the requested set of data + + This task can be ran immediately or be submitted to the IO executor + for it to run. + + :type fileobj: file-like object + :param fileobj: The file-like object to write to + + :type data: bytes + :param data: The data to write out + + :type offset: integer + :param offset: The offset to write the data to in the file-like object + + :returns: An IO task to be used to write data to a file-like object + """ + return IOWriteTask( + self._transfer_coordinator, + main_kwargs={ + 'fileobj': fileobj, + 'data': data, + 'offset': offset, + } + ) + + def get_final_io_task(self): + """Get the final io task to complete the download + + This is needed because based on the architecture of the TransferManager + the final tasks will be sent to the IO executor, but the executor + needs a final task for it to signal that the transfer is done and + all done callbacks can be run. + + :rtype: s3transfer.tasks.Task + :returns: A final task to completed in the io executor + """ + raise NotImplementedError( + 'must implement get_final_io_task()') + + def _get_fileobj_from_filename(self, filename): + f = DeferredOpenFile( + filename, mode='wb', open_function=self._osutil.open) + # Make sure the file gets closed and we remove the temporary file + # if anything goes wrong during the process. 
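As an aside on DeferredOpenFile, used just above: it postpones the real open() until the first read/write/seek, which is why registering f.close as a failure cleanup is cheap even if nothing is ever written. A rough sketch with an invented path:

    from s3transfer.utils import DeferredOpenFile

    f = DeferredOpenFile('/tmp/example.part', mode='wb', open_function=open)
    # No OS file handle exists yet; it is created lazily on the first write
    f.write(b'first chunk')
    f.close()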
+ self._transfer_coordinator.add_failure_cleanup(f.close) + return f + + +class DownloadFilenameOutputManager(DownloadOutputManager): + def __init__(self, osutil, transfer_coordinator, io_executor): + super(DownloadFilenameOutputManager, self).__init__( + osutil, transfer_coordinator, io_executor) + self._final_filename = None + self._temp_filename = None + self._temp_fileobj = None + + @classmethod + def is_compatible(cls, download_target, osutil): + return isinstance(download_target, six.string_types) + + def get_fileobj_for_io_writes(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + self._final_filename = fileobj self._temp_filename = self._osutil.get_temp_filename(fileobj) - self._temp_fileobj = self._get_temp_fileobj() - return self._temp_fileobj - - def get_final_io_task(self): - # A task to rename the file from the temporary file to its final - # location is needed. This should be the last task needed to complete - # the download. - return IORenameFileTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'fileobj': self._temp_fileobj, - 'final_filename': self._final_filename, - 'osutil': self._osutil - }, - is_final=True - ) - - def _get_temp_fileobj(self): - f = self._get_fileobj_from_filename(self._temp_filename) - self._transfer_coordinator.add_failure_cleanup( - self._osutil.remove_file, self._temp_filename) - return f - - -class DownloadSeekableOutputManager(DownloadOutputManager): - @classmethod - def is_compatible(cls, download_target, osutil): - return seekable(download_target) - - def get_fileobj_for_io_writes(self, transfer_future): - # Return the fileobj provided to the future. - return transfer_future.meta.call_args.fileobj - - def get_final_io_task(self): - # This task will serve the purpose of signaling when all of the io - # writes have finished so done callbacks can be called. 
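The filename-based manager above never writes to the requested path directly: it streams into a temporary file and only renames it into place once the download succeeds. The same pattern in plain Python (the helper and filenames are illustrative; os.replace assumes Python 3)::

    import os

    def download_to_file(filename, fetch_chunks):
        # fetch_chunks is any iterable of bytes; the temporary file keeps a
        # failed or interrupted transfer from clobbering the final path.
        temp_filename = filename + '.part'
        try:
            with open(temp_filename, 'wb') as f:
                for chunk in fetch_chunks:
                    f.write(chunk)
            os.replace(temp_filename, filename)
        except Exception:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)
            raise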
- return CompleteDownloadNOOPTask( - transfer_coordinator=self._transfer_coordinator) - - -class DownloadNonSeekableOutputManager(DownloadOutputManager): - def __init__(self, osutil, transfer_coordinator, io_executor, - defer_queue=None): - super(DownloadNonSeekableOutputManager, self).__init__( - osutil, transfer_coordinator, io_executor) - if defer_queue is None: - defer_queue = DeferQueue() - self._defer_queue = defer_queue - self._io_submit_lock = threading.Lock() - - @classmethod - def is_compatible(cls, download_target, osutil): - return hasattr(download_target, 'write') - - def get_download_task_tag(self): - return IN_MEMORY_DOWNLOAD_TAG - - def get_fileobj_for_io_writes(self, transfer_future): - return transfer_future.meta.call_args.fileobj - - def get_final_io_task(self): - return CompleteDownloadNOOPTask( - transfer_coordinator=self._transfer_coordinator) - - def queue_file_io_task(self, fileobj, data, offset): - with self._io_submit_lock: - writes = self._defer_queue.request_writes(offset, data) - for write in writes: - data = write['data'] - logger.debug("Queueing IO offset %s for fileobj: %s", - write['offset'], fileobj) - super( - DownloadNonSeekableOutputManager, self).queue_file_io_task( - fileobj, data, offset) - - def get_io_write_task(self, fileobj, data, offset): - return IOStreamingWriteTask( - self._transfer_coordinator, - main_kwargs={ - 'fileobj': fileobj, - 'data': data, - } - ) - - -class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager): - def __init__(self, osutil, transfer_coordinator, io_executor, - defer_queue=None): - super(DownloadSpecialFilenameOutputManager, self).__init__( - osutil, transfer_coordinator, io_executor, defer_queue) - self._fileobj = None - - @classmethod - def is_compatible(cls, download_target, osutil): - return isinstance(download_target, six.string_types) and \ - osutil.is_special_file(download_target) - - def get_fileobj_for_io_writes(self, transfer_future): - filename = transfer_future.meta.call_args.fileobj - self._fileobj = self._get_fileobj_from_filename(filename) - return self._fileobj - - def get_final_io_task(self): - # Make sure the file gets closed once the transfer is done. - return IOCloseTask( - transfer_coordinator=self._transfer_coordinator, - is_final=True, - main_kwargs={'fileobj': self._fileobj}) - - -class DownloadSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute a download""" - - def _get_download_output_manager_cls(self, transfer_future, osutil): - """Retrieves a class for managing output for a download - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future for the request - - :type osutil: s3transfer.utils.OSUtils - :param osutil: The os utility associated to the transfer - - :rtype: class of DownloadOutputManager - :returns: The appropriate class to use for managing a specific type of - input for downloads. - """ - download_manager_resolver_chain = [ - DownloadSpecialFilenameOutputManager, - DownloadFilenameOutputManager, - DownloadSeekableOutputManager, - DownloadNonSeekableOutputManager, - ] - - fileobj = transfer_future.meta.call_args.fileobj - for download_manager_cls in download_manager_resolver_chain: - if download_manager_cls.is_compatible(fileobj, osutil): - return download_manager_cls - raise RuntimeError( - 'Output %s of type: %s is not supported.' 
% ( - fileobj, type(fileobj))) - - def _submit(self, client, config, osutil, request_executor, io_executor, - transfer_future, bandwidth_limiter=None): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type io_executor: s3transfer.futures.BoundedExecutor - :param io_executor: The io executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter - :param bandwidth_limiter: The bandwidth limiter to use when - downloading streams - """ - if transfer_future.meta.size is None: - # If a size was not provided figure out the size for the - # user. - response = client.head_object( - Bucket=transfer_future.meta.call_args.bucket, - Key=transfer_future.meta.call_args.key, - **transfer_future.meta.call_args.extra_args - ) - transfer_future.meta.provide_transfer_size( - response['ContentLength']) - - download_output_manager = self._get_download_output_manager_cls( - transfer_future, osutil)(osutil, self._transfer_coordinator, - io_executor) - - # If it is greater than threshold do a ranged download, otherwise - # do a regular GetObject download. - if transfer_future.meta.size < config.multipart_threshold: - self._submit_download_request( - client, config, osutil, request_executor, io_executor, - download_output_manager, transfer_future, bandwidth_limiter) - else: - self._submit_ranged_download_request( - client, config, osutil, request_executor, io_executor, - download_output_manager, transfer_future, bandwidth_limiter) - - def _submit_download_request(self, client, config, osutil, - request_executor, io_executor, - download_output_manager, transfer_future, - bandwidth_limiter): - call_args = transfer_future.meta.call_args - - # Get a handle to the file that will be used for writing downloaded - # contents - fileobj = download_output_manager.get_fileobj_for_io_writes( - transfer_future) - - # Get the needed callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Get any associated tags for the get object task. - get_object_tag = download_output_manager.get_download_task_tag() - - # Get the final io task to run once the download is complete. - final_task = download_output_manager.get_final_io_task() - - # Submit the task to download the object. 
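The submission path above first resolves the object size (falling back to HeadObject when the caller did not provide one) and then branches on the multipart threshold. The same decision with a bare boto3 client (bucket, key and the 8 MB threshold are placeholder values)::

    import boto3

    client = boto3.client('s3', 'us-west-2')
    multipart_threshold = 8 * 1024 * 1024

    head = client.head_object(Bucket='my-bucket', Key='my-key')
    size = head['ContentLength']

    if size < multipart_threshold:
        strategy = 'single GetObject request'
    else:
        strategy = 'ranged GetObject requests submitted in parallel'
    print(size, strategy)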
- self._transfer_coordinator.submit( - request_executor, - ImmediatelyWriteIOGetObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'fileobj': fileobj, - 'extra_args': call_args.extra_args, - 'callbacks': progress_callbacks, - 'max_attempts': config.num_download_attempts, - 'download_output_manager': download_output_manager, - 'io_chunksize': config.io_chunksize, - 'bandwidth_limiter': bandwidth_limiter - }, - done_callbacks=[final_task] - ), - tag=get_object_tag - ) - - def _submit_ranged_download_request(self, client, config, osutil, - request_executor, io_executor, - download_output_manager, - transfer_future, - bandwidth_limiter): - call_args = transfer_future.meta.call_args - - # Get the needed progress callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Get a handle to the file that will be used for writing downloaded - # contents - fileobj = download_output_manager.get_fileobj_for_io_writes( - transfer_future) - - # Determine the number of parts - part_size = config.multipart_chunksize + self._temp_fileobj = self._get_temp_fileobj() + return self._temp_fileobj + + def get_final_io_task(self): + # A task to rename the file from the temporary file to its final + # location is needed. This should be the last task needed to complete + # the download. + return IORenameFileTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'fileobj': self._temp_fileobj, + 'final_filename': self._final_filename, + 'osutil': self._osutil + }, + is_final=True + ) + + def _get_temp_fileobj(self): + f = self._get_fileobj_from_filename(self._temp_filename) + self._transfer_coordinator.add_failure_cleanup( + self._osutil.remove_file, self._temp_filename) + return f + + +class DownloadSeekableOutputManager(DownloadOutputManager): + @classmethod + def is_compatible(cls, download_target, osutil): + return seekable(download_target) + + def get_fileobj_for_io_writes(self, transfer_future): + # Return the fileobj provided to the future. + return transfer_future.meta.call_args.fileobj + + def get_final_io_task(self): + # This task will serve the purpose of signaling when all of the io + # writes have finished so done callbacks can be called. 
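Whether the seekable manager applies is just a capability probe on the download target; a rough stand-in for that check (the real helper is s3transfer.utils.seekable, this version is only illustrative)::

    import io

    def looks_seekable(fileobj):
        # io objects expose seekable(); otherwise probe for the methods.
        if hasattr(fileobj, 'seekable'):
            return fileobj.seekable()
        return hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell')

    print(looks_seekable(io.BytesIO()))    # True
    print(looks_seekable(io.RawIOBase()))  # False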
+ return CompleteDownloadNOOPTask( + transfer_coordinator=self._transfer_coordinator) + + +class DownloadNonSeekableOutputManager(DownloadOutputManager): + def __init__(self, osutil, transfer_coordinator, io_executor, + defer_queue=None): + super(DownloadNonSeekableOutputManager, self).__init__( + osutil, transfer_coordinator, io_executor) + if defer_queue is None: + defer_queue = DeferQueue() + self._defer_queue = defer_queue + self._io_submit_lock = threading.Lock() + + @classmethod + def is_compatible(cls, download_target, osutil): + return hasattr(download_target, 'write') + + def get_download_task_tag(self): + return IN_MEMORY_DOWNLOAD_TAG + + def get_fileobj_for_io_writes(self, transfer_future): + return transfer_future.meta.call_args.fileobj + + def get_final_io_task(self): + return CompleteDownloadNOOPTask( + transfer_coordinator=self._transfer_coordinator) + + def queue_file_io_task(self, fileobj, data, offset): + with self._io_submit_lock: + writes = self._defer_queue.request_writes(offset, data) + for write in writes: + data = write['data'] + logger.debug("Queueing IO offset %s for fileobj: %s", + write['offset'], fileobj) + super( + DownloadNonSeekableOutputManager, self).queue_file_io_task( + fileobj, data, offset) + + def get_io_write_task(self, fileobj, data, offset): + return IOStreamingWriteTask( + self._transfer_coordinator, + main_kwargs={ + 'fileobj': fileobj, + 'data': data, + } + ) + + +class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager): + def __init__(self, osutil, transfer_coordinator, io_executor, + defer_queue=None): + super(DownloadSpecialFilenameOutputManager, self).__init__( + osutil, transfer_coordinator, io_executor, defer_queue) + self._fileobj = None + + @classmethod + def is_compatible(cls, download_target, osutil): + return isinstance(download_target, six.string_types) and \ + osutil.is_special_file(download_target) + + def get_fileobj_for_io_writes(self, transfer_future): + filename = transfer_future.meta.call_args.fileobj + self._fileobj = self._get_fileobj_from_filename(filename) + return self._fileobj + + def get_final_io_task(self): + # Make sure the file gets closed once the transfer is done. + return IOCloseTask( + transfer_coordinator=self._transfer_coordinator, + is_final=True, + main_kwargs={'fileobj': self._fileobj}) + + +class DownloadSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute a download""" + + def _get_download_output_manager_cls(self, transfer_future, osutil): + """Retrieves a class for managing output for a download + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future for the request + + :type osutil: s3transfer.utils.OSUtils + :param osutil: The os utility associated to the transfer + + :rtype: class of DownloadOutputManager + :returns: The appropriate class to use for managing a specific type of + input for downloads. + """ + download_manager_resolver_chain = [ + DownloadSpecialFilenameOutputManager, + DownloadFilenameOutputManager, + DownloadSeekableOutputManager, + DownloadNonSeekableOutputManager, + ] + + fileobj = transfer_future.meta.call_args.fileobj + for download_manager_cls in download_manager_resolver_chain: + if download_manager_cls.is_compatible(fileobj, osutil): + return download_manager_cls + raise RuntimeError( + 'Output %s of type: %s is not supported.' 
% ( + fileobj, type(fileobj))) + + def _submit(self, client, config, osutil, request_executor, io_executor, + transfer_future, bandwidth_limiter=None): + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type io_executor: s3transfer.futures.BoundedExecutor + :param io_executor: The io executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter + :param bandwidth_limiter: The bandwidth limiter to use when + downloading streams + """ + if transfer_future.meta.size is None: + # If a size was not provided figure out the size for the + # user. + response = client.head_object( + Bucket=transfer_future.meta.call_args.bucket, + Key=transfer_future.meta.call_args.key, + **transfer_future.meta.call_args.extra_args + ) + transfer_future.meta.provide_transfer_size( + response['ContentLength']) + + download_output_manager = self._get_download_output_manager_cls( + transfer_future, osutil)(osutil, self._transfer_coordinator, + io_executor) + + # If it is greater than threshold do a ranged download, otherwise + # do a regular GetObject download. + if transfer_future.meta.size < config.multipart_threshold: + self._submit_download_request( + client, config, osutil, request_executor, io_executor, + download_output_manager, transfer_future, bandwidth_limiter) + else: + self._submit_ranged_download_request( + client, config, osutil, request_executor, io_executor, + download_output_manager, transfer_future, bandwidth_limiter) + + def _submit_download_request(self, client, config, osutil, + request_executor, io_executor, + download_output_manager, transfer_future, + bandwidth_limiter): + call_args = transfer_future.meta.call_args + + # Get a handle to the file that will be used for writing downloaded + # contents + fileobj = download_output_manager.get_fileobj_for_io_writes( + transfer_future) + + # Get the needed callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Get any associated tags for the get object task. + get_object_tag = download_output_manager.get_download_task_tag() + + # Get the final io task to run once the download is complete. + final_task = download_output_manager.get_final_io_task() + + # Submit the task to download the object. 
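The _get_download_output_manager_cls resolver above is a first-match scan over candidate classes, ordered from most to least specific. The dispatch pattern in isolation, with made-up handler names::

    import io

    class PathHandler(object):
        @classmethod
        def is_compatible(cls, target):
            return isinstance(target, str)

    class WritableHandler(object):
        @classmethod
        def is_compatible(cls, target):
            return hasattr(target, 'write')

    def resolve_handler(target, chain=(PathHandler, WritableHandler)):
        for handler_cls in chain:
            if handler_cls.is_compatible(target):
                return handler_cls
        raise RuntimeError('Output %s of type: %s is not supported.'
                           % (target, type(target)))

    assert resolve_handler('/tmp/out.bin') is PathHandler
    assert resolve_handler(io.BytesIO()) is WritableHandler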
+ self._transfer_coordinator.submit( + request_executor, + ImmediatelyWriteIOGetObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'fileobj': fileobj, + 'extra_args': call_args.extra_args, + 'callbacks': progress_callbacks, + 'max_attempts': config.num_download_attempts, + 'download_output_manager': download_output_manager, + 'io_chunksize': config.io_chunksize, + 'bandwidth_limiter': bandwidth_limiter + }, + done_callbacks=[final_task] + ), + tag=get_object_tag + ) + + def _submit_ranged_download_request(self, client, config, osutil, + request_executor, io_executor, + download_output_manager, + transfer_future, + bandwidth_limiter): + call_args = transfer_future.meta.call_args + + # Get the needed progress callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Get a handle to the file that will be used for writing downloaded + # contents + fileobj = download_output_manager.get_fileobj_for_io_writes( + transfer_future) + + # Determine the number of parts + part_size = config.multipart_chunksize num_parts = calculate_num_parts(transfer_future.meta.size, part_size) - - # Get any associated tags for the get object task. - get_object_tag = download_output_manager.get_download_task_tag() - - # Callback invoker to submit the final io task once all downloads - # are complete. - finalize_download_invoker = CountCallbackInvoker( - self._get_final_io_task_submission_callback( - download_output_manager, io_executor - ) - ) - for i in range(num_parts): - # Calculate the range parameter - range_parameter = calculate_range_parameter( - part_size, i, num_parts) - - # Inject the Range parameter to the parameters to be passed in - # as extra args - extra_args = {'Range': range_parameter} - extra_args.update(call_args.extra_args) - finalize_download_invoker.increment() - # Submit the ranged downloads - self._transfer_coordinator.submit( - request_executor, - GetObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'fileobj': fileobj, - 'extra_args': extra_args, - 'callbacks': progress_callbacks, - 'max_attempts': config.num_download_attempts, - 'start_index': i * part_size, - 'download_output_manager': download_output_manager, - 'io_chunksize': config.io_chunksize, - 'bandwidth_limiter': bandwidth_limiter - }, - done_callbacks=[finalize_download_invoker.decrement] - ), - tag=get_object_tag - ) - finalize_download_invoker.finalize() - - def _get_final_io_task_submission_callback(self, download_manager, - io_executor): - final_task = download_manager.get_final_io_task() - return FunctionContainer( - self._transfer_coordinator.submit, io_executor, final_task) - - def _calculate_range_param(self, part_size, part_index, num_parts): - # Used to calculate the Range parameter - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - else: - end_range = start_range + part_size - 1 - range_param = 'bytes=%s-%s' % (start_range, end_range) - return range_param - - -class GetObjectTask(Task): - def _main(self, client, bucket, key, fileobj, extra_args, callbacks, - max_attempts, download_output_manager, io_chunksize, - start_index=0, bandwidth_limiter=None): - """Downloads an object and places content into io queue - - :param client: The client to use when calling GetObject - :param bucket: The bucket to download from - :param key: The key to 
download from - :param fileobj: The file handle to write content to - :param exta_args: Any extra arguements to include in GetObject request - :param callbacks: List of progress callbacks to invoke on download - :param max_attempts: The number of retries to do when downloading - :param download_output_manager: The download output manager associated - with the current download. - :param io_chunksize: The size of each io chunk to read from the - download stream and queue in the io queue. - :param start_index: The location in the file to start writing the - content of the key to. - :param bandwidth_limiter: The bandwidth limiter to use when throttling - the downloading of data in streams. - """ - last_exception = None - for i in range(max_attempts): - try: + + # Get any associated tags for the get object task. + get_object_tag = download_output_manager.get_download_task_tag() + + # Callback invoker to submit the final io task once all downloads + # are complete. + finalize_download_invoker = CountCallbackInvoker( + self._get_final_io_task_submission_callback( + download_output_manager, io_executor + ) + ) + for i in range(num_parts): + # Calculate the range parameter + range_parameter = calculate_range_parameter( + part_size, i, num_parts) + + # Inject the Range parameter to the parameters to be passed in + # as extra args + extra_args = {'Range': range_parameter} + extra_args.update(call_args.extra_args) + finalize_download_invoker.increment() + # Submit the ranged downloads + self._transfer_coordinator.submit( + request_executor, + GetObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'fileobj': fileobj, + 'extra_args': extra_args, + 'callbacks': progress_callbacks, + 'max_attempts': config.num_download_attempts, + 'start_index': i * part_size, + 'download_output_manager': download_output_manager, + 'io_chunksize': config.io_chunksize, + 'bandwidth_limiter': bandwidth_limiter + }, + done_callbacks=[finalize_download_invoker.decrement] + ), + tag=get_object_tag + ) + finalize_download_invoker.finalize() + + def _get_final_io_task_submission_callback(self, download_manager, + io_executor): + final_task = download_manager.get_final_io_task() + return FunctionContainer( + self._transfer_coordinator.submit, io_executor, final_task) + + def _calculate_range_param(self, part_size, part_index, num_parts): + # Used to calculate the Range parameter + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + else: + end_range = start_range + part_size - 1 + range_param = 'bytes=%s-%s' % (start_range, end_range) + return range_param + + +class GetObjectTask(Task): + def _main(self, client, bucket, key, fileobj, extra_args, callbacks, + max_attempts, download_output_manager, io_chunksize, + start_index=0, bandwidth_limiter=None): + """Downloads an object and places content into io queue + + :param client: The client to use when calling GetObject + :param bucket: The bucket to download from + :param key: The key to download from + :param fileobj: The file handle to write content to + :param exta_args: Any extra arguements to include in GetObject request + :param callbacks: List of progress callbacks to invoke on download + :param max_attempts: The number of retries to do when downloading + :param download_output_manager: The download output manager associated + with the current download. 
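Each part of a ranged download injects an HTTP Range header computed from the part size and index; the arithmetic matches the _calculate_range_param helper shown in this hunk::

    def range_param(part_size, part_index, num_parts):
        start = part_index * part_size
        if part_index == num_parts - 1:
            end = ''  # open-ended range for the final part
        else:
            end = start + part_size - 1
        return 'bytes=%s-%s' % (start, end)

    # A 20 MiB object with 8 MiB parts needs three ranged requests:
    part_size = 8 * 1024 * 1024
    print(range_param(part_size, 0, 3))  # bytes=0-8388607
    print(range_param(part_size, 1, 3))  # bytes=8388608-16777215
    print(range_param(part_size, 2, 3))  # bytes=16777216-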
+ :param io_chunksize: The size of each io chunk to read from the + download stream and queue in the io queue. + :param start_index: The location in the file to start writing the + content of the key to. + :param bandwidth_limiter: The bandwidth limiter to use when throttling + the downloading of data in streams. + """ + last_exception = None + for i in range(max_attempts): + try: current_index = start_index - response = client.get_object( - Bucket=bucket, Key=key, **extra_args) - streaming_body = StreamReaderProgress( - response['Body'], callbacks) - if bandwidth_limiter: - streaming_body = \ - bandwidth_limiter.get_bandwith_limited_stream( - streaming_body, self._transfer_coordinator) - - chunks = DownloadChunkIterator(streaming_body, io_chunksize) - for chunk in chunks: - # If the transfer is done because of a cancellation - # or error somewhere else, stop trying to submit more - # data to be written and break out of the download. - if not self._transfer_coordinator.done(): - self._handle_io( - download_output_manager, fileobj, chunk, - current_index - ) - current_index += len(chunk) - else: - return - return + response = client.get_object( + Bucket=bucket, Key=key, **extra_args) + streaming_body = StreamReaderProgress( + response['Body'], callbacks) + if bandwidth_limiter: + streaming_body = \ + bandwidth_limiter.get_bandwith_limited_stream( + streaming_body, self._transfer_coordinator) + + chunks = DownloadChunkIterator(streaming_body, io_chunksize) + for chunk in chunks: + # If the transfer is done because of a cancellation + # or error somewhere else, stop trying to submit more + # data to be written and break out of the download. + if not self._transfer_coordinator.done(): + self._handle_io( + download_output_manager, fileobj, chunk, + current_index + ) + current_index += len(chunk) + else: + return + return except S3_RETRYABLE_DOWNLOAD_ERRORS as e: - logger.debug("Retrying exception caught (%s), " - "retrying request, (attempt %s / %s)", e, i, - max_attempts, exc_info=True) - last_exception = e - # Also invoke the progress callbacks to indicate that we - # are trying to download the stream again and all progress - # for this GetObject has been lost. - invoke_progress_callbacks( - callbacks, start_index - current_index) - continue - raise RetriesExceededError(last_exception) - - def _handle_io(self, download_output_manager, fileobj, chunk, index): - download_output_manager.queue_file_io_task(fileobj, chunk, index) - - -class ImmediatelyWriteIOGetObjectTask(GetObjectTask): - """GetObjectTask that immediately writes to the provided file object - - This is useful for downloads where it is known only one thread is - downloading the object so there is no reason to go through the - overhead of using an IO queue and executor. - """ - def _handle_io(self, download_output_manager, fileobj, chunk, index): - task = download_output_manager.get_io_write_task(fileobj, chunk, index) - task() - - -class IOWriteTask(Task): - def _main(self, fileobj, data, offset): - """Pulls off an io queue to write contents to a file - - :param fileobj: The file handle to write content to - :param data: The data to write - :param offset: The offset to write the data to. - """ - fileobj.seek(offset) - fileobj.write(data) - - -class IOStreamingWriteTask(Task): - """Task for writing data to a non-seekable stream.""" - - def _main(self, fileobj, data): - """Write data to a fileobj. - - Data will be written directly to the fileboj without - any prior seeking. 
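The GetObjectTask retry loop restarts the entire streaming read whenever a retryable error interrupts it part-way through. A generic version of that pattern, with placeholder fetch_stream/handle_chunk callables and exception types::

    class RetriesExceeded(Exception):
        pass

    def read_with_retries(fetch_stream, handle_chunk, io_chunksize,
                          max_attempts, retryable_errors=(IOError, OSError)):
        last_exception = None
        for attempt in range(max_attempts):
            bytes_seen = 0
            try:
                stream = fetch_stream()  # e.g. re-issue the GET request
                while True:
                    chunk = stream.read(io_chunksize)
                    if not chunk:
                        return
                    handle_chunk(chunk, bytes_seen)
                    bytes_seen += len(chunk)
            except retryable_errors as e:
                # Progress from this attempt is discarded; start over.
                last_exception = e
                continue
        raise RetriesExceeded(last_exception)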
- - :param fileobj: The fileobj to write content to - :param data: The data to write - - """ - fileobj.write(data) - - -class IORenameFileTask(Task): - """A task to rename a temporary file to its final filename - - :param fileobj: The file handle that content was written to. - :param final_filename: The final name of the file to rename to - upon completion of writing the contents. - :param osutil: OS utility - """ - def _main(self, fileobj, final_filename, osutil): - fileobj.close() - osutil.rename_file(fileobj.name, final_filename) - - -class IOCloseTask(Task): - """A task to close out a file once the download is complete. - - :param fileobj: The fileobj to close. - """ - def _main(self, fileobj): - fileobj.close() - - -class CompleteDownloadNOOPTask(Task): - """A NOOP task to serve as an indicator that the download is complete - - Note that the default for is_final is set to True because this should - always be the last task. - """ - def __init__(self, transfer_coordinator, main_kwargs=None, - pending_main_kwargs=None, done_callbacks=None, - is_final=True): - super(CompleteDownloadNOOPTask, self).__init__( - transfer_coordinator=transfer_coordinator, - main_kwargs=main_kwargs, - pending_main_kwargs=pending_main_kwargs, - done_callbacks=done_callbacks, - is_final=is_final - ) - - def _main(self): - pass - - -class DownloadChunkIterator(object): - def __init__(self, body, chunksize): - """Iterator to chunk out a downloaded S3 stream - - :param body: A readable file-like object - :param chunksize: The amount to read each time - """ - self._body = body - self._chunksize = chunksize - self._num_reads = 0 - - def __iter__(self): - return self - - def __next__(self): - chunk = self._body.read(self._chunksize) - self._num_reads += 1 - if chunk: - return chunk - elif self._num_reads == 1: - # Even though the response may have not had any - # content, we still want to account for an empty object's - # existance so return the empty chunk for that initial - # read. - return chunk - raise StopIteration() - - next = __next__ - - -class DeferQueue(object): - """IO queue that defers write requests until they are queued sequentially. - - This class is used to track IO data for a *single* fileobj. - - You can send data to this queue, and it will defer any IO write requests - until it has the next contiguous block available (starting at 0). - - """ - def __init__(self): - self._writes = [] - self._pending_offsets = set() - self._next_offset = 0 - - def request_writes(self, offset, data): - """Request any available writes given new incoming data. - - You call this method by providing new data along with the - offset associated with the data. If that new data unlocks - any contiguous writes that can now be submitted, this - method will return all applicable writes. - - This is done with 1 method call so you don't have to - make two method calls (put(), get()) which acquires a lock - each method call. - - """ - if offset < self._next_offset: - # This is a request for a write that we've already - # seen. This can happen in the event of a retry - # where if we retry at at offset N/2, we'll requeue - # offsets 0-N/2 again. - return [] - writes = [] - if offset in self._pending_offsets: - # We've already queued this offset so this request is - # a duplicate. In this case we should ignore - # this request and prefer what's already queued. 
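To make the deferral concrete: out-of-order chunks are held back until the block starting at offset 0 becomes contiguous. Assuming the DeferQueue class shown in this hunk is in scope, a session plays out as::

    q = DeferQueue()
    print(q.request_writes(3, b'def'))   # [] -- offset 0 not seen yet
    print(q.request_writes(6, b'ghi'))   # [] -- still waiting on offset 0
    print(q.request_writes(0, b'abc'))
    # [{'offset': 0, 'data': b'abc'}, {'offset': 3, 'data': b'def'},
    #  {'offset': 6, 'data': b'ghi'}]
    print(q.request_writes(3, b'def'))   # [] -- already flushed, ignored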
- return [] - heapq.heappush(self._writes, (offset, data)) - self._pending_offsets.add(offset) - while self._writes and self._writes[0][0] == self._next_offset: - next_write = heapq.heappop(self._writes) - writes.append({'offset': next_write[0], 'data': next_write[1]}) - self._pending_offsets.remove(next_write[0]) - self._next_offset += len(next_write[1]) - return writes + logger.debug("Retrying exception caught (%s), " + "retrying request, (attempt %s / %s)", e, i, + max_attempts, exc_info=True) + last_exception = e + # Also invoke the progress callbacks to indicate that we + # are trying to download the stream again and all progress + # for this GetObject has been lost. + invoke_progress_callbacks( + callbacks, start_index - current_index) + continue + raise RetriesExceededError(last_exception) + + def _handle_io(self, download_output_manager, fileobj, chunk, index): + download_output_manager.queue_file_io_task(fileobj, chunk, index) + + +class ImmediatelyWriteIOGetObjectTask(GetObjectTask): + """GetObjectTask that immediately writes to the provided file object + + This is useful for downloads where it is known only one thread is + downloading the object so there is no reason to go through the + overhead of using an IO queue and executor. + """ + def _handle_io(self, download_output_manager, fileobj, chunk, index): + task = download_output_manager.get_io_write_task(fileobj, chunk, index) + task() + + +class IOWriteTask(Task): + def _main(self, fileobj, data, offset): + """Pulls off an io queue to write contents to a file + + :param fileobj: The file handle to write content to + :param data: The data to write + :param offset: The offset to write the data to. + """ + fileobj.seek(offset) + fileobj.write(data) + + +class IOStreamingWriteTask(Task): + """Task for writing data to a non-seekable stream.""" + + def _main(self, fileobj, data): + """Write data to a fileobj. + + Data will be written directly to the fileboj without + any prior seeking. + + :param fileobj: The fileobj to write content to + :param data: The data to write + + """ + fileobj.write(data) + + +class IORenameFileTask(Task): + """A task to rename a temporary file to its final filename + + :param fileobj: The file handle that content was written to. + :param final_filename: The final name of the file to rename to + upon completion of writing the contents. + :param osutil: OS utility + """ + def _main(self, fileobj, final_filename, osutil): + fileobj.close() + osutil.rename_file(fileobj.name, final_filename) + + +class IOCloseTask(Task): + """A task to close out a file once the download is complete. + + :param fileobj: The fileobj to close. + """ + def _main(self, fileobj): + fileobj.close() + + +class CompleteDownloadNOOPTask(Task): + """A NOOP task to serve as an indicator that the download is complete + + Note that the default for is_final is set to True because this should + always be the last task. 
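A subtle point in the retry handler above: the progress callbacks are invoked with a negative byte count (start_index - current_index) so that subscribers can roll back whatever the failed attempt had already reported. A toy progress tracker shows the effect (the class is illustrative, not part of s3transfer)::

    class ProgressTracker(object):
        def __init__(self):
            self.bytes_transferred = 0

        def __call__(self, bytes_transferred):
            # Negative values undo progress reported by a failed attempt.
            self.bytes_transferred += bytes_transferred

    tracker = ProgressTracker()
    tracker(1024)    # first attempt streamed 1 KiB ...
    tracker(2048)    # ... then 2 KiB more ...
    tracker(-3072)   # ... then failed, so the retry rewinds all of it
    assert tracker.bytes_transferred == 0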
+ """ + def __init__(self, transfer_coordinator, main_kwargs=None, + pending_main_kwargs=None, done_callbacks=None, + is_final=True): + super(CompleteDownloadNOOPTask, self).__init__( + transfer_coordinator=transfer_coordinator, + main_kwargs=main_kwargs, + pending_main_kwargs=pending_main_kwargs, + done_callbacks=done_callbacks, + is_final=is_final + ) + + def _main(self): + pass + + +class DownloadChunkIterator(object): + def __init__(self, body, chunksize): + """Iterator to chunk out a downloaded S3 stream + + :param body: A readable file-like object + :param chunksize: The amount to read each time + """ + self._body = body + self._chunksize = chunksize + self._num_reads = 0 + + def __iter__(self): + return self + + def __next__(self): + chunk = self._body.read(self._chunksize) + self._num_reads += 1 + if chunk: + return chunk + elif self._num_reads == 1: + # Even though the response may have not had any + # content, we still want to account for an empty object's + # existance so return the empty chunk for that initial + # read. + return chunk + raise StopIteration() + + next = __next__ + + +class DeferQueue(object): + """IO queue that defers write requests until they are queued sequentially. + + This class is used to track IO data for a *single* fileobj. + + You can send data to this queue, and it will defer any IO write requests + until it has the next contiguous block available (starting at 0). + + """ + def __init__(self): + self._writes = [] + self._pending_offsets = set() + self._next_offset = 0 + + def request_writes(self, offset, data): + """Request any available writes given new incoming data. + + You call this method by providing new data along with the + offset associated with the data. If that new data unlocks + any contiguous writes that can now be submitted, this + method will return all applicable writes. + + This is done with 1 method call so you don't have to + make two method calls (put(), get()) which acquires a lock + each method call. + + """ + if offset < self._next_offset: + # This is a request for a write that we've already + # seen. This can happen in the event of a retry + # where if we retry at at offset N/2, we'll requeue + # offsets 0-N/2 again. + return [] + writes = [] + if offset in self._pending_offsets: + # We've already queued this offset so this request is + # a duplicate. In this case we should ignore + # this request and prefer what's already queued. + return [] + heapq.heappush(self._writes, (offset, data)) + self._pending_offsets.add(offset) + while self._writes and self._writes[0][0] == self._next_offset: + next_write = heapq.heappop(self._writes) + writes.append({'offset': next_write[0], 'data': next_write[1]}) + self._pending_offsets.remove(next_write[0]) + self._next_offset += len(next_write[1]) + return writes diff --git a/contrib/python/s3transfer/py2/s3transfer/exceptions.py b/contrib/python/s3transfer/py2/s3transfer/exceptions.py index 7756bfed93..7f3a138104 100644 --- a/contrib/python/s3transfer/py2/s3transfer/exceptions.py +++ b/contrib/python/s3transfer/py2/s3transfer/exceptions.py @@ -1,36 +1,36 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. 
This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from concurrent.futures import CancelledError - - -class RetriesExceededError(Exception): - def __init__(self, last_exception, msg='Max Retries Exceeded'): - super(RetriesExceededError, self).__init__(msg) - self.last_exception = last_exception - - -class S3UploadFailedError(Exception): - pass - - -class InvalidSubscriberMethodError(Exception): - pass - - -class TransferNotDoneError(Exception): - pass - - -class FatalError(CancelledError): - """A CancelledError raised from an error in the TransferManager""" - pass +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from concurrent.futures import CancelledError + + +class RetriesExceededError(Exception): + def __init__(self, last_exception, msg='Max Retries Exceeded'): + super(RetriesExceededError, self).__init__(msg) + self.last_exception = last_exception + + +class S3UploadFailedError(Exception): + pass + + +class InvalidSubscriberMethodError(Exception): + pass + + +class TransferNotDoneError(Exception): + pass + + +class FatalError(CancelledError): + """A CancelledError raised from an error in the TransferManager""" + pass diff --git a/contrib/python/s3transfer/py2/s3transfer/futures.py b/contrib/python/s3transfer/py2/s3transfer/futures.py index c868a08ae0..d08cb7b3b4 100644 --- a/contrib/python/s3transfer/py2/s3transfer/futures.py +++ b/contrib/python/s3transfer/py2/s3transfer/futures.py @@ -1,32 +1,32 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from concurrent import futures -from collections import namedtuple -import copy -import logging -import sys -import threading - -from s3transfer.compat import MAXINT -from s3transfer.compat import six -from s3transfer.exceptions import CancelledError, TransferNotDoneError -from s3transfer.utils import FunctionContainer -from s3transfer.utils import TaskSemaphore - - -logger = logging.getLogger(__name__) - - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from concurrent import futures +from collections import namedtuple +import copy +import logging +import sys +import threading + +from s3transfer.compat import MAXINT +from s3transfer.compat import six +from s3transfer.exceptions import CancelledError, TransferNotDoneError +from s3transfer.utils import FunctionContainer +from s3transfer.utils import TaskSemaphore + + +logger = logging.getLogger(__name__) + + class BaseTransferFuture(object): @property def meta(self): @@ -72,523 +72,523 @@ class BaseTransferMeta(object): class TransferFuture(BaseTransferFuture): - def __init__(self, meta=None, coordinator=None): - """The future associated to a submitted transfer request - - :type meta: TransferMeta - :param meta: The metadata associated to the request. This object - is visible to the requester. - - :type coordinator: TransferCoordinator - :param coordinator: The coordinator associated to the request. This - object is not visible to the requester. - """ - self._meta = meta - if meta is None: - self._meta = TransferMeta() - - self._coordinator = coordinator - if coordinator is None: - self._coordinator = TransferCoordinator() - - @property - def meta(self): - return self._meta - - def done(self): - return self._coordinator.done() - - def result(self): - try: - # Usually the result() method blocks until the transfer is done, - # however if a KeyboardInterrupt is raised we want want to exit - # out of this and propogate the exception. - return self._coordinator.result() - except KeyboardInterrupt as e: - self.cancel() - raise e - - def cancel(self): - self._coordinator.cancel() - - def set_exception(self, exception): - """Sets the exception on the future.""" - if not self.done(): - raise TransferNotDoneError( - 'set_exception can only be called once the transfer is ' - 'complete.') - self._coordinator.set_exception(exception, override=True) - - + def __init__(self, meta=None, coordinator=None): + """The future associated to a submitted transfer request + + :type meta: TransferMeta + :param meta: The metadata associated to the request. This object + is visible to the requester. + + :type coordinator: TransferCoordinator + :param coordinator: The coordinator associated to the request. This + object is not visible to the requester. + """ + self._meta = meta + if meta is None: + self._meta = TransferMeta() + + self._coordinator = coordinator + if coordinator is None: + self._coordinator = TransferCoordinator() + + @property + def meta(self): + return self._meta + + def done(self): + return self._coordinator.done() + + def result(self): + try: + # Usually the result() method blocks until the transfer is done, + # however if a KeyboardInterrupt is raised we want want to exit + # out of this and propogate the exception. 
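The KeyboardInterrupt handling described in the comment above is what lets Ctrl-C abort a result() call that would otherwise block until the transfer finishes. Reduced to its core, with placeholder blocking_result/cancel callables::

    def wait_for_result(blocking_result, cancel):
        try:
            # Blocks until the transfer completes or the user interrupts.
            return blocking_result()
        except KeyboardInterrupt:
            # Tear the transfer down, then let the interrupt propagate.
            cancel()
            raise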
+ return self._coordinator.result() + except KeyboardInterrupt as e: + self.cancel() + raise e + + def cancel(self): + self._coordinator.cancel() + + def set_exception(self, exception): + """Sets the exception on the future.""" + if not self.done(): + raise TransferNotDoneError( + 'set_exception can only be called once the transfer is ' + 'complete.') + self._coordinator.set_exception(exception, override=True) + + class TransferMeta(BaseTransferMeta): - """Holds metadata about the TransferFuture""" - def __init__(self, call_args=None, transfer_id=None): - self._call_args = call_args - self._transfer_id = transfer_id - self._size = None - self._user_context = {} - - @property - def call_args(self): - """The call args used in the transfer request""" - return self._call_args - - @property - def transfer_id(self): - """The unique id of the transfer""" - return self._transfer_id - - @property - def size(self): - """The size of the transfer request if known""" - return self._size - - @property - def user_context(self): - """A dictionary that requesters can store data in""" - return self._user_context - - def provide_transfer_size(self, size): - """A method to provide the size of a transfer request - - By providing this value, the TransferManager will not try to - call HeadObject or use the use OS to determine the size of the - transfer. - """ - self._size = size - - -class TransferCoordinator(object): - """A helper class for managing TransferFuture""" - def __init__(self, transfer_id=None): - self.transfer_id = transfer_id - self._status = 'not-started' - self._result = None - self._exception = None - self._associated_futures = set() - self._failure_cleanups = [] - self._done_callbacks = [] - self._done_event = threading.Event() - self._lock = threading.Lock() - self._associated_futures_lock = threading.Lock() - self._done_callbacks_lock = threading.Lock() - self._failure_cleanups_lock = threading.Lock() - - def __repr__(self): - return '%s(transfer_id=%s)' % ( - self.__class__.__name__, self.transfer_id) - - @property - def exception(self): - return self._exception - - @property - def associated_futures(self): - """The list of futures associated to the inprogress TransferFuture - - Once the transfer finishes this list becomes empty as the transfer - is considered done and there should be no running futures left. - """ - with self._associated_futures_lock: - # We return a copy of the list because we do not want to - # processing the returned list while another thread is adding - # more futures to the actual list. - return copy.copy(self._associated_futures) - - @property - def failure_cleanups(self): - """The list of callbacks to call when the TransferFuture fails""" - return self._failure_cleanups - - @property - def status(self): - """The status of the TransferFuture - - The currently supported states are: - * not-started - Has yet to start. If in this state, a transfer - can be canceled immediately and nothing will happen. - * queued - SubmissionTask is about to submit tasks - * running - Is inprogress. In-progress as of now means that - the SubmissionTask that runs the transfer is being executed. So - there is no guarantee any transfer requests had been made to - S3 if this state is reached. - * cancelled - Was cancelled - * failed - An exception other than CancelledError was thrown - * success - No exceptions were thrown and is done. - """ - return self._status - - def set_result(self, result): - """Set a result for the TransferFuture - - Implies that the TransferFuture succeeded. 
This will always set a - result because it is invoked on the final task where there is only - ever one final task and it is ran at the very end of a transfer - process. So if a result is being set for this final task, the transfer - succeeded even if something came a long and canceled the transfer - on the final task. - """ - with self._lock: - self._exception = None - self._result = result - self._status = 'success' - - def set_exception(self, exception, override=False): - """Set an exception for the TransferFuture - - Implies the TransferFuture failed. - - :param exception: The exception that cause the transfer to fail. - :param override: If True, override any existing state. - """ - with self._lock: - if not self.done() or override: - self._exception = exception - self._status = 'failed' - - def result(self): - """Waits until TransferFuture is done and returns the result - - If the TransferFuture succeeded, it will return the result. If the - TransferFuture failed, it will raise the exception associated to the - failure. - """ - # Doing a wait() with no timeout cannot be interrupted in python2 but - # can be interrupted in python3 so we just wait with the largest - # possible value integer value, which is on the scale of billions of - # years... - self._done_event.wait(MAXINT) - - # Once done waiting, raise an exception if present or return the - # final result. - if self._exception: - raise self._exception - return self._result - - def cancel(self, msg='', exc_type=CancelledError): - """Cancels the TransferFuture - - :param msg: The message to attach to the cancellation - :param exc_type: The type of exception to set for the cancellation - """ - with self._lock: - if not self.done(): - should_announce_done = False - logger.debug('%s cancel(%s) called', self, msg) - self._exception = exc_type(msg) - if self._status == 'not-started': - should_announce_done = True - self._status = 'cancelled' - if should_announce_done: - self.announce_done() - - def set_status_to_queued(self): - """Sets the TransferFutrue's status to running""" - self._transition_to_non_done_state('queued') - - def set_status_to_running(self): - """Sets the TransferFuture's status to running""" - self._transition_to_non_done_state('running') - - def _transition_to_non_done_state(self, desired_state): - with self._lock: - if self.done(): - raise RuntimeError( - 'Unable to transition from done state %s to non-done ' - 'state %s.' % (self.status, desired_state)) - self._status = desired_state - - def submit(self, executor, task, tag=None): - """Submits a task to a provided executor - - :type executor: s3transfer.futures.BoundedExecutor - :param executor: The executor to submit the callable to - - :type task: s3transfer.tasks.Task - :param task: The task to submit to the executor - - :type tag: s3transfer.futures.TaskTag - :param tag: A tag to associate to the submitted task - - :rtype: concurrent.futures.Future - :returns: A future representing the submitted task - """ - logger.debug( - "Submitting task %s to executor %s for transfer request: %s." % ( - task, executor, self.transfer_id) - ) - future = executor.submit(task, tag=tag) - # Add this created future to the list of associated future just - # in case it is needed during cleanups. - self.add_associated_future(future) - future.add_done_callback( - FunctionContainer(self.remove_associated_future, future)) - return future - - def done(self): - """Determines if a TransferFuture has completed - - :returns: False if status is equal to 'failed', 'cancelled', or - 'success'. 
True, otherwise - """ - return self.status in ['failed', 'cancelled', 'success'] - - def add_associated_future(self, future): - """Adds a future to be associated with the TransferFuture""" - with self._associated_futures_lock: - self._associated_futures.add(future) - - def remove_associated_future(self, future): - """Removes a future's association to the TransferFuture""" - with self._associated_futures_lock: - self._associated_futures.remove(future) - - def add_done_callback(self, function, *args, **kwargs): - """Add a done callback to be invoked when transfer is done""" - with self._done_callbacks_lock: - self._done_callbacks.append( - FunctionContainer(function, *args, **kwargs) - ) - - def add_failure_cleanup(self, function, *args, **kwargs): - """Adds a callback to call upon failure""" - with self._failure_cleanups_lock: - self._failure_cleanups.append( - FunctionContainer(function, *args, **kwargs)) - - def announce_done(self): - """Announce that future is done running and run associated callbacks - - This will run any failure cleanups if the transfer failed if not - they have not been run, allows the result() to be unblocked, and will - run any done callbacks associated to the TransferFuture if they have - not already been ran. - """ - if self.status != 'success': - self._run_failure_cleanups() - self._done_event.set() - self._run_done_callbacks() - - def _run_done_callbacks(self): - # Run the callbacks and remove the callbacks from the internal - # list so they do not get ran again if done is announced more than - # once. - with self._done_callbacks_lock: - self._run_callbacks(self._done_callbacks) - self._done_callbacks = [] - - def _run_failure_cleanups(self): - # Run the cleanup callbacks and remove the callbacks from the internal - # list so they do not get ran again if done is announced more than - # once. - with self._failure_cleanups_lock: - self._run_callbacks(self.failure_cleanups) - self._failure_cleanups = [] - - def _run_callbacks(self, callbacks): - for callback in callbacks: - self._run_callback(callback) - - def _run_callback(self, callback): - try: - callback() - # We do not want a callback interrupting the process, especially - # in the failure cleanups. So log and catch, the excpetion. - except Exception: - logger.debug("Exception raised in %s." % callback, exc_info=True) - - -class BoundedExecutor(object): - EXECUTOR_CLS = futures.ThreadPoolExecutor - - def __init__(self, max_size, max_num_threads, tag_semaphores=None, - executor_cls=None): - """An executor implentation that has a maximum queued up tasks - - The executor will block if the number of tasks that have been - submitted and is currently working on is past its maximum. - - :params max_size: The maximum number of inflight futures. An inflight - future means that the task is either queued up or is currently - being executed. A size of None or 0 means that the executor will - have no bound in terms of the number of inflight futures. - - :params max_num_threads: The maximum number of threads the executor - uses. - - :type tag_semaphores: dict - :params tag_semaphores: A dictionary where the key is the name of the - tag and the value is the semaphore to use when limiting the - number of tasks the executor is processing at a time. - - :type executor_cls: BaseExecutor - :param underlying_executor_cls: The executor class that - get bounded by this executor. If None is provided, the - concurrent.futures.ThreadPoolExecutor class is used. 
- """ - self._max_num_threads = max_num_threads - if executor_cls is None: - executor_cls = self.EXECUTOR_CLS - self._executor = executor_cls(max_workers=self._max_num_threads) - self._semaphore = TaskSemaphore(max_size) - self._tag_semaphores = tag_semaphores - - def submit(self, task, tag=None, block=True): - """Submit a task to complete - - :type task: s3transfer.tasks.Task - :param task: The task to run __call__ on - - - :type tag: s3transfer.futures.TaskTag - :param tag: An optional tag to associate to the task. This - is used to override which semaphore to use. - - :type block: boolean - :param block: True if to wait till it is possible to submit a task. - False, if not to wait and raise an error if not able to submit - a task. - - :returns: The future assocaited to the submitted task - """ - semaphore = self._semaphore - # If a tag was provided, use the semaphore associated to that - # tag. - if tag: - semaphore = self._tag_semaphores[tag] - - # Call acquire on the semaphore. - acquire_token = semaphore.acquire(task.transfer_id, block) - # Create a callback to invoke when task is done in order to call - # release on the semaphore. - release_callback = FunctionContainer( - semaphore.release, task.transfer_id, acquire_token) - # Submit the task to the underlying executor. - future = ExecutorFuture(self._executor.submit(task)) - # Add the Semaphore.release() callback to the future such that - # it is invoked once the future completes. - future.add_done_callback(release_callback) - return future - - def shutdown(self, wait=True): - self._executor.shutdown(wait) - - -class ExecutorFuture(object): - def __init__(self, future): - """A future returned from the executor - - Currently, it is just a wrapper around a concurrent.futures.Future. - However, this can eventually grow to implement the needed functionality - of concurrent.futures.Future if we move off of the library and not - affect the rest of the codebase. - - :type future: concurrent.futures.Future - :param future: The underlying future - """ - self._future = future - - def result(self): - return self._future.result() - - def add_done_callback(self, fn): - """Adds a callback to be completed once future is done - - :parm fn: A callable that takes no arguments. Note that is different - than concurrent.futures.Future.add_done_callback that requires - a single argument for the future. - """ - # The done callback for concurrent.futures.Future will always pass a - # the future in as the only argument. So we need to create the - # proper signature wrapper that will invoke the callback provided. 
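The wrapper described in the comment above exists because concurrent.futures always passes the future into done callbacks, whereas the callbacks registered here take no arguments. The adapter in isolation, against a plain ThreadPoolExecutor::

    from concurrent import futures

    def add_no_arg_done_callback(cf_future, fn):
        # concurrent.futures hands the future to the callback; drop it so
        # zero-argument callables can be registered unchanged.
        def done_callback(_future_passed_to_callback):
            return fn()
        cf_future.add_done_callback(done_callback)

    notified = []
    with futures.ThreadPoolExecutor(max_workers=1) as executor:
        f = executor.submit(lambda: 42)
        add_no_arg_done_callback(f, lambda: notified.append('done'))
    print(f.result(), notified)  # 42 ['done']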
- def done_callback(future_passed_to_callback): - return fn() - self._future.add_done_callback(done_callback) - - def done(self): - return self._future.done() - - -class BaseExecutor(object): - """Base Executor class implementation needed to work with s3transfer""" - def __init__(self, max_workers=None): - pass - - def submit(self, fn, *args, **kwargs): - raise NotImplementedError('submit()') - - def shutdown(self, wait=True): - raise NotImplementedError('shutdown()') - - -class NonThreadedExecutor(BaseExecutor): - """A drop-in replacement non-threaded version of ThreadPoolExecutor""" - def submit(self, fn, *args, **kwargs): - future = NonThreadedExecutorFuture() - try: - result = fn(*args, **kwargs) - future.set_result(result) - except Exception: - e, tb = sys.exc_info()[1:] - logger.debug( - 'Setting exception for %s to %s with traceback %s', - future, e, tb - ) - future.set_exception_info(e, tb) - return future - - def shutdown(self, wait=True): - pass - - -class NonThreadedExecutorFuture(object): - """The Future returned from NonThreadedExecutor - - Note that this future is **not** thread-safe as it is being used - from the context of a non-threaded environment. - """ - def __init__(self): - self._result = None - self._exception = None - self._traceback = None - self._done = False - self._done_callbacks = [] - - def set_result(self, result): - self._result = result - self._set_done() - - def set_exception_info(self, exception, traceback): - self._exception = exception - self._traceback = traceback - self._set_done() - - def result(self, timeout=None): - if self._exception: - six.reraise( - type(self._exception), self._exception, self._traceback) - return self._result - - def _set_done(self): - self._done = True - for done_callback in self._done_callbacks: - self._invoke_done_callback(done_callback) - self._done_callbacks = [] - - def _invoke_done_callback(self, done_callback): - return done_callback(self) - - def done(self): - return self._done - - def add_done_callback(self, fn): - if self._done: - self._invoke_done_callback(fn) - else: - self._done_callbacks.append(fn) - - -TaskTag = namedtuple('TaskTag', ['name']) - -IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload') -IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download') + """Holds metadata about the TransferFuture""" + def __init__(self, call_args=None, transfer_id=None): + self._call_args = call_args + self._transfer_id = transfer_id + self._size = None + self._user_context = {} + + @property + def call_args(self): + """The call args used in the transfer request""" + return self._call_args + + @property + def transfer_id(self): + """The unique id of the transfer""" + return self._transfer_id + + @property + def size(self): + """The size of the transfer request if known""" + return self._size + + @property + def user_context(self): + """A dictionary that requesters can store data in""" + return self._user_context + + def provide_transfer_size(self, size): + """A method to provide the size of a transfer request + + By providing this value, the TransferManager will not try to + call HeadObject or use the use OS to determine the size of the + transfer. 
+ """ + self._size = size + + +class TransferCoordinator(object): + """A helper class for managing TransferFuture""" + def __init__(self, transfer_id=None): + self.transfer_id = transfer_id + self._status = 'not-started' + self._result = None + self._exception = None + self._associated_futures = set() + self._failure_cleanups = [] + self._done_callbacks = [] + self._done_event = threading.Event() + self._lock = threading.Lock() + self._associated_futures_lock = threading.Lock() + self._done_callbacks_lock = threading.Lock() + self._failure_cleanups_lock = threading.Lock() + + def __repr__(self): + return '%s(transfer_id=%s)' % ( + self.__class__.__name__, self.transfer_id) + + @property + def exception(self): + return self._exception + + @property + def associated_futures(self): + """The list of futures associated to the inprogress TransferFuture + + Once the transfer finishes this list becomes empty as the transfer + is considered done and there should be no running futures left. + """ + with self._associated_futures_lock: + # We return a copy of the list because we do not want to + # processing the returned list while another thread is adding + # more futures to the actual list. + return copy.copy(self._associated_futures) + + @property + def failure_cleanups(self): + """The list of callbacks to call when the TransferFuture fails""" + return self._failure_cleanups + + @property + def status(self): + """The status of the TransferFuture + + The currently supported states are: + * not-started - Has yet to start. If in this state, a transfer + can be canceled immediately and nothing will happen. + * queued - SubmissionTask is about to submit tasks + * running - Is inprogress. In-progress as of now means that + the SubmissionTask that runs the transfer is being executed. So + there is no guarantee any transfer requests had been made to + S3 if this state is reached. + * cancelled - Was cancelled + * failed - An exception other than CancelledError was thrown + * success - No exceptions were thrown and is done. + """ + return self._status + + def set_result(self, result): + """Set a result for the TransferFuture + + Implies that the TransferFuture succeeded. This will always set a + result because it is invoked on the final task where there is only + ever one final task and it is ran at the very end of a transfer + process. So if a result is being set for this final task, the transfer + succeeded even if something came a long and canceled the transfer + on the final task. + """ + with self._lock: + self._exception = None + self._result = result + self._status = 'success' + + def set_exception(self, exception, override=False): + """Set an exception for the TransferFuture + + Implies the TransferFuture failed. + + :param exception: The exception that cause the transfer to fail. + :param override: If True, override any existing state. + """ + with self._lock: + if not self.done() or override: + self._exception = exception + self._status = 'failed' + + def result(self): + """Waits until TransferFuture is done and returns the result + + If the TransferFuture succeeded, it will return the result. If the + TransferFuture failed, it will raise the exception associated to the + failure. + """ + # Doing a wait() with no timeout cannot be interrupted in python2 but + # can be interrupted in python3 so we just wait with the largest + # possible value integer value, which is on the scale of billions of + # years... 
+ self._done_event.wait(MAXINT) + + # Once done waiting, raise an exception if present or return the + # final result. + if self._exception: + raise self._exception + return self._result + + def cancel(self, msg='', exc_type=CancelledError): + """Cancels the TransferFuture + + :param msg: The message to attach to the cancellation + :param exc_type: The type of exception to set for the cancellation + """ + with self._lock: + if not self.done(): + should_announce_done = False + logger.debug('%s cancel(%s) called', self, msg) + self._exception = exc_type(msg) + if self._status == 'not-started': + should_announce_done = True + self._status = 'cancelled' + if should_announce_done: + self.announce_done() + + def set_status_to_queued(self): + """Sets the TransferFutrue's status to running""" + self._transition_to_non_done_state('queued') + + def set_status_to_running(self): + """Sets the TransferFuture's status to running""" + self._transition_to_non_done_state('running') + + def _transition_to_non_done_state(self, desired_state): + with self._lock: + if self.done(): + raise RuntimeError( + 'Unable to transition from done state %s to non-done ' + 'state %s.' % (self.status, desired_state)) + self._status = desired_state + + def submit(self, executor, task, tag=None): + """Submits a task to a provided executor + + :type executor: s3transfer.futures.BoundedExecutor + :param executor: The executor to submit the callable to + + :type task: s3transfer.tasks.Task + :param task: The task to submit to the executor + + :type tag: s3transfer.futures.TaskTag + :param tag: A tag to associate to the submitted task + + :rtype: concurrent.futures.Future + :returns: A future representing the submitted task + """ + logger.debug( + "Submitting task %s to executor %s for transfer request: %s." % ( + task, executor, self.transfer_id) + ) + future = executor.submit(task, tag=tag) + # Add this created future to the list of associated future just + # in case it is needed during cleanups. + self.add_associated_future(future) + future.add_done_callback( + FunctionContainer(self.remove_associated_future, future)) + return future + + def done(self): + """Determines if a TransferFuture has completed + + :returns: False if status is equal to 'failed', 'cancelled', or + 'success'. True, otherwise + """ + return self.status in ['failed', 'cancelled', 'success'] + + def add_associated_future(self, future): + """Adds a future to be associated with the TransferFuture""" + with self._associated_futures_lock: + self._associated_futures.add(future) + + def remove_associated_future(self, future): + """Removes a future's association to the TransferFuture""" + with self._associated_futures_lock: + self._associated_futures.remove(future) + + def add_done_callback(self, function, *args, **kwargs): + """Add a done callback to be invoked when transfer is done""" + with self._done_callbacks_lock: + self._done_callbacks.append( + FunctionContainer(function, *args, **kwargs) + ) + + def add_failure_cleanup(self, function, *args, **kwargs): + """Adds a callback to call upon failure""" + with self._failure_cleanups_lock: + self._failure_cleanups.append( + FunctionContainer(function, *args, **kwargs)) + + def announce_done(self): + """Announce that future is done running and run associated callbacks + + This will run any failure cleanups if the transfer failed if not + they have not been run, allows the result() to be unblocked, and will + run any done callbacks associated to the TransferFuture if they have + not already been ran. 
+ """ + if self.status != 'success': + self._run_failure_cleanups() + self._done_event.set() + self._run_done_callbacks() + + def _run_done_callbacks(self): + # Run the callbacks and remove the callbacks from the internal + # list so they do not get ran again if done is announced more than + # once. + with self._done_callbacks_lock: + self._run_callbacks(self._done_callbacks) + self._done_callbacks = [] + + def _run_failure_cleanups(self): + # Run the cleanup callbacks and remove the callbacks from the internal + # list so they do not get ran again if done is announced more than + # once. + with self._failure_cleanups_lock: + self._run_callbacks(self.failure_cleanups) + self._failure_cleanups = [] + + def _run_callbacks(self, callbacks): + for callback in callbacks: + self._run_callback(callback) + + def _run_callback(self, callback): + try: + callback() + # We do not want a callback interrupting the process, especially + # in the failure cleanups. So log and catch, the excpetion. + except Exception: + logger.debug("Exception raised in %s." % callback, exc_info=True) + + +class BoundedExecutor(object): + EXECUTOR_CLS = futures.ThreadPoolExecutor + + def __init__(self, max_size, max_num_threads, tag_semaphores=None, + executor_cls=None): + """An executor implentation that has a maximum queued up tasks + + The executor will block if the number of tasks that have been + submitted and is currently working on is past its maximum. + + :params max_size: The maximum number of inflight futures. An inflight + future means that the task is either queued up or is currently + being executed. A size of None or 0 means that the executor will + have no bound in terms of the number of inflight futures. + + :params max_num_threads: The maximum number of threads the executor + uses. + + :type tag_semaphores: dict + :params tag_semaphores: A dictionary where the key is the name of the + tag and the value is the semaphore to use when limiting the + number of tasks the executor is processing at a time. + + :type executor_cls: BaseExecutor + :param underlying_executor_cls: The executor class that + get bounded by this executor. If None is provided, the + concurrent.futures.ThreadPoolExecutor class is used. + """ + self._max_num_threads = max_num_threads + if executor_cls is None: + executor_cls = self.EXECUTOR_CLS + self._executor = executor_cls(max_workers=self._max_num_threads) + self._semaphore = TaskSemaphore(max_size) + self._tag_semaphores = tag_semaphores + + def submit(self, task, tag=None, block=True): + """Submit a task to complete + + :type task: s3transfer.tasks.Task + :param task: The task to run __call__ on + + + :type tag: s3transfer.futures.TaskTag + :param tag: An optional tag to associate to the task. This + is used to override which semaphore to use. + + :type block: boolean + :param block: True if to wait till it is possible to submit a task. + False, if not to wait and raise an error if not able to submit + a task. + + :returns: The future assocaited to the submitted task + """ + semaphore = self._semaphore + # If a tag was provided, use the semaphore associated to that + # tag. + if tag: + semaphore = self._tag_semaphores[tag] + + # Call acquire on the semaphore. + acquire_token = semaphore.acquire(task.transfer_id, block) + # Create a callback to invoke when task is done in order to call + # release on the semaphore. + release_callback = FunctionContainer( + semaphore.release, task.transfer_id, acquire_token) + # Submit the task to the underlying executor. 
+ future = ExecutorFuture(self._executor.submit(task)) + # Add the Semaphore.release() callback to the future such that + # it is invoked once the future completes. + future.add_done_callback(release_callback) + return future + + def shutdown(self, wait=True): + self._executor.shutdown(wait) + + +class ExecutorFuture(object): + def __init__(self, future): + """A future returned from the executor + + Currently, it is just a wrapper around a concurrent.futures.Future. + However, this can eventually grow to implement the needed functionality + of concurrent.futures.Future if we move off of the library and not + affect the rest of the codebase. + + :type future: concurrent.futures.Future + :param future: The underlying future + """ + self._future = future + + def result(self): + return self._future.result() + + def add_done_callback(self, fn): + """Adds a callback to be completed once future is done + + :parm fn: A callable that takes no arguments. Note that is different + than concurrent.futures.Future.add_done_callback that requires + a single argument for the future. + """ + # The done callback for concurrent.futures.Future will always pass a + # the future in as the only argument. So we need to create the + # proper signature wrapper that will invoke the callback provided. + def done_callback(future_passed_to_callback): + return fn() + self._future.add_done_callback(done_callback) + + def done(self): + return self._future.done() + + +class BaseExecutor(object): + """Base Executor class implementation needed to work with s3transfer""" + def __init__(self, max_workers=None): + pass + + def submit(self, fn, *args, **kwargs): + raise NotImplementedError('submit()') + + def shutdown(self, wait=True): + raise NotImplementedError('shutdown()') + + +class NonThreadedExecutor(BaseExecutor): + """A drop-in replacement non-threaded version of ThreadPoolExecutor""" + def submit(self, fn, *args, **kwargs): + future = NonThreadedExecutorFuture() + try: + result = fn(*args, **kwargs) + future.set_result(result) + except Exception: + e, tb = sys.exc_info()[1:] + logger.debug( + 'Setting exception for %s to %s with traceback %s', + future, e, tb + ) + future.set_exception_info(e, tb) + return future + + def shutdown(self, wait=True): + pass + + +class NonThreadedExecutorFuture(object): + """The Future returned from NonThreadedExecutor + + Note that this future is **not** thread-safe as it is being used + from the context of a non-threaded environment. 
+ """ + def __init__(self): + self._result = None + self._exception = None + self._traceback = None + self._done = False + self._done_callbacks = [] + + def set_result(self, result): + self._result = result + self._set_done() + + def set_exception_info(self, exception, traceback): + self._exception = exception + self._traceback = traceback + self._set_done() + + def result(self, timeout=None): + if self._exception: + six.reraise( + type(self._exception), self._exception, self._traceback) + return self._result + + def _set_done(self): + self._done = True + for done_callback in self._done_callbacks: + self._invoke_done_callback(done_callback) + self._done_callbacks = [] + + def _invoke_done_callback(self, done_callback): + return done_callback(self) + + def done(self): + return self._done + + def add_done_callback(self, fn): + if self._done: + self._invoke_done_callback(fn) + else: + self._done_callbacks.append(fn) + + +TaskTag = namedtuple('TaskTag', ['name']) + +IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload') +IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download') diff --git a/contrib/python/s3transfer/py2/s3transfer/manager.py b/contrib/python/s3transfer/py2/s3transfer/manager.py index a7e7c33b00..c6e6eb47f6 100644 --- a/contrib/python/s3transfer/py2/s3transfer/manager.py +++ b/contrib/python/s3transfer/py2/s3transfer/manager.py @@ -1,208 +1,208 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import logging +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import copy +import logging import re -import threading - -from botocore.compat import six - +import threading + +from botocore.compat import six + from s3transfer.constants import KB, MB from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS -from s3transfer.utils import get_callbacks -from s3transfer.utils import signal_transferring -from s3transfer.utils import signal_not_transferring -from s3transfer.utils import CallArgs -from s3transfer.utils import OSUtils -from s3transfer.utils import TaskSemaphore -from s3transfer.utils import SlidingWindowSemaphore -from s3transfer.exceptions import CancelledError -from s3transfer.exceptions import FatalError -from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG -from s3transfer.futures import IN_MEMORY_UPLOAD_TAG -from s3transfer.futures import BoundedExecutor -from s3transfer.futures import TransferFuture -from s3transfer.futures import TransferMeta -from s3transfer.futures import TransferCoordinator -from s3transfer.download import DownloadSubmissionTask -from s3transfer.upload import UploadSubmissionTask -from s3transfer.copies import CopySubmissionTask -from s3transfer.delete import DeleteSubmissionTask -from s3transfer.bandwidth import LeakyBucket -from s3transfer.bandwidth import BandwidthLimiter - - -logger = logging.getLogger(__name__) - - -class TransferConfig(object): - def __init__(self, - multipart_threshold=8 * MB, - multipart_chunksize=8 * MB, - max_request_concurrency=10, - max_submission_concurrency=5, - max_request_queue_size=1000, - max_submission_queue_size=1000, - max_io_queue_size=1000, - io_chunksize=256 * KB, - num_download_attempts=5, - max_in_memory_upload_chunks=10, - max_in_memory_download_chunks=10, - max_bandwidth=None): - """Configurations for the transfer mangager - - :param multipart_threshold: The threshold for which multipart - transfers occur. - - :param max_request_concurrency: The maximum number of S3 API - transfer-related requests that can happen at a time. - - :param max_submission_concurrency: The maximum number of threads - processing a call to a TransferManager method. Processing a - call usually entails determining which S3 API requests that need - to be enqueued, but does **not** entail making any of the - S3 API data transfering requests needed to perform the transfer. - The threads controlled by ``max_request_concurrency`` is - responsible for that. - - :param multipart_chunksize: The size of each transfer if a request - becomes a multipart transfer. - - :param max_request_queue_size: The maximum amount of S3 API requests - that can be queued at a time. A value of zero means that there - is no maximum. - - :param max_submission_queue_size: The maximum amount of - TransferManager method calls that can be queued at a time. A value - of zero means that there is no maximum. - - :param max_io_queue_size: The maximum amount of read parts that - can be queued to be written to disk per download. A value of zero - means that there is no maximum. The default size for each element - in this queue is 8 KB. - - :param io_chunksize: The max size of each chunk in the io queue. - Currently, this is size used when reading from the downloaded - stream as well. - - :param num_download_attempts: The number of download attempts that - will be tried upon errors with downloading an object in S3. Note - that these retries account for errors that occur when streamming - down the data from s3 (i.e. socket errors and read timeouts that - occur after recieving an OK response from s3). 
- Other retryable exceptions such as throttling errors and 5xx errors - are already retried by botocore (this default is 5). The - ``num_download_attempts`` does not take into account the - number of exceptions retried by botocore. - - :param max_in_memory_upload_chunks: The number of chunks that can - be stored in memory at a time for all ongoing upload requests. - This pertains to chunks of data that need to be stored in memory - during an upload if the data is sourced from a file-like object. - The total maximum memory footprint due to a in-memory upload - chunks is roughly equal to: - - max_in_memory_upload_chunks * multipart_chunksize - + max_submission_concurrency * multipart_chunksize - - ``max_submission_concurrency`` has an affect on this value because - for each thread pulling data off of a file-like object, they may - be waiting with a single read chunk to be submitted for upload - because the ``max_in_memory_upload_chunks`` value has been reached - by the threads making the upload request. - - :param max_in_memory_download_chunks: The number of chunks that can - be buffered in memory and **not** in the io queue at a time for all - ongoing dowload requests. This pertains specifically to file-like - objects that cannot be seeked. The total maximum memory footprint - due to a in-memory download chunks is roughly equal to: - - max_in_memory_download_chunks * multipart_chunksize - - :param max_bandwidth: The maximum bandwidth that will be consumed - in uploading and downloading file content. The value is in terms of - bytes per second. - """ - self.multipart_threshold = multipart_threshold - self.multipart_chunksize = multipart_chunksize - self.max_request_concurrency = max_request_concurrency - self.max_submission_concurrency = max_submission_concurrency - self.max_request_queue_size = max_request_queue_size - self.max_submission_queue_size = max_submission_queue_size - self.max_io_queue_size = max_io_queue_size - self.io_chunksize = io_chunksize - self.num_download_attempts = num_download_attempts - self.max_in_memory_upload_chunks = max_in_memory_upload_chunks - self.max_in_memory_download_chunks = max_in_memory_download_chunks - self.max_bandwidth = max_bandwidth - self._validate_attrs_are_nonzero() - - def _validate_attrs_are_nonzero(self): - for attr, attr_val, in self.__dict__.items(): - if attr_val is not None and attr_val <= 0: - raise ValueError( - 'Provided parameter %s of value %s must be greater than ' - '0.' 
% (attr, attr_val)) - - -class TransferManager(object): +from s3transfer.utils import get_callbacks +from s3transfer.utils import signal_transferring +from s3transfer.utils import signal_not_transferring +from s3transfer.utils import CallArgs +from s3transfer.utils import OSUtils +from s3transfer.utils import TaskSemaphore +from s3transfer.utils import SlidingWindowSemaphore +from s3transfer.exceptions import CancelledError +from s3transfer.exceptions import FatalError +from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG +from s3transfer.futures import IN_MEMORY_UPLOAD_TAG +from s3transfer.futures import BoundedExecutor +from s3transfer.futures import TransferFuture +from s3transfer.futures import TransferMeta +from s3transfer.futures import TransferCoordinator +from s3transfer.download import DownloadSubmissionTask +from s3transfer.upload import UploadSubmissionTask +from s3transfer.copies import CopySubmissionTask +from s3transfer.delete import DeleteSubmissionTask +from s3transfer.bandwidth import LeakyBucket +from s3transfer.bandwidth import BandwidthLimiter + + +logger = logging.getLogger(__name__) + + +class TransferConfig(object): + def __init__(self, + multipart_threshold=8 * MB, + multipart_chunksize=8 * MB, + max_request_concurrency=10, + max_submission_concurrency=5, + max_request_queue_size=1000, + max_submission_queue_size=1000, + max_io_queue_size=1000, + io_chunksize=256 * KB, + num_download_attempts=5, + max_in_memory_upload_chunks=10, + max_in_memory_download_chunks=10, + max_bandwidth=None): + """Configurations for the transfer mangager + + :param multipart_threshold: The threshold for which multipart + transfers occur. + + :param max_request_concurrency: The maximum number of S3 API + transfer-related requests that can happen at a time. + + :param max_submission_concurrency: The maximum number of threads + processing a call to a TransferManager method. Processing a + call usually entails determining which S3 API requests that need + to be enqueued, but does **not** entail making any of the + S3 API data transfering requests needed to perform the transfer. + The threads controlled by ``max_request_concurrency`` is + responsible for that. + + :param multipart_chunksize: The size of each transfer if a request + becomes a multipart transfer. + + :param max_request_queue_size: The maximum amount of S3 API requests + that can be queued at a time. A value of zero means that there + is no maximum. + + :param max_submission_queue_size: The maximum amount of + TransferManager method calls that can be queued at a time. A value + of zero means that there is no maximum. + + :param max_io_queue_size: The maximum amount of read parts that + can be queued to be written to disk per download. A value of zero + means that there is no maximum. The default size for each element + in this queue is 8 KB. + + :param io_chunksize: The max size of each chunk in the io queue. + Currently, this is size used when reading from the downloaded + stream as well. + + :param num_download_attempts: The number of download attempts that + will be tried upon errors with downloading an object in S3. Note + that these retries account for errors that occur when streamming + down the data from s3 (i.e. socket errors and read timeouts that + occur after recieving an OK response from s3). + Other retryable exceptions such as throttling errors and 5xx errors + are already retried by botocore (this default is 5). 
The + ``num_download_attempts`` does not take into account the + number of exceptions retried by botocore. + + :param max_in_memory_upload_chunks: The number of chunks that can + be stored in memory at a time for all ongoing upload requests. + This pertains to chunks of data that need to be stored in memory + during an upload if the data is sourced from a file-like object. + The total maximum memory footprint due to a in-memory upload + chunks is roughly equal to: + + max_in_memory_upload_chunks * multipart_chunksize + + max_submission_concurrency * multipart_chunksize + + ``max_submission_concurrency`` has an affect on this value because + for each thread pulling data off of a file-like object, they may + be waiting with a single read chunk to be submitted for upload + because the ``max_in_memory_upload_chunks`` value has been reached + by the threads making the upload request. + + :param max_in_memory_download_chunks: The number of chunks that can + be buffered in memory and **not** in the io queue at a time for all + ongoing dowload requests. This pertains specifically to file-like + objects that cannot be seeked. The total maximum memory footprint + due to a in-memory download chunks is roughly equal to: + + max_in_memory_download_chunks * multipart_chunksize + + :param max_bandwidth: The maximum bandwidth that will be consumed + in uploading and downloading file content. The value is in terms of + bytes per second. + """ + self.multipart_threshold = multipart_threshold + self.multipart_chunksize = multipart_chunksize + self.max_request_concurrency = max_request_concurrency + self.max_submission_concurrency = max_submission_concurrency + self.max_request_queue_size = max_request_queue_size + self.max_submission_queue_size = max_submission_queue_size + self.max_io_queue_size = max_io_queue_size + self.io_chunksize = io_chunksize + self.num_download_attempts = num_download_attempts + self.max_in_memory_upload_chunks = max_in_memory_upload_chunks + self.max_in_memory_download_chunks = max_in_memory_download_chunks + self.max_bandwidth = max_bandwidth + self._validate_attrs_are_nonzero() + + def _validate_attrs_are_nonzero(self): + for attr, attr_val, in self.__dict__.items(): + if attr_val is not None and attr_val <= 0: + raise ValueError( + 'Provided parameter %s of value %s must be greater than ' + '0.' 
% (attr, attr_val)) + + +class TransferManager(object): ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS - - ALLOWED_UPLOAD_ARGS = [ - 'ACL', - 'CacheControl', - 'ContentDisposition', - 'ContentEncoding', - 'ContentLanguage', - 'ContentType', + + ALLOWED_UPLOAD_ARGS = [ + 'ACL', + 'CacheControl', + 'ContentDisposition', + 'ContentEncoding', + 'ContentLanguage', + 'ContentType', 'ExpectedBucketOwner', - 'Expires', - 'GrantFullControl', - 'GrantRead', - 'GrantReadACP', - 'GrantWriteACP', - 'Metadata', - 'RequestPayer', - 'ServerSideEncryption', - 'StorageClass', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'SSEKMSKeyId', + 'Expires', + 'GrantFullControl', + 'GrantRead', + 'GrantReadACP', + 'GrantWriteACP', + 'Metadata', + 'RequestPayer', + 'ServerSideEncryption', + 'StorageClass', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'SSEKMSKeyId', 'SSEKMSEncryptionContext', 'Tagging', - 'WebsiteRedirectLocation' - ] - - ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerKeyMD5', + 'WebsiteRedirectLocation' + ] + + ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKey', + 'CopySourceSSECustomerKeyMD5', 'MetadataDirective', 'TaggingDirective', - ] - - ALLOWED_DELETE_ARGS = [ - 'MFA', - 'VersionId', - 'RequestPayer', + ] + + ALLOWED_DELETE_ARGS = [ + 'MFA', + 'VersionId', + 'RequestPayer', 'ExpectedBucketOwner' - ] - + ] + VALIDATE_SUPPORTED_BUCKET_VALUES = True _UNSUPPORTED_BUCKET_PATTERNS = { @@ -212,70 +212,70 @@ class TransferManager(object): ), } - def __init__(self, client, config=None, osutil=None, executor_cls=None): - """A transfer manager interface for Amazon S3 - - :param client: Client to be used by the manager - :param config: TransferConfig to associate specific configurations - :param osutil: OSUtils object to use for os-related behavior when - using with transfer manager. - - :type executor_cls: s3transfer.futures.BaseExecutor - :param executor_cls: The class of executor to use with the transfer - manager. By default, concurrent.futures.ThreadPoolExecutor is used. - """ - self._client = client - self._config = config - if config is None: - self._config = TransferConfig() - self._osutil = osutil - if osutil is None: - self._osutil = OSUtils() - self._coordinator_controller = TransferCoordinatorController() - # A counter to create unique id's for each transfer submitted. - self._id_counter = 0 - - # The executor responsible for making S3 API transfer requests - self._request_executor = BoundedExecutor( - max_size=self._config.max_request_queue_size, - max_num_threads=self._config.max_request_concurrency, - tag_semaphores={ - IN_MEMORY_UPLOAD_TAG: TaskSemaphore( - self._config.max_in_memory_upload_chunks), - IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore( - self._config.max_in_memory_download_chunks) - }, - executor_cls=executor_cls - ) - - # The executor responsible for submitting the necessary tasks to - # perform the desired transfer - self._submission_executor = BoundedExecutor( - max_size=self._config.max_submission_queue_size, - max_num_threads=self._config.max_submission_concurrency, - executor_cls=executor_cls - - ) - - # There is one thread available for writing to disk. 
It will handle - # downloads for all files. - self._io_executor = BoundedExecutor( - max_size=self._config.max_io_queue_size, - max_num_threads=1, - executor_cls=executor_cls - ) - - # The component responsible for limiting bandwidth usage if it - # is configured. - self._bandwidth_limiter = None - if self._config.max_bandwidth is not None: - logger.debug( - 'Setting max_bandwidth to %s', self._config.max_bandwidth) - leaky_bucket = LeakyBucket(self._config.max_bandwidth) - self._bandwidth_limiter = BandwidthLimiter(leaky_bucket) - - self._register_handlers() - + def __init__(self, client, config=None, osutil=None, executor_cls=None): + """A transfer manager interface for Amazon S3 + + :param client: Client to be used by the manager + :param config: TransferConfig to associate specific configurations + :param osutil: OSUtils object to use for os-related behavior when + using with transfer manager. + + :type executor_cls: s3transfer.futures.BaseExecutor + :param executor_cls: The class of executor to use with the transfer + manager. By default, concurrent.futures.ThreadPoolExecutor is used. + """ + self._client = client + self._config = config + if config is None: + self._config = TransferConfig() + self._osutil = osutil + if osutil is None: + self._osutil = OSUtils() + self._coordinator_controller = TransferCoordinatorController() + # A counter to create unique id's for each transfer submitted. + self._id_counter = 0 + + # The executor responsible for making S3 API transfer requests + self._request_executor = BoundedExecutor( + max_size=self._config.max_request_queue_size, + max_num_threads=self._config.max_request_concurrency, + tag_semaphores={ + IN_MEMORY_UPLOAD_TAG: TaskSemaphore( + self._config.max_in_memory_upload_chunks), + IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore( + self._config.max_in_memory_download_chunks) + }, + executor_cls=executor_cls + ) + + # The executor responsible for submitting the necessary tasks to + # perform the desired transfer + self._submission_executor = BoundedExecutor( + max_size=self._config.max_submission_queue_size, + max_num_threads=self._config.max_submission_concurrency, + executor_cls=executor_cls + + ) + + # There is one thread available for writing to disk. It will handle + # downloads for all files. + self._io_executor = BoundedExecutor( + max_size=self._config.max_io_queue_size, + max_num_threads=1, + executor_cls=executor_cls + ) + + # The component responsible for limiting bandwidth usage if it + # is configured. + self._bandwidth_limiter = None + if self._config.max_bandwidth is not None: + logger.debug( + 'Setting max_bandwidth to %s', self._config.max_bandwidth) + leaky_bucket = LeakyBucket(self._config.max_bandwidth) + self._bandwidth_limiter = BandwidthLimiter(leaky_bucket) + + self._register_handlers() + @property def client(self): return self._client @@ -284,178 +284,178 @@ class TransferManager(object): def config(self): return self._config - def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): - """Uploads a file to S3 - - :type fileobj: str or seekable file-like object - :param fileobj: The name of a file to upload or a seekable file-like - object to upload. It is recommended to use a filename because - file-like objects may result in higher memory usage. 
- - :type bucket: str - :param bucket: The name of the bucket to upload to - - :type key: str - :param key: The name of the key to upload to - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: list(s3transfer.subscribers.BaseSubscriber) - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. - - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the upload - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) + def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): + """Uploads a file to S3 + + :type fileobj: str or seekable file-like object + :param fileobj: The name of a file to upload or a seekable file-like + object to upload. It is recommended to use a filename because + file-like objects may result in higher memory usage. + + :type bucket: str + :param bucket: The name of the bucket to upload to + + :type key: str + :param key: The name of the key to upload to + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: list(s3transfer.subscribers.BaseSubscriber) + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the upload + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( - fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args, - subscribers=subscribers - ) - extra_main_kwargs = {} - if self._bandwidth_limiter: - extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter - return self._submit_transfer( - call_args, UploadSubmissionTask, extra_main_kwargs) - - def download(self, bucket, key, fileobj, extra_args=None, - subscribers=None): - """Downloads a file from S3 - - :type bucket: str - :param bucket: The name of the bucket to download from - - :type key: str - :param key: The name of the key to download from - - :type fileobj: str or seekable file-like object - :param fileobj: The name of a file to download or a seekable file-like - object to download. It is recommended to use a filename because - file-like objects may result in higher memory usage. - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: list(s3transfer.subscribers.BaseSubscriber) - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. 
- - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the download - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) + call_args = CallArgs( + fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args, + subscribers=subscribers + ) + extra_main_kwargs = {} + if self._bandwidth_limiter: + extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter + return self._submit_transfer( + call_args, UploadSubmissionTask, extra_main_kwargs) + + def download(self, bucket, key, fileobj, extra_args=None, + subscribers=None): + """Downloads a file from S3 + + :type bucket: str + :param bucket: The name of the bucket to download from + + :type key: str + :param key: The name of the key to download from + + :type fileobj: str or seekable file-like object + :param fileobj: The name of a file to download or a seekable file-like + object to download. It is recommended to use a filename because + file-like objects may result in higher memory usage. + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: list(s3transfer.subscribers.BaseSubscriber) + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the download + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( - bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args, - subscribers=subscribers - ) - extra_main_kwargs = {'io_executor': self._io_executor} - if self._bandwidth_limiter: - extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter - return self._submit_transfer( - call_args, DownloadSubmissionTask, extra_main_kwargs) - - def copy(self, copy_source, bucket, key, extra_args=None, - subscribers=None, source_client=None): - """Copies a file in S3 - - :type copy_source: dict - :param copy_source: The name of the source bucket, key name of the - source object, and optional version ID of the source object. The - dictionary format is: - ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note - that the ``VersionId`` key is optional and may be omitted. - - :type bucket: str - :param bucket: The name of the bucket to copy to - - :type key: str - :param key: The name of the key to copy to - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: a list of subscribers - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. - - :type source_client: botocore or boto3 Client - :param source_client: The client to be used for operation that - may happen at the source object. For example, this client is - used for the head_object that determines the size of the copy. - If no client is provided, the transfer manager's client is used - as the client for the source object. 
- - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the copy - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - if source_client is None: - source_client = self._client - self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS) + call_args = CallArgs( + bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args, + subscribers=subscribers + ) + extra_main_kwargs = {'io_executor': self._io_executor} + if self._bandwidth_limiter: + extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter + return self._submit_transfer( + call_args, DownloadSubmissionTask, extra_main_kwargs) + + def copy(self, copy_source, bucket, key, extra_args=None, + subscribers=None, source_client=None): + """Copies a file in S3 + + :type copy_source: dict + :param copy_source: The name of the source bucket, key name of the + source object, and optional version ID of the source object. The + dictionary format is: + ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note + that the ``VersionId`` key is optional and may be omitted. + + :type bucket: str + :param bucket: The name of the bucket to copy to + + :type key: str + :param key: The name of the key to copy to + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: a list of subscribers + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :type source_client: botocore or boto3 Client + :param source_client: The client to be used for operation that + may happen at the source object. For example, this client is + used for the head_object that determines the size of the copy. + If no client is provided, the transfer manager's client is used + as the client for the source object. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the copy + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + if source_client is None: + source_client = self._client + self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS) if isinstance(copy_source, dict): self._validate_if_bucket_supported(copy_source.get('Bucket')) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( - copy_source=copy_source, bucket=bucket, key=key, - extra_args=extra_args, subscribers=subscribers, - source_client=source_client - ) - return self._submit_transfer(call_args, CopySubmissionTask) - - def delete(self, bucket, key, extra_args=None, subscribers=None): - """Delete an S3 object. - - :type bucket: str - :param bucket: The name of the bucket. - - :type key: str - :param key: The name of the S3 object to delete. - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - DeleteObject call. - - :type subscribers: list - :param subscribers: A list of subscribers to be invoked during the - process of the transfer request. Note that the ``on_progress`` - callback is not invoked during object deletion. - - :rtype: s3transfer.futures.TransferFuture - :return: Transfer future representing the deletion. 
- - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS) + call_args = CallArgs( + copy_source=copy_source, bucket=bucket, key=key, + extra_args=extra_args, subscribers=subscribers, + source_client=source_client + ) + return self._submit_transfer(call_args, CopySubmissionTask) + + def delete(self, bucket, key, extra_args=None, subscribers=None): + """Delete an S3 object. + + :type bucket: str + :param bucket: The name of the bucket. + + :type key: str + :param key: The name of the S3 object to delete. + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + DeleteObject call. + + :type subscribers: list + :param subscribers: A list of subscribers to be invoked during the + process of the transfer request. Note that the ``on_progress`` + callback is not invoked during object deletion. + + :rtype: s3transfer.futures.TransferFuture + :return: Transfer future representing the deletion. + + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( - bucket=bucket, key=key, extra_args=extra_args, - subscribers=subscribers - ) - return self._submit_transfer(call_args, DeleteSubmissionTask) - + call_args = CallArgs( + bucket=bucket, key=key, extra_args=extra_args, + subscribers=subscribers + ) + return self._submit_transfer(call_args, DeleteSubmissionTask) + def _validate_if_bucket_supported(self, bucket): # s3 high level operations don't support some resources # (eg. S3 Object Lambda) only direct API calls are available @@ -469,229 +469,229 @@ class TransferManager(object): 'resource. Use direct client calls instead.' % resource ) - def _validate_all_known_args(self, actual, allowed): - for kwarg in actual: - if kwarg not in allowed: - raise ValueError( - "Invalid extra_args key '%s', " - "must be one of: %s" % ( - kwarg, ', '.join(allowed))) - - def _submit_transfer(self, call_args, submission_task_cls, - extra_main_kwargs=None): - if not extra_main_kwargs: - extra_main_kwargs = {} - - # Create a TransferFuture to return back to the user - transfer_future, components = self._get_future_with_components( - call_args) - - # Add any provided done callbacks to the created transfer future - # to be invoked on the transfer future being complete. - for callback in get_callbacks(transfer_future, 'done'): - components['coordinator'].add_done_callback(callback) - - # Get the main kwargs needed to instantiate the submission task - main_kwargs = self._get_submission_task_main_kwargs( - transfer_future, extra_main_kwargs) - - # Submit a SubmissionTask that will submit all of the necessary - # tasks needed to complete the S3 transfer. - self._submission_executor.submit( - submission_task_cls( - transfer_coordinator=components['coordinator'], - main_kwargs=main_kwargs - ) - ) - - # Increment the unique id counter for future transfer requests - self._id_counter += 1 - - return transfer_future - - def _get_future_with_components(self, call_args): - transfer_id = self._id_counter - # Creates a new transfer future along with its components - transfer_coordinator = TransferCoordinator(transfer_id=transfer_id) - # Track the transfer coordinator for transfers to manage. 
- self._coordinator_controller.add_transfer_coordinator( - transfer_coordinator) - # Also make sure that the transfer coordinator is removed once - # the transfer completes so it does not stick around in memory. - transfer_coordinator.add_done_callback( - self._coordinator_controller.remove_transfer_coordinator, - transfer_coordinator) - components = { - 'meta': TransferMeta(call_args, transfer_id=transfer_id), - 'coordinator': transfer_coordinator - } - transfer_future = TransferFuture(**components) - return transfer_future, components - - def _get_submission_task_main_kwargs( - self, transfer_future, extra_main_kwargs): - main_kwargs = { - 'client': self._client, - 'config': self._config, - 'osutil': self._osutil, - 'request_executor': self._request_executor, - 'transfer_future': transfer_future - } - main_kwargs.update(extra_main_kwargs) - return main_kwargs - - def _register_handlers(self): - # Register handlers to enable/disable callbacks on uploads. - event_name = 'request-created.s3' - self._client.meta.events.register_first( - event_name, signal_not_transferring, - unique_id='s3upload-not-transferring') - self._client.meta.events.register_last( - event_name, signal_transferring, - unique_id='s3upload-transferring') - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, *args): - cancel = False - cancel_msg = '' - cancel_exc_type = FatalError - # If a exception was raised in the context handler, signal to cancel - # all of the inprogress futures in the shutdown. - if exc_type: - cancel = True - cancel_msg = six.text_type(exc_value) - if not cancel_msg: - cancel_msg = repr(exc_value) - # If it was a KeyboardInterrupt, the cancellation was initiated - # by the user. - if isinstance(exc_value, KeyboardInterrupt): - cancel_exc_type = CancelledError - self._shutdown(cancel, cancel_msg, cancel_exc_type) - - def shutdown(self, cancel=False, cancel_msg=''): - """Shutdown the TransferManager - - It will wait till all transfers complete before it completely shuts - down. - - :type cancel: boolean - :param cancel: If True, calls TransferFuture.cancel() for - all in-progress in transfers. This is useful if you want the - shutdown to happen quicker. - - :type cancel_msg: str - :param cancel_msg: The message to specify if canceling all in-progress - transfers. - """ - self._shutdown(cancel, cancel, cancel_msg) - - def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError): - if cancel: - # Cancel all in-flight transfers if requested, before waiting - # for them to complete. - self._coordinator_controller.cancel(cancel_msg, exc_type) - try: - # Wait until there are no more in-progress transfers. This is - # wrapped in a try statement because this can be interrupted - # with a KeyboardInterrupt that needs to be caught. - self._coordinator_controller.wait() - except KeyboardInterrupt: - # If not errors were raised in the try block, the cancel should - # have no coordinators it needs to run cancel on. If there was - # an error raised in the try statement we want to cancel all of - # the inflight transfers before shutting down to speed that - # process up. - self._coordinator_controller.cancel('KeyboardInterrupt()') - raise - finally: - # Shutdown all of the executors. 
- self._submission_executor.shutdown() - self._request_executor.shutdown() - self._io_executor.shutdown() - - -class TransferCoordinatorController(object): - def __init__(self): - """Abstraction to control all transfer coordinators - - This abstraction allows the manager to wait for inprogress transfers - to complete and cancel all inprogress transfers. - """ - self._lock = threading.Lock() - self._tracked_transfer_coordinators = set() - - @property - def tracked_transfer_coordinators(self): - """The set of transfer coordinators being tracked""" - with self._lock: - # We return a copy because the set is mutable and if you were to - # iterate over the set, it may be changing in length due to - # additions and removals of transfer coordinators. - return copy.copy(self._tracked_transfer_coordinators) - - def add_transfer_coordinator(self, transfer_coordinator): - """Adds a transfer coordinator of a transfer to be canceled if needed - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator for the - particular transfer - """ - with self._lock: - self._tracked_transfer_coordinators.add(transfer_coordinator) - - def remove_transfer_coordinator(self, transfer_coordinator): - """Remove a transfer coordinator from cancelation consideration - - Typically, this method is invoked by the transfer coordinator itself - to remove its self when it completes its transfer. - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator for the - particular transfer - """ - with self._lock: - self._tracked_transfer_coordinators.remove(transfer_coordinator) - - def cancel(self, msg='', exc_type=CancelledError): - """Cancels all inprogress transfers - - This cancels the inprogress transfers by calling cancel() on all - tracked transfer coordinators. - - :param msg: The message to pass on to each transfer coordinator that - gets cancelled. - - :param exc_type: The type of exception to set for the cancellation - """ - for transfer_coordinator in self.tracked_transfer_coordinators: - transfer_coordinator.cancel(msg, exc_type) - - def wait(self): - """Wait until there are no more inprogress transfers - - This will not stop when failures are encountered and not propogate any - of these errors from failed transfers, but it can be interrupted with - a KeyboardInterrupt. - """ - try: - transfer_coordinator = None - for transfer_coordinator in self.tracked_transfer_coordinators: - transfer_coordinator.result() - except KeyboardInterrupt: - logger.debug('Received KeyboardInterrupt in wait()') - # If Keyboard interrupt is raised while waiting for - # the result, then exit out of the wait and raise the - # exception - if transfer_coordinator: - logger.debug( - 'On KeyboardInterrupt was waiting for %s', - transfer_coordinator) - raise - except Exception: - # A general exception could have been thrown because - # of result(). We just want to ignore this and continue - # because we at least know that the transfer coordinator - # has completed. 
- pass + def _validate_all_known_args(self, actual, allowed): + for kwarg in actual: + if kwarg not in allowed: + raise ValueError( + "Invalid extra_args key '%s', " + "must be one of: %s" % ( + kwarg, ', '.join(allowed))) + + def _submit_transfer(self, call_args, submission_task_cls, + extra_main_kwargs=None): + if not extra_main_kwargs: + extra_main_kwargs = {} + + # Create a TransferFuture to return back to the user + transfer_future, components = self._get_future_with_components( + call_args) + + # Add any provided done callbacks to the created transfer future + # to be invoked on the transfer future being complete. + for callback in get_callbacks(transfer_future, 'done'): + components['coordinator'].add_done_callback(callback) + + # Get the main kwargs needed to instantiate the submission task + main_kwargs = self._get_submission_task_main_kwargs( + transfer_future, extra_main_kwargs) + + # Submit a SubmissionTask that will submit all of the necessary + # tasks needed to complete the S3 transfer. + self._submission_executor.submit( + submission_task_cls( + transfer_coordinator=components['coordinator'], + main_kwargs=main_kwargs + ) + ) + + # Increment the unique id counter for future transfer requests + self._id_counter += 1 + + return transfer_future + + def _get_future_with_components(self, call_args): + transfer_id = self._id_counter + # Creates a new transfer future along with its components + transfer_coordinator = TransferCoordinator(transfer_id=transfer_id) + # Track the transfer coordinator for transfers to manage. + self._coordinator_controller.add_transfer_coordinator( + transfer_coordinator) + # Also make sure that the transfer coordinator is removed once + # the transfer completes so it does not stick around in memory. + transfer_coordinator.add_done_callback( + self._coordinator_controller.remove_transfer_coordinator, + transfer_coordinator) + components = { + 'meta': TransferMeta(call_args, transfer_id=transfer_id), + 'coordinator': transfer_coordinator + } + transfer_future = TransferFuture(**components) + return transfer_future, components + + def _get_submission_task_main_kwargs( + self, transfer_future, extra_main_kwargs): + main_kwargs = { + 'client': self._client, + 'config': self._config, + 'osutil': self._osutil, + 'request_executor': self._request_executor, + 'transfer_future': transfer_future + } + main_kwargs.update(extra_main_kwargs) + return main_kwargs + + def _register_handlers(self): + # Register handlers to enable/disable callbacks on uploads. + event_name = 'request-created.s3' + self._client.meta.events.register_first( + event_name, signal_not_transferring, + unique_id='s3upload-not-transferring') + self._client.meta.events.register_last( + event_name, signal_transferring, + unique_id='s3upload-transferring') + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, *args): + cancel = False + cancel_msg = '' + cancel_exc_type = FatalError + # If a exception was raised in the context handler, signal to cancel + # all of the inprogress futures in the shutdown. + if exc_type: + cancel = True + cancel_msg = six.text_type(exc_value) + if not cancel_msg: + cancel_msg = repr(exc_value) + # If it was a KeyboardInterrupt, the cancellation was initiated + # by the user. 
+ if isinstance(exc_value, KeyboardInterrupt): + cancel_exc_type = CancelledError + self._shutdown(cancel, cancel_msg, cancel_exc_type) + + def shutdown(self, cancel=False, cancel_msg=''): + """Shutdown the TransferManager + + It will wait till all transfers complete before it completely shuts + down. + + :type cancel: boolean + :param cancel: If True, calls TransferFuture.cancel() for + all in-progress in transfers. This is useful if you want the + shutdown to happen quicker. + + :type cancel_msg: str + :param cancel_msg: The message to specify if canceling all in-progress + transfers. + """ + self._shutdown(cancel, cancel, cancel_msg) + + def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError): + if cancel: + # Cancel all in-flight transfers if requested, before waiting + # for them to complete. + self._coordinator_controller.cancel(cancel_msg, exc_type) + try: + # Wait until there are no more in-progress transfers. This is + # wrapped in a try statement because this can be interrupted + # with a KeyboardInterrupt that needs to be caught. + self._coordinator_controller.wait() + except KeyboardInterrupt: + # If not errors were raised in the try block, the cancel should + # have no coordinators it needs to run cancel on. If there was + # an error raised in the try statement we want to cancel all of + # the inflight transfers before shutting down to speed that + # process up. + self._coordinator_controller.cancel('KeyboardInterrupt()') + raise + finally: + # Shutdown all of the executors. + self._submission_executor.shutdown() + self._request_executor.shutdown() + self._io_executor.shutdown() + + +class TransferCoordinatorController(object): + def __init__(self): + """Abstraction to control all transfer coordinators + + This abstraction allows the manager to wait for inprogress transfers + to complete and cancel all inprogress transfers. + """ + self._lock = threading.Lock() + self._tracked_transfer_coordinators = set() + + @property + def tracked_transfer_coordinators(self): + """The set of transfer coordinators being tracked""" + with self._lock: + # We return a copy because the set is mutable and if you were to + # iterate over the set, it may be changing in length due to + # additions and removals of transfer coordinators. + return copy.copy(self._tracked_transfer_coordinators) + + def add_transfer_coordinator(self, transfer_coordinator): + """Adds a transfer coordinator of a transfer to be canceled if needed + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator for the + particular transfer + """ + with self._lock: + self._tracked_transfer_coordinators.add(transfer_coordinator) + + def remove_transfer_coordinator(self, transfer_coordinator): + """Remove a transfer coordinator from cancelation consideration + + Typically, this method is invoked by the transfer coordinator itself + to remove its self when it completes its transfer. + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator for the + particular transfer + """ + with self._lock: + self._tracked_transfer_coordinators.remove(transfer_coordinator) + + def cancel(self, msg='', exc_type=CancelledError): + """Cancels all inprogress transfers + + This cancels the inprogress transfers by calling cancel() on all + tracked transfer coordinators. + + :param msg: The message to pass on to each transfer coordinator that + gets cancelled. 
+ + :param exc_type: The type of exception to set for the cancellation + """ + for transfer_coordinator in self.tracked_transfer_coordinators: + transfer_coordinator.cancel(msg, exc_type) + + def wait(self): + """Wait until there are no more inprogress transfers + + This will not stop when failures are encountered and not propogate any + of these errors from failed transfers, but it can be interrupted with + a KeyboardInterrupt. + """ + try: + transfer_coordinator = None + for transfer_coordinator in self.tracked_transfer_coordinators: + transfer_coordinator.result() + except KeyboardInterrupt: + logger.debug('Received KeyboardInterrupt in wait()') + # If Keyboard interrupt is raised while waiting for + # the result, then exit out of the wait and raise the + # exception + if transfer_coordinator: + logger.debug( + 'On KeyboardInterrupt was waiting for %s', + transfer_coordinator) + raise + except Exception: + # A general exception could have been thrown because + # of result(). We just want to ignore this and continue + # because we at least know that the transfer coordinator + # has completed. + pass diff --git a/contrib/python/s3transfer/py2/s3transfer/subscribers.py b/contrib/python/s3transfer/py2/s3transfer/subscribers.py index 41698cbbb1..329b01ec9d 100644 --- a/contrib/python/s3transfer/py2/s3transfer/subscribers.py +++ b/contrib/python/s3transfer/py2/s3transfer/subscribers.py @@ -1,95 +1,95 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from botocore.compat import six - -from s3transfer.compat import accepts_kwargs -from s3transfer.exceptions import InvalidSubscriberMethodError - - -class BaseSubscriber(object): - """The base subscriber class - - It is recommended that all subscriber implementations subclass and then - override the subscription methods (i.e. on_{subsribe_type}() methods). - """ - VALID_SUBSCRIBER_TYPES = [ - 'queued', - 'progress', - 'done' - ] - - def __new__(cls, *args, **kwargs): - cls._validate_subscriber_methods() - return super(BaseSubscriber, cls).__new__(cls) - - @classmethod - def _validate_subscriber_methods(cls): - for subscriber_type in cls.VALID_SUBSCRIBER_TYPES: - subscriber_method = getattr(cls, 'on_' + subscriber_type) - if not six.callable(subscriber_method): - raise InvalidSubscriberMethodError( - 'Subscriber method %s must be callable.' % - subscriber_method) - - if not accepts_kwargs(subscriber_method): - raise InvalidSubscriberMethodError( - 'Subscriber method %s must accept keyword ' - 'arguments (**kwargs)' % subscriber_method) - - def on_queued(self, future, **kwargs): - """Callback to be invoked when transfer request gets queued - - This callback can be useful for: - - * Keeping track of how many transfers have been requested - * Providing the expected transfer size through - future.meta.provide_transfer_size() so a HeadObject would not - need to be made for copies and downloads. - - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. 
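Because BaseSubscriber.__new__ runs _validate_subscriber_methods() above, a subscriber whose hook omits **kwargs is rejected at construction time rather than when the callback later fires. A small illustration; BadSubscriber is a made-up name::

    from s3transfer.exceptions import InvalidSubscriberMethodError
    from s3transfer.subscribers import BaseSubscriber

    class BadSubscriber(BaseSubscriber):
        def on_progress(self, future, bytes_transferred):  # missing **kwargs
            print(bytes_transferred)

    try:
        BadSubscriber()
    except InvalidSubscriberMethodError as e:
        print('rejected at construction: %s' % e)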
- """ - pass - - def on_progress(self, future, bytes_transferred, **kwargs): - """Callback to be invoked when progress is made on transfer - - This callback can be useful for: - - * Recording and displaying progress - - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. - - :type bytes_transferred: int - :param bytes_transferred: The number of bytes transferred for that - invocation of the callback. Note that a negative amount can be - provided, which usually indicates that an in-progress request - needed to be retried and thus progress was rewound. - """ - pass - - def on_done(self, future, **kwargs): - """Callback to be invoked once a transfer is done - - This callback can be useful for: - - * Recording and displaying whether the transfer succeeded or - failed using future.result() - * Running some task after the transfer completed like changing - the last modified time of a downloaded file. - - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. - """ - pass +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from botocore.compat import six + +from s3transfer.compat import accepts_kwargs +from s3transfer.exceptions import InvalidSubscriberMethodError + + +class BaseSubscriber(object): + """The base subscriber class + + It is recommended that all subscriber implementations subclass and then + override the subscription methods (i.e. on_{subsribe_type}() methods). + """ + VALID_SUBSCRIBER_TYPES = [ + 'queued', + 'progress', + 'done' + ] + + def __new__(cls, *args, **kwargs): + cls._validate_subscriber_methods() + return super(BaseSubscriber, cls).__new__(cls) + + @classmethod + def _validate_subscriber_methods(cls): + for subscriber_type in cls.VALID_SUBSCRIBER_TYPES: + subscriber_method = getattr(cls, 'on_' + subscriber_type) + if not six.callable(subscriber_method): + raise InvalidSubscriberMethodError( + 'Subscriber method %s must be callable.' % + subscriber_method) + + if not accepts_kwargs(subscriber_method): + raise InvalidSubscriberMethodError( + 'Subscriber method %s must accept keyword ' + 'arguments (**kwargs)' % subscriber_method) + + def on_queued(self, future, **kwargs): + """Callback to be invoked when transfer request gets queued + + This callback can be useful for: + + * Keeping track of how many transfers have been requested + * Providing the expected transfer size through + future.meta.provide_transfer_size() so a HeadObject would not + need to be made for copies and downloads. + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. 
+ """ + pass + + def on_progress(self, future, bytes_transferred, **kwargs): + """Callback to be invoked when progress is made on transfer + + This callback can be useful for: + + * Recording and displaying progress + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. + + :type bytes_transferred: int + :param bytes_transferred: The number of bytes transferred for that + invocation of the callback. Note that a negative amount can be + provided, which usually indicates that an in-progress request + needed to be retried and thus progress was rewound. + """ + pass + + def on_done(self, future, **kwargs): + """Callback to be invoked once a transfer is done + + This callback can be useful for: + + * Recording and displaying whether the transfer succeeded or + failed using future.result() + * Running some task after the transfer completed like changing + the last modified time of a downloaded file. + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. + """ + pass diff --git a/contrib/python/s3transfer/py2/s3transfer/tasks.py b/contrib/python/s3transfer/py2/s3transfer/tasks.py index dcb407f1a4..1d314216e4 100644 --- a/contrib/python/s3transfer/py2/s3transfer/tasks.py +++ b/contrib/python/s3transfer/py2/s3transfer/tasks.py @@ -1,364 +1,364 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import logging - -from s3transfer.utils import get_callbacks - - -logger = logging.getLogger(__name__) - - -class Task(object): - """A task associated to a TransferFuture request - - This is a base class for other classes to subclass from. All subclassed - classes must implement the main() method. - """ - def __init__(self, transfer_coordinator, main_kwargs=None, - pending_main_kwargs=None, done_callbacks=None, - is_final=False): - """ - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The context associated to the - TransferFuture for which this Task is associated with. - - :type main_kwargs: dict - :param main_kwargs: The keyword args that can be immediately supplied - to the _main() method of the task - - :type pending_main_kwargs: dict - :param pending_main_kwargs: The keyword args that are depended upon - by the result from a dependent future(s). The result returned by - the future(s) will be used as the value for the keyword argument - when _main() is called. The values for each key can be: - * a single future - Once completed, its value will be the - result of that single future - * a list of futures - Once all of the futures complete, the - value used will be a list of each completed future result - value in order of when they were originally supplied. - - :type done_callbacks: list of callbacks - :param done_callbacks: A list of callbacks to call once the task is - done completing. 
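Taken together, the on_queued/on_progress/on_done hooks documented in the subscribers.py hunk above support a simple progress reporter like the sketch below. The subclass relies only on what the docstrings state; the commented-out wiring assumes the manager's upload() accepts a subscribers list, which is not shown in this hunk::

    from s3transfer.subscribers import BaseSubscriber

    class ProgressSubscriber(BaseSubscriber):
        def __init__(self):
            self._bytes_seen = 0

        def on_queued(self, future, **kwargs):
            print('transfer queued')

        def on_progress(self, future, bytes_transferred, **kwargs):
            # May be negative when a retried request rewinds progress.
            self._bytes_seen += bytes_transferred

        def on_done(self, future, **kwargs):
            future.result()   # re-raises if the transfer failed
            print('transfer finished after %d bytes' % self._bytes_seen)

    # Hypothetical wiring (upload(..., subscribers=...) is assumed, not shown here):
    # manager.upload('/tmp/myfile', 'mybucket', 'mykey',
    #                subscribers=[ProgressSubscriber()])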
Each callback will be called with no arguments - and will be called no matter if the task succeeds or an exception - is raised. - - :type is_final: boolean - :param is_final: True, to indicate that this task is the final task - for the TransferFuture request. By setting this value to True, it - will set the result of the entire TransferFuture to the result - returned by this task's main() method. - """ - self._transfer_coordinator = transfer_coordinator - - self._main_kwargs = main_kwargs - if self._main_kwargs is None: - self._main_kwargs = {} - - self._pending_main_kwargs = pending_main_kwargs - if pending_main_kwargs is None: - self._pending_main_kwargs = {} - - self._done_callbacks = done_callbacks - if self._done_callbacks is None: - self._done_callbacks = [] - - self._is_final = is_final - - def __repr__(self): - # These are the general main_kwarg parameters that we want to - # display in the repr. - params_to_display = [ - 'bucket', 'key', 'part_number', 'final_filename', - 'transfer_future', 'offset', 'extra_args' - ] - main_kwargs_to_display = self._get_kwargs_with_params_to_include( - self._main_kwargs, params_to_display) - return '%s(transfer_id=%s, %s)' % ( - self.__class__.__name__, self._transfer_coordinator.transfer_id, - main_kwargs_to_display) - - @property - def transfer_id(self): - """The id for the transfer request that the task belongs to""" - return self._transfer_coordinator.transfer_id - - def _get_kwargs_with_params_to_include(self, kwargs, include): - filtered_kwargs = {} - for param in include: - if param in kwargs: - filtered_kwargs[param] = kwargs[param] - return filtered_kwargs - - def _get_kwargs_with_params_to_exclude(self, kwargs, exclude): - filtered_kwargs = {} - for param, value in kwargs.items(): - if param in exclude: - continue - filtered_kwargs[param] = value - return filtered_kwargs - - def __call__(self): - """The callable to use when submitting a Task to an executor""" - try: - # Wait for all of futures this task depends on. - self._wait_on_dependent_futures() - # Gather up all of the main keyword arguments for main(). - # This includes the immediately provided main_kwargs and - # the values for pending_main_kwargs that source from the return - # values from the task's depenent futures. - kwargs = self._get_all_main_kwargs() - # If the task is not done (really only if some other related - # task to the TransferFuture had failed) then execute the task's - # main() method. - if not self._transfer_coordinator.done(): - return self._execute_main(kwargs) - except Exception as e: - self._log_and_set_exception(e) - finally: - # Run any done callbacks associated to the task no matter what. - for done_callback in self._done_callbacks: - done_callback() - - if self._is_final: - # If this is the final task announce that it is done if results - # are waiting on its completion. - self._transfer_coordinator.announce_done() - - def _execute_main(self, kwargs): - # Do not display keyword args that should not be printed, especially - # if they are going to make the logs hard to follow. - params_to_exclude = ['data'] - kwargs_to_display = self._get_kwargs_with_params_to_exclude( - kwargs, params_to_exclude) - # Log what is about to be executed. - logger.debug( - "Executing task %s with kwargs %s" % (self, kwargs_to_display) - ) - - return_value = self._main(**kwargs) - # If the task is the final task, then set the TransferFuture's - # value to the return value from main(). 
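As the Task docstring above describes, a concrete task only implements _main(), and main_kwargs are handed to it when the task object is called. A toy sketch, assuming a Task can be driven directly with a bare TransferCoordinator the way an executor would drive it; AddTask and its arguments are invented for illustration::

    from s3transfer.futures import TransferCoordinator
    from s3transfer.tasks import Task

    class AddTask(Task):
        """Toy task: _main() receives exactly the main_kwargs it was given."""
        def _main(self, left, right):
            return left + right

    coordinator = TransferCoordinator(transfer_id=1)
    task = AddTask(
        transfer_coordinator=coordinator,
        main_kwargs={'left': 2, 'right': 3},
        is_final=True,
    )
    task()   # runs _main(left=2, right=3); is_final=True stores the return value
    # Should print 5, assuming the coordinator can be queried directly like this.
    print(coordinator.result())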
- if self._is_final: - self._transfer_coordinator.set_result(return_value) - return return_value - - def _log_and_set_exception(self, exception): - # If an exception is ever thrown than set the exception for the - # entire TransferFuture. - logger.debug("Exception raised.", exc_info=True) - self._transfer_coordinator.set_exception(exception) - - def _main(self, **kwargs): - """The method that will be ran in the executor - - This method must be implemented by subclasses from Task. main() can - be implemented with any arguments decided upon by the subclass. - """ - raise NotImplementedError('_main() must be implemented') - - def _wait_on_dependent_futures(self): - # Gather all of the futures into that main() depends on. - futures_to_wait_on = [] - for _, future in self._pending_main_kwargs.items(): - # If the pending main keyword arg is a list then extend the list. - if isinstance(future, list): - futures_to_wait_on.extend(future) - # If the pending main keword arg is a future append it to the list. - else: - futures_to_wait_on.append(future) - # Now wait for all of the futures to complete. - self._wait_until_all_complete(futures_to_wait_on) - - def _wait_until_all_complete(self, futures): - # This is a basic implementation of the concurrent.futures.wait() - # - # concurrent.futures.wait() is not used instead because of this - # reported issue: https://bugs.python.org/issue20319. - # The issue would occassionally cause multipart uploads to hang - # when wait() was called. With this approach, it avoids the - # concurrency bug by removing any association with concurrent.futures - # implementation of waiters. - logger.debug( - '%s about to wait for the following futures %s', self, futures) - for future in futures: - try: - logger.debug('%s about to wait for %s', self, future) - future.result() - except Exception: - # result() can also produce exceptions. We want to ignore - # these to be deffered to error handling down the road. - pass - logger.debug('%s done waiting for dependent futures', self) - - def _get_all_main_kwargs(self): - # Copy over all of the kwargs that we know is available. - kwargs = copy.copy(self._main_kwargs) - - # Iterate through the kwargs whose values are pending on the result - # of a future. - for key, pending_value in self._pending_main_kwargs.items(): - # If the value is a list of futures, iterate though the list - # appending on the result from each future. - if isinstance(pending_value, list): - result = [] - for future in pending_value: - result.append(future.result()) - # Otherwise if the pending_value is a future, just wait for it. - else: - result = pending_value.result() - # Add the retrieved value to the kwargs to be sent to the - # main() call. - kwargs[key] = result - return kwargs - - -class SubmissionTask(Task): - """A base class for any submission task - - Submission tasks are the top-level task used to submit a series of tasks - to execute a particular transfer. 
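_get_all_main_kwargs() above is the piece that turns pending_main_kwargs into concrete arguments: every future, or list of futures, is replaced by its result before _main() runs. The same resolution rule as a standalone sketch with concurrent.futures; the argument names are made up::

    import concurrent.futures

    def resolve_pending_kwargs(main_kwargs, pending_main_kwargs):
        """Simplified mirror of _get_all_main_kwargs(): futures (or lists of
        futures) are replaced by their results before the task body runs."""
        kwargs = dict(main_kwargs)
        for key, pending in pending_main_kwargs.items():
            if isinstance(pending, list):
                kwargs[key] = [f.result() for f in pending]
            else:
                kwargs[key] = pending.result()
        return kwargs

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as pool:
        etag_futures = [pool.submit(lambda i=i: 'etag-%d' % i) for i in range(3)]
        kwargs = resolve_pending_kwargs(
            main_kwargs={'upload_id': 'abc'},
            pending_main_kwargs={'parts': etag_futures})
    print(kwargs)  # {'upload_id': 'abc', 'parts': ['etag-0', 'etag-1', 'etag-2']}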
- """ - def _main(self, transfer_future, **kwargs): - """ - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :param kwargs: Any additional kwargs that you may want to pass - to the _submit() method - """ - try: - self._transfer_coordinator.set_status_to_queued() - - # Before submitting any tasks, run all of the on_queued callbacks - on_queued_callbacks = get_callbacks(transfer_future, 'queued') - for on_queued_callback in on_queued_callbacks: - on_queued_callback() - - # Once callbacks have been ran set the status to running. - self._transfer_coordinator.set_status_to_running() - - # Call the submit method to start submitting tasks to execute the - # transfer. - self._submit(transfer_future=transfer_future, **kwargs) - except BaseException as e: - # If there was an exception raised during the submission of task - # there is a chance that the final task that signals if a transfer - # is done and too run the cleanup may never have been submitted in - # the first place so we need to account accordingly. - # - # Note that BaseException is caught, instead of Exception, because - # for some implmentations of executors, specifically the serial - # implementation, the SubmissionTask is directly exposed to - # KeyboardInterupts and so needs to cleanup and signal done - # for those as well. - - # Set the exception, that caused the process to fail. - self._log_and_set_exception(e) - - # Wait for all possibly associated futures that may have spawned - # from this submission task have finished before we anounce the - # transfer done. - self._wait_for_all_submitted_futures_to_complete() - - # Announce the transfer as done, which will run any cleanups - # and done callbacks as well. - self._transfer_coordinator.announce_done() - - def _submit(self, transfer_future, **kwargs): - """The submition method to be implemented - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :param kwargs: Any additional keyword arguments you want to be passed - in - """ - raise NotImplementedError('_submit() must be implemented') - - def _wait_for_all_submitted_futures_to_complete(self): - # We want to wait for all futures that were submitted to - # complete as we do not want the cleanup callbacks or done callbacks - # to be called to early. The main problem is any task that was - # submitted may have submitted even more during its process and so - # we need to account accordingly. - - # First get all of the futures that were submitted up to this point. - submitted_futures = self._transfer_coordinator.associated_futures - while submitted_futures: - # Wait for those futures to complete. - self._wait_until_all_complete(submitted_futures) - # However, more futures may have been submitted as we waited so - # we need to check again for any more associated futures. - possibly_more_submitted_futures = \ - self._transfer_coordinator.associated_futures - # If the current list of submitted futures is equal to the - # the list of associated futures for when after the wait completes, - # we can ensure no more futures were submitted in waiting on - # the current list of futures to complete ultimately meaning all - # futures that may have spawned from the original submission task - # have completed. 
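_wait_for_all_submitted_futures_to_complete() above keeps waiting until the set of associated futures stops growing, because tasks may submit further tasks while they are being waited on. The same fixed-point idea in isolation; get_associated_futures and wait_all are stand-ins, not s3transfer APIs::

    def wait_for_quiescence(get_associated_futures, wait_all):
        """Keep waiting until no new futures were spawned while we waited."""
        submitted = get_associated_futures()
        while submitted:
            wait_all(submitted)
            now_associated = get_associated_futures()
            if submitted == now_associated:
                # Nothing new appeared while waiting, so everything spawned
                # from the original submission has completed.
                break
            submitted = now_associated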
- if submitted_futures == possibly_more_submitted_futures: - break - submitted_futures = possibly_more_submitted_futures - - -class CreateMultipartUploadTask(Task): - """Task to initiate a multipart upload""" - def _main(self, client, bucket, key, extra_args): - """ - :param client: The client to use when calling CreateMultipartUpload - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param extra_args: A dictionary of any extra arguments that may be - used in the intialization. - - :returns: The upload id of the multipart upload - """ - # Create the multipart upload. - response = client.create_multipart_upload( - Bucket=bucket, Key=key, **extra_args) - upload_id = response['UploadId'] - - # Add a cleanup if the multipart upload fails at any point. - self._transfer_coordinator.add_failure_cleanup( - client.abort_multipart_upload, Bucket=bucket, Key=key, - UploadId=upload_id - ) - return upload_id - - -class CompleteMultipartUploadTask(Task): - """Task to complete a multipart upload""" - def _main(self, client, bucket, key, upload_id, parts, extra_args): - """ - :param client: The client to use when calling CompleteMultipartUpload - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param parts: A list of parts to use to complete the multipart upload:: - - [{'Etag': etag_value, 'PartNumber': part_number}, ...] - - Each element in the list consists of a return value from - ``UploadPartTask.main()``. - :param extra_args: A dictionary of any extra arguments that may be - used in completing the multipart transfer. - """ - client.complete_multipart_upload( - Bucket=bucket, Key=key, UploadId=upload_id, - MultipartUpload={'Parts': parts}, - **extra_args) +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import copy +import logging + +from s3transfer.utils import get_callbacks + + +logger = logging.getLogger(__name__) + + +class Task(object): + """A task associated to a TransferFuture request + + This is a base class for other classes to subclass from. All subclassed + classes must implement the main() method. + """ + def __init__(self, transfer_coordinator, main_kwargs=None, + pending_main_kwargs=None, done_callbacks=None, + is_final=False): + """ + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The context associated to the + TransferFuture for which this Task is associated with. + + :type main_kwargs: dict + :param main_kwargs: The keyword args that can be immediately supplied + to the _main() method of the task + + :type pending_main_kwargs: dict + :param pending_main_kwargs: The keyword args that are depended upon + by the result from a dependent future(s). The result returned by + the future(s) will be used as the value for the keyword argument + when _main() is called. 
The values for each key can be: + * a single future - Once completed, its value will be the + result of that single future + * a list of futures - Once all of the futures complete, the + value used will be a list of each completed future result + value in order of when they were originally supplied. + + :type done_callbacks: list of callbacks + :param done_callbacks: A list of callbacks to call once the task is + done completing. Each callback will be called with no arguments + and will be called no matter if the task succeeds or an exception + is raised. + + :type is_final: boolean + :param is_final: True, to indicate that this task is the final task + for the TransferFuture request. By setting this value to True, it + will set the result of the entire TransferFuture to the result + returned by this task's main() method. + """ + self._transfer_coordinator = transfer_coordinator + + self._main_kwargs = main_kwargs + if self._main_kwargs is None: + self._main_kwargs = {} + + self._pending_main_kwargs = pending_main_kwargs + if pending_main_kwargs is None: + self._pending_main_kwargs = {} + + self._done_callbacks = done_callbacks + if self._done_callbacks is None: + self._done_callbacks = [] + + self._is_final = is_final + + def __repr__(self): + # These are the general main_kwarg parameters that we want to + # display in the repr. + params_to_display = [ + 'bucket', 'key', 'part_number', 'final_filename', + 'transfer_future', 'offset', 'extra_args' + ] + main_kwargs_to_display = self._get_kwargs_with_params_to_include( + self._main_kwargs, params_to_display) + return '%s(transfer_id=%s, %s)' % ( + self.__class__.__name__, self._transfer_coordinator.transfer_id, + main_kwargs_to_display) + + @property + def transfer_id(self): + """The id for the transfer request that the task belongs to""" + return self._transfer_coordinator.transfer_id + + def _get_kwargs_with_params_to_include(self, kwargs, include): + filtered_kwargs = {} + for param in include: + if param in kwargs: + filtered_kwargs[param] = kwargs[param] + return filtered_kwargs + + def _get_kwargs_with_params_to_exclude(self, kwargs, exclude): + filtered_kwargs = {} + for param, value in kwargs.items(): + if param in exclude: + continue + filtered_kwargs[param] = value + return filtered_kwargs + + def __call__(self): + """The callable to use when submitting a Task to an executor""" + try: + # Wait for all of futures this task depends on. + self._wait_on_dependent_futures() + # Gather up all of the main keyword arguments for main(). + # This includes the immediately provided main_kwargs and + # the values for pending_main_kwargs that source from the return + # values from the task's depenent futures. + kwargs = self._get_all_main_kwargs() + # If the task is not done (really only if some other related + # task to the TransferFuture had failed) then execute the task's + # main() method. + if not self._transfer_coordinator.done(): + return self._execute_main(kwargs) + except Exception as e: + self._log_and_set_exception(e) + finally: + # Run any done callbacks associated to the task no matter what. + for done_callback in self._done_callbacks: + done_callback() + + if self._is_final: + # If this is the final task announce that it is done if results + # are waiting on its completion. + self._transfer_coordinator.announce_done() + + def _execute_main(self, kwargs): + # Do not display keyword args that should not be printed, especially + # if they are going to make the logs hard to follow. 
+ params_to_exclude = ['data'] + kwargs_to_display = self._get_kwargs_with_params_to_exclude( + kwargs, params_to_exclude) + # Log what is about to be executed. + logger.debug( + "Executing task %s with kwargs %s" % (self, kwargs_to_display) + ) + + return_value = self._main(**kwargs) + # If the task is the final task, then set the TransferFuture's + # value to the return value from main(). + if self._is_final: + self._transfer_coordinator.set_result(return_value) + return return_value + + def _log_and_set_exception(self, exception): + # If an exception is ever thrown than set the exception for the + # entire TransferFuture. + logger.debug("Exception raised.", exc_info=True) + self._transfer_coordinator.set_exception(exception) + + def _main(self, **kwargs): + """The method that will be ran in the executor + + This method must be implemented by subclasses from Task. main() can + be implemented with any arguments decided upon by the subclass. + """ + raise NotImplementedError('_main() must be implemented') + + def _wait_on_dependent_futures(self): + # Gather all of the futures into that main() depends on. + futures_to_wait_on = [] + for _, future in self._pending_main_kwargs.items(): + # If the pending main keyword arg is a list then extend the list. + if isinstance(future, list): + futures_to_wait_on.extend(future) + # If the pending main keword arg is a future append it to the list. + else: + futures_to_wait_on.append(future) + # Now wait for all of the futures to complete. + self._wait_until_all_complete(futures_to_wait_on) + + def _wait_until_all_complete(self, futures): + # This is a basic implementation of the concurrent.futures.wait() + # + # concurrent.futures.wait() is not used instead because of this + # reported issue: https://bugs.python.org/issue20319. + # The issue would occassionally cause multipart uploads to hang + # when wait() was called. With this approach, it avoids the + # concurrency bug by removing any association with concurrent.futures + # implementation of waiters. + logger.debug( + '%s about to wait for the following futures %s', self, futures) + for future in futures: + try: + logger.debug('%s about to wait for %s', self, future) + future.result() + except Exception: + # result() can also produce exceptions. We want to ignore + # these to be deffered to error handling down the road. + pass + logger.debug('%s done waiting for dependent futures', self) + + def _get_all_main_kwargs(self): + # Copy over all of the kwargs that we know is available. + kwargs = copy.copy(self._main_kwargs) + + # Iterate through the kwargs whose values are pending on the result + # of a future. + for key, pending_value in self._pending_main_kwargs.items(): + # If the value is a list of futures, iterate though the list + # appending on the result from each future. + if isinstance(pending_value, list): + result = [] + for future in pending_value: + result.append(future.result()) + # Otherwise if the pending_value is a future, just wait for it. + else: + result = pending_value.result() + # Add the retrieved value to the kwargs to be sent to the + # main() call. + kwargs[key] = result + return kwargs + + +class SubmissionTask(Task): + """A base class for any submission task + + Submission tasks are the top-level task used to submit a series of tasks + to execute a particular transfer. 
+ """ + def _main(self, transfer_future, **kwargs): + """ + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :param kwargs: Any additional kwargs that you may want to pass + to the _submit() method + """ + try: + self._transfer_coordinator.set_status_to_queued() + + # Before submitting any tasks, run all of the on_queued callbacks + on_queued_callbacks = get_callbacks(transfer_future, 'queued') + for on_queued_callback in on_queued_callbacks: + on_queued_callback() + + # Once callbacks have been ran set the status to running. + self._transfer_coordinator.set_status_to_running() + + # Call the submit method to start submitting tasks to execute the + # transfer. + self._submit(transfer_future=transfer_future, **kwargs) + except BaseException as e: + # If there was an exception raised during the submission of task + # there is a chance that the final task that signals if a transfer + # is done and too run the cleanup may never have been submitted in + # the first place so we need to account accordingly. + # + # Note that BaseException is caught, instead of Exception, because + # for some implmentations of executors, specifically the serial + # implementation, the SubmissionTask is directly exposed to + # KeyboardInterupts and so needs to cleanup and signal done + # for those as well. + + # Set the exception, that caused the process to fail. + self._log_and_set_exception(e) + + # Wait for all possibly associated futures that may have spawned + # from this submission task have finished before we anounce the + # transfer done. + self._wait_for_all_submitted_futures_to_complete() + + # Announce the transfer as done, which will run any cleanups + # and done callbacks as well. + self._transfer_coordinator.announce_done() + + def _submit(self, transfer_future, **kwargs): + """The submition method to be implemented + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :param kwargs: Any additional keyword arguments you want to be passed + in + """ + raise NotImplementedError('_submit() must be implemented') + + def _wait_for_all_submitted_futures_to_complete(self): + # We want to wait for all futures that were submitted to + # complete as we do not want the cleanup callbacks or done callbacks + # to be called to early. The main problem is any task that was + # submitted may have submitted even more during its process and so + # we need to account accordingly. + + # First get all of the futures that were submitted up to this point. + submitted_futures = self._transfer_coordinator.associated_futures + while submitted_futures: + # Wait for those futures to complete. + self._wait_until_all_complete(submitted_futures) + # However, more futures may have been submitted as we waited so + # we need to check again for any more associated futures. + possibly_more_submitted_futures = \ + self._transfer_coordinator.associated_futures + # If the current list of submitted futures is equal to the + # the list of associated futures for when after the wait completes, + # we can ensure no more futures were submitted in waiting on + # the current list of futures to complete ultimately meaning all + # futures that may have spawned from the original submission task + # have completed. 
+ if submitted_futures == possibly_more_submitted_futures: + break + submitted_futures = possibly_more_submitted_futures + + +class CreateMultipartUploadTask(Task): + """Task to initiate a multipart upload""" + def _main(self, client, bucket, key, extra_args): + """ + :param client: The client to use when calling CreateMultipartUpload + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param extra_args: A dictionary of any extra arguments that may be + used in the intialization. + + :returns: The upload id of the multipart upload + """ + # Create the multipart upload. + response = client.create_multipart_upload( + Bucket=bucket, Key=key, **extra_args) + upload_id = response['UploadId'] + + # Add a cleanup if the multipart upload fails at any point. + self._transfer_coordinator.add_failure_cleanup( + client.abort_multipart_upload, Bucket=bucket, Key=key, + UploadId=upload_id + ) + return upload_id + + +class CompleteMultipartUploadTask(Task): + """Task to complete a multipart upload""" + def _main(self, client, bucket, key, upload_id, parts, extra_args): + """ + :param client: The client to use when calling CompleteMultipartUpload + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param parts: A list of parts to use to complete the multipart upload:: + + [{'Etag': etag_value, 'PartNumber': part_number}, ...] + + Each element in the list consists of a return value from + ``UploadPartTask.main()``. + :param extra_args: A dictionary of any extra arguments that may be + used in completing the multipart transfer. + """ + client.complete_multipart_upload( + Bucket=bucket, Key=key, UploadId=upload_id, + MultipartUpload={'Parts': parts}, + **extra_args) diff --git a/contrib/python/s3transfer/py2/s3transfer/upload.py b/contrib/python/s3transfer/py2/s3transfer/upload.py index 7e13c58a34..27451e5eff 100644 --- a/contrib/python/s3transfer/py2/s3transfer/upload.py +++ b/contrib/python/s3transfer/py2/s3transfer/upload.py @@ -1,726 +1,726 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
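The CreateMultipartUploadTask and CompleteMultipartUploadTask restored in the tasks.py hunk above are thin wrappers over three client calls plus an abort cleanup. For orientation, roughly the same flow written directly against a boto3 S3 client, outside the task framework; the bucket, key and part bodies are placeholders::

    import boto3

    client = boto3.client('s3')
    bucket, key = 'mybucket', 'mykey'

    response = client.create_multipart_upload(Bucket=bucket, Key=key)
    upload_id = response['UploadId']
    try:
        parts = []
        # All parts except the last must be at least 5 MiB.
        for part_number, body in enumerate([b'a' * 5 * 1024 * 1024, b'tail'],
                                           start=1):
            part = client.upload_part(
                Bucket=bucket, Key=key, UploadId=upload_id,
                PartNumber=part_number, Body=body)
            parts.append({'ETag': part['ETag'], 'PartNumber': part_number})
        client.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts})
    except Exception:
        # Equivalent of the failure cleanup the task registers above.
        client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise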
-import math - -from botocore.compat import six - -from s3transfer.compat import seekable, readable -from s3transfer.futures import IN_MEMORY_UPLOAD_TAG -from s3transfer.tasks import Task -from s3transfer.tasks import SubmissionTask -from s3transfer.tasks import CreateMultipartUploadTask -from s3transfer.tasks import CompleteMultipartUploadTask -from s3transfer.utils import get_callbacks -from s3transfer.utils import get_filtered_dict -from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster - - -class AggregatedProgressCallback(object): - def __init__(self, callbacks, threshold=1024 * 256): - """Aggregates progress updates for every provided progress callback - - :type callbacks: A list of functions that accepts bytes_transferred - as a single argument - :param callbacks: The callbacks to invoke when threshold is reached - - :type threshold: int - :param threshold: The progress threshold in which to take the - aggregated progress and invoke the progress callback with that - aggregated progress total - """ - self._callbacks = callbacks - self._threshold = threshold - self._bytes_seen = 0 - - def __call__(self, bytes_transferred): - self._bytes_seen += bytes_transferred - if self._bytes_seen >= self._threshold: - self._trigger_callbacks() - - def flush(self): - """Flushes out any progress that has not been sent to its callbacks""" - if self._bytes_seen > 0: - self._trigger_callbacks() - - def _trigger_callbacks(self): - for callback in self._callbacks: - callback(bytes_transferred=self._bytes_seen) - self._bytes_seen = 0 - - -class InterruptReader(object): - """Wrapper that can interrupt reading using an error - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import math + +from botocore.compat import six + +from s3transfer.compat import seekable, readable +from s3transfer.futures import IN_MEMORY_UPLOAD_TAG +from s3transfer.tasks import Task +from s3transfer.tasks import SubmissionTask +from s3transfer.tasks import CreateMultipartUploadTask +from s3transfer.tasks import CompleteMultipartUploadTask +from s3transfer.utils import get_callbacks +from s3transfer.utils import get_filtered_dict +from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster + + +class AggregatedProgressCallback(object): + def __init__(self, callbacks, threshold=1024 * 256): + """Aggregates progress updates for every provided progress callback + + :type callbacks: A list of functions that accepts bytes_transferred + as a single argument + :param callbacks: The callbacks to invoke when threshold is reached + + :type threshold: int + :param threshold: The progress threshold in which to take the + aggregated progress and invoke the progress callback with that + aggregated progress total + """ + self._callbacks = callbacks + self._threshold = threshold + self._bytes_seen = 0 + + def __call__(self, bytes_transferred): + self._bytes_seen += bytes_transferred + if self._bytes_seen >= self._threshold: + self._trigger_callbacks() + + def flush(self): + """Flushes out any progress that has not been sent to its callbacks""" + if self._bytes_seen > 0: + self._trigger_callbacks() + + def _trigger_callbacks(self): + for callback in self._callbacks: + callback(bytes_transferred=self._bytes_seen) + self._bytes_seen = 0 + + +class InterruptReader(object): + """Wrapper that can interrupt reading using an error + It uses a transfer coordinator to propagate an error if it notices - that a read is being made while the file is being read from. - - :type fileobj: file-like obj - :param fileobj: The file-like object to read from - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator to use if the - reader needs to be interrupted. - """ - def __init__(self, fileobj, transfer_coordinator): - self._fileobj = fileobj - self._transfer_coordinator = transfer_coordinator - - def read(self, amount=None): - # If there is an exception, then raise the exception. - # We raise an error instead of returning no bytes because for - # requests where the content length and md5 was sent, it will - # cause md5 mismatches and retries as there was no indication that - # the stream being read from encountered any issues. - if self._transfer_coordinator.exception: - raise self._transfer_coordinator.exception - return self._fileobj.read(amount) - + that a read is being made while the file is being read from. + + :type fileobj: file-like obj + :param fileobj: The file-like object to read from + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator to use if the + reader needs to be interrupted. + """ + def __init__(self, fileobj, transfer_coordinator): + self._fileobj = fileobj + self._transfer_coordinator = transfer_coordinator + + def read(self, amount=None): + # If there is an exception, then raise the exception. + # We raise an error instead of returning no bytes because for + # requests where the content length and md5 was sent, it will + # cause md5 mismatches and retries as there was no indication that + # the stream being read from encountered any issues. 
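AggregatedProgressCallback, restored earlier in this upload.py hunk, batches small progress updates so the wrapped callbacks fire at most once per ``threshold`` bytes, plus a final flush(). A quick sketch of that behaviour::

    from s3transfer.upload import AggregatedProgressCallback

    def report(bytes_transferred):
        print('progress: +%d bytes' % bytes_transferred)

    aggregated = AggregatedProgressCallback([report], threshold=1024)
    aggregated(300)     # 300 bytes seen, below the threshold, nothing printed
    aggregated(900)     # 1200 >= 1024, report(bytes_transferred=1200) fires
    aggregated(100)
    aggregated.flush()  # drains the remaining 100 bytes at the end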
+ if self._transfer_coordinator.exception: + raise self._transfer_coordinator.exception + return self._fileobj.read(amount) + def seek(self, where, whence=0): self._fileobj.seek(where, whence) - - def tell(self): - return self._fileobj.tell() - - def close(self): - self._fileobj.close() - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - -class UploadInputManager(object): - """Base manager class for handling various types of files for uploads - - This class is typically used for the UploadSubmissionTask class to help - determine the following: - - * How to determine the size of the file - * How to determine if a multipart upload is required - * How to retrieve the body for a PutObject - * How to retrieve the bodies for a set of UploadParts - - The answers/implementations differ for the various types of file inputs - that may be accepted. All implementations must subclass and override - public methods from this class. - """ - def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): - self._osutil = osutil - self._transfer_coordinator = transfer_coordinator - self._bandwidth_limiter = bandwidth_limiter - - @classmethod - def is_compatible(cls, upload_source): - """Determines if the source for the upload is compatible with manager - - :param upload_source: The source for which the upload will pull data - from. - - :returns: True if the manager can handle the type of source specified - otherwise returns False. - """ - raise NotImplementedError('must implement _is_compatible()') - - def stores_body_in_memory(self, operation_name): - """Whether the body it provides are stored in-memory - - :type operation_name: str - :param operation_name: The name of the client operation that the body - is being used for. Valid operation_names are ``put_object`` and - ``upload_part``. - - :rtype: boolean - :returns: True if the body returned by the manager will be stored in - memory. False if the manager will not directly store the body in - memory. - """ - raise NotImplemented('must implement store_body_in_memory()') - - def provide_transfer_size(self, transfer_future): - """Provides the transfer size of an upload - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - """ - raise NotImplementedError('must implement provide_transfer_size()') - - def requires_multipart_upload(self, transfer_future, config): - """Determines where a multipart upload is required - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type config: s3transfer.manager.TransferConfig - :param config: The config associated to the transfer manager - - :rtype: boolean - :returns: True, if the upload should be multipart based on - configuartion and size. False, otherwise. - """ - raise NotImplementedError('must implement requires_multipart_upload()') - - def get_put_object_body(self, transfer_future): - """Returns the body to use for PutObject - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type config: s3transfer.manager.TransferConfig - :param config: The config associated to the transfer manager - - :rtype: s3transfer.utils.ReadFileChunk - :returns: A ReadFileChunk including all progress callbacks - associated with the transfer future. 
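InterruptReader above raises the coordinator's stored exception instead of returning short data, so an interrupted upload fails immediately rather than surfacing later as an MD5 mismatch and retry. The same pattern in a self-contained form; InterruptibleReader is a simplified stand-in, not part of s3transfer::

    import io

    class InterruptibleReader(object):
        """Raise a stored error on read instead of silently returning short data."""
        def __init__(self, fileobj):
            self._fileobj = fileobj
            self.exception = None

        def read(self, amount=None):
            if self.exception:
                raise self.exception
            return self._fileobj.read(amount)

    reader = InterruptibleReader(io.BytesIO(b'payload'))
    print(reader.read(3))                         # b'pay'
    reader.exception = RuntimeError('transfer cancelled')
    try:
        reader.read()
    except RuntimeError as e:
        print('read interrupted: %s' % e)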
- """ - raise NotImplementedError('must implement get_put_object_body()') - - def yield_upload_part_bodies(self, transfer_future, chunksize): - """Yields the part number and body to use for each UploadPart - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type chunksize: int - :param chunksize: The chunksize to use for this upload. - - :rtype: int, s3transfer.utils.ReadFileChunk - :returns: Yields the part number and the ReadFileChunk including all - progress callbacks associated with the transfer future for that - specific yielded part. - """ - raise NotImplementedError('must implement yield_upload_part_bodies()') - - def _wrap_fileobj(self, fileobj): - fileobj = InterruptReader(fileobj, self._transfer_coordinator) - if self._bandwidth_limiter: - fileobj = self._bandwidth_limiter.get_bandwith_limited_stream( - fileobj, self._transfer_coordinator, enabled=False) - return fileobj - - def _get_progress_callbacks(self, transfer_future): - callbacks = get_callbacks(transfer_future, 'progress') - # We only want to be wrapping the callbacks if there are callbacks to - # invoke because we do not want to be doing any unnecessary work if - # there are no callbacks to invoke. - if callbacks: - return [AggregatedProgressCallback(callbacks)] - return [] - - def _get_close_callbacks(self, aggregated_progress_callbacks): - return [callback.flush for callback in aggregated_progress_callbacks] - - -class UploadFilenameInputManager(UploadInputManager): - """Upload utility for filenames""" - @classmethod - def is_compatible(cls, upload_source): - return isinstance(upload_source, six.string_types) - - def stores_body_in_memory(self, operation_name): - return False - - def provide_transfer_size(self, transfer_future): - transfer_future.meta.provide_transfer_size( - self._osutil.get_file_size( - transfer_future.meta.call_args.fileobj)) - - def requires_multipart_upload(self, transfer_future, config): - return transfer_future.meta.size >= config.multipart_threshold - - def get_put_object_body(self, transfer_future): - # Get a file-like object for the given input - fileobj, full_size = self._get_put_object_fileobj_with_full_size( - transfer_future) - - # Wrap fileobj with interrupt reader that will quickly cancel - # uploads if needed instead of having to wait for the socket - # to completely read all of the data. - fileobj = self._wrap_fileobj(fileobj) - - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - size = transfer_future.meta.size - # Return the file-like object wrapped into a ReadFileChunk to get - # progress. - return self._osutil.open_file_chunk_reader_from_fileobj( - fileobj=fileobj, chunk_size=size, full_file_size=full_size, - callbacks=callbacks, close_callbacks=close_callbacks) - - def yield_upload_part_bodies(self, transfer_future, chunksize): - full_file_size = transfer_future.meta.size - num_parts = self._get_num_parts(transfer_future, chunksize) - for part_number in range(1, num_parts + 1): - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - start_byte = chunksize * (part_number - 1) - # Get a file-like object for that part and the size of the full - # file size for the associated file-like object for that part. 
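UploadFilenameInputManager.requires_multipart_upload() above reduces the multipart decision to one comparison against the configured multipart_threshold. A tiny sketch; the 8 MiB default is s3transfer's usual TransferConfig value, assumed here rather than stated in this hunk::

    def requires_multipart_upload(size, multipart_threshold=8 * 1024 * 1024):
        # Anything at or above the threshold is split into parts.
        return size >= multipart_threshold

    print(requires_multipart_upload(5 * 1024 * 1024))    # False -> single PutObject
    print(requires_multipart_upload(100 * 1024 * 1024))  # True  -> multipart upload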
- fileobj, full_size = self._get_upload_part_fileobj_with_full_size( - transfer_future.meta.call_args.fileobj, start_byte=start_byte, - part_size=chunksize, full_file_size=full_file_size) - - # Wrap fileobj with interrupt reader that will quickly cancel - # uploads if needed instead of having to wait for the socket - # to completely read all of the data. - fileobj = self._wrap_fileobj(fileobj) - - # Wrap the file-like object into a ReadFileChunk to get progress. - read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj( - fileobj=fileobj, chunk_size=chunksize, - full_file_size=full_size, callbacks=callbacks, - close_callbacks=close_callbacks) - yield part_number, read_file_chunk - - def _get_deferred_open_file(self, fileobj, start_byte): - fileobj = DeferredOpenFile( - fileobj, start_byte, open_function=self._osutil.open) - return fileobj - - def _get_put_object_fileobj_with_full_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - size = transfer_future.meta.size - return self._get_deferred_open_file(fileobj, 0), size - - def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): - start_byte = kwargs['start_byte'] - full_size = kwargs['full_file_size'] - return self._get_deferred_open_file(fileobj, start_byte), full_size - - def _get_num_parts(self, transfer_future, part_size): - return int( - math.ceil(transfer_future.meta.size / float(part_size))) - - -class UploadSeekableInputManager(UploadFilenameInputManager): - """Upload utility for an open file object""" - @classmethod - def is_compatible(cls, upload_source): - return readable(upload_source) and seekable(upload_source) - - def stores_body_in_memory(self, operation_name): - if operation_name == 'put_object': - return False - else: - return True - - def provide_transfer_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - # To determine size, first determine the starting position - # Seek to the end and then find the difference in the length - # between the end and start positions. - start_position = fileobj.tell() - fileobj.seek(0, 2) - end_position = fileobj.tell() - fileobj.seek(start_position) - transfer_future.meta.provide_transfer_size( - end_position - start_position) - - def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): - # Note: It is unfortunate that in order to do a multithreaded - # multipart upload we cannot simply copy the filelike object - # since there is not really a mechanism in python (i.e. os.dup - # points to the same OS filehandle which causes concurrency - # issues). So instead we need to read from the fileobj and + + def tell(self): + return self._fileobj.tell() + + def close(self): + self._fileobj.close() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + +class UploadInputManager(object): + """Base manager class for handling various types of files for uploads + + This class is typically used for the UploadSubmissionTask class to help + determine the following: + + * How to determine the size of the file + * How to determine if a multipart upload is required + * How to retrieve the body for a PutObject + * How to retrieve the bodies for a set of UploadParts + + The answers/implementations differ for the various types of file inputs + that may be accepted. All implementations must subclass and override + public methods from this class. 
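UploadSeekableInputManager.provide_transfer_size() above measures only the bytes remaining from the file object's current position, by seeking to the end and back. The same trick as a standalone helper::

    import io

    def remaining_size(fileobj):
        """Bytes from the current position to the end, leaving the position intact."""
        start = fileobj.tell()
        fileobj.seek(0, 2)      # seek to the end of the stream
        end = fileobj.tell()
        fileobj.seek(start)     # restore the original position
        return end - start

    f = io.BytesIO(b'0123456789')
    f.read(4)                   # consume 4 bytes first
    print(remaining_size(f))    # 6, not 10: only what is left to upload counts
    print(f.tell())             # 4, the position is unchanged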
+ """ + def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): + self._osutil = osutil + self._transfer_coordinator = transfer_coordinator + self._bandwidth_limiter = bandwidth_limiter + + @classmethod + def is_compatible(cls, upload_source): + """Determines if the source for the upload is compatible with manager + + :param upload_source: The source for which the upload will pull data + from. + + :returns: True if the manager can handle the type of source specified + otherwise returns False. + """ + raise NotImplementedError('must implement _is_compatible()') + + def stores_body_in_memory(self, operation_name): + """Whether the body it provides are stored in-memory + + :type operation_name: str + :param operation_name: The name of the client operation that the body + is being used for. Valid operation_names are ``put_object`` and + ``upload_part``. + + :rtype: boolean + :returns: True if the body returned by the manager will be stored in + memory. False if the manager will not directly store the body in + memory. + """ + raise NotImplemented('must implement store_body_in_memory()') + + def provide_transfer_size(self, transfer_future): + """Provides the transfer size of an upload + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + """ + raise NotImplementedError('must implement provide_transfer_size()') + + def requires_multipart_upload(self, transfer_future, config): + """Determines where a multipart upload is required + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type config: s3transfer.manager.TransferConfig + :param config: The config associated to the transfer manager + + :rtype: boolean + :returns: True, if the upload should be multipart based on + configuartion and size. False, otherwise. + """ + raise NotImplementedError('must implement requires_multipart_upload()') + + def get_put_object_body(self, transfer_future): + """Returns the body to use for PutObject + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type config: s3transfer.manager.TransferConfig + :param config: The config associated to the transfer manager + + :rtype: s3transfer.utils.ReadFileChunk + :returns: A ReadFileChunk including all progress callbacks + associated with the transfer future. + """ + raise NotImplementedError('must implement get_put_object_body()') + + def yield_upload_part_bodies(self, transfer_future, chunksize): + """Yields the part number and body to use for each UploadPart + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type chunksize: int + :param chunksize: The chunksize to use for this upload. + + :rtype: int, s3transfer.utils.ReadFileChunk + :returns: Yields the part number and the ReadFileChunk including all + progress callbacks associated with the transfer future for that + specific yielded part. 
+ """ + raise NotImplementedError('must implement yield_upload_part_bodies()') + + def _wrap_fileobj(self, fileobj): + fileobj = InterruptReader(fileobj, self._transfer_coordinator) + if self._bandwidth_limiter: + fileobj = self._bandwidth_limiter.get_bandwith_limited_stream( + fileobj, self._transfer_coordinator, enabled=False) + return fileobj + + def _get_progress_callbacks(self, transfer_future): + callbacks = get_callbacks(transfer_future, 'progress') + # We only want to be wrapping the callbacks if there are callbacks to + # invoke because we do not want to be doing any unnecessary work if + # there are no callbacks to invoke. + if callbacks: + return [AggregatedProgressCallback(callbacks)] + return [] + + def _get_close_callbacks(self, aggregated_progress_callbacks): + return [callback.flush for callback in aggregated_progress_callbacks] + + +class UploadFilenameInputManager(UploadInputManager): + """Upload utility for filenames""" + @classmethod + def is_compatible(cls, upload_source): + return isinstance(upload_source, six.string_types) + + def stores_body_in_memory(self, operation_name): + return False + + def provide_transfer_size(self, transfer_future): + transfer_future.meta.provide_transfer_size( + self._osutil.get_file_size( + transfer_future.meta.call_args.fileobj)) + + def requires_multipart_upload(self, transfer_future, config): + return transfer_future.meta.size >= config.multipart_threshold + + def get_put_object_body(self, transfer_future): + # Get a file-like object for the given input + fileobj, full_size = self._get_put_object_fileobj_with_full_size( + transfer_future) + + # Wrap fileobj with interrupt reader that will quickly cancel + # uploads if needed instead of having to wait for the socket + # to completely read all of the data. + fileobj = self._wrap_fileobj(fileobj) + + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + size = transfer_future.meta.size + # Return the file-like object wrapped into a ReadFileChunk to get + # progress. + return self._osutil.open_file_chunk_reader_from_fileobj( + fileobj=fileobj, chunk_size=size, full_file_size=full_size, + callbacks=callbacks, close_callbacks=close_callbacks) + + def yield_upload_part_bodies(self, transfer_future, chunksize): + full_file_size = transfer_future.meta.size + num_parts = self._get_num_parts(transfer_future, chunksize) + for part_number in range(1, num_parts + 1): + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + start_byte = chunksize * (part_number - 1) + # Get a file-like object for that part and the size of the full + # file size for the associated file-like object for that part. + fileobj, full_size = self._get_upload_part_fileobj_with_full_size( + transfer_future.meta.call_args.fileobj, start_byte=start_byte, + part_size=chunksize, full_file_size=full_file_size) + + # Wrap fileobj with interrupt reader that will quickly cancel + # uploads if needed instead of having to wait for the socket + # to completely read all of the data. + fileobj = self._wrap_fileobj(fileobj) + + # Wrap the file-like object into a ReadFileChunk to get progress. 
+ read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj( + fileobj=fileobj, chunk_size=chunksize, + full_file_size=full_size, callbacks=callbacks, + close_callbacks=close_callbacks) + yield part_number, read_file_chunk + + def _get_deferred_open_file(self, fileobj, start_byte): + fileobj = DeferredOpenFile( + fileobj, start_byte, open_function=self._osutil.open) + return fileobj + + def _get_put_object_fileobj_with_full_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + size = transfer_future.meta.size + return self._get_deferred_open_file(fileobj, 0), size + + def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): + start_byte = kwargs['start_byte'] + full_size = kwargs['full_file_size'] + return self._get_deferred_open_file(fileobj, start_byte), full_size + + def _get_num_parts(self, transfer_future, part_size): + return int( + math.ceil(transfer_future.meta.size / float(part_size))) + + +class UploadSeekableInputManager(UploadFilenameInputManager): + """Upload utility for an open file object""" + @classmethod + def is_compatible(cls, upload_source): + return readable(upload_source) and seekable(upload_source) + + def stores_body_in_memory(self, operation_name): + if operation_name == 'put_object': + return False + else: + return True + + def provide_transfer_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + # To determine size, first determine the starting position + # Seek to the end and then find the difference in the length + # between the end and start positions. + start_position = fileobj.tell() + fileobj.seek(0, 2) + end_position = fileobj.tell() + fileobj.seek(start_position) + transfer_future.meta.provide_transfer_size( + end_position - start_position) + + def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): + # Note: It is unfortunate that in order to do a multithreaded + # multipart upload we cannot simply copy the filelike object + # since there is not really a mechanism in python (i.e. os.dup + # points to the same OS filehandle which causes concurrency + # issues). So instead we need to read from the fileobj and # chunk the data out to separate file-like objects in memory. - data = fileobj.read(kwargs['part_size']) - # We return the length of the data instead of the full_file_size + data = fileobj.read(kwargs['part_size']) + # We return the length of the data instead of the full_file_size # because we partitioned the data into separate BytesIO objects - # meaning the BytesIO object has no knowledge of its start position - # relative the input source nor access to the rest of the input - # source. So we must treat it as its own standalone file. - return six.BytesIO(data), len(data) - - def _get_put_object_fileobj_with_full_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - # The current position needs to be taken into account when retrieving - # the full size of the file. 
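# Illustrative sketch (editor's note, not from the s3transfer sources): for an
# already-open seekable object the transfer size is measured relative to the
# stream's current position, so only the unread remainder is uploaded.
# Minimal demonstration with an in-memory stream:
import io

buf = io.BytesIO(b'0123456789')
buf.seek(4)                              # caller has already consumed 4 bytes
start_position = buf.tell()
buf.seek(0, 2)                           # seek to the end to find the total length
remaining = buf.tell() - start_position  # 6 bytes remain to upload
buf.seek(start_position)                 # restore the position before reading begins
# (end of sketch)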
- size = fileobj.tell() + transfer_future.meta.size - return fileobj, size - - -class UploadNonSeekableInputManager(UploadInputManager): - """Upload utility for a file-like object that cannot seek.""" - def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): - super(UploadNonSeekableInputManager, self).__init__( - osutil, transfer_coordinator, bandwidth_limiter) - self._initial_data = b'' - - @classmethod - def is_compatible(cls, upload_source): - return readable(upload_source) - - def stores_body_in_memory(self, operation_name): - return True - - def provide_transfer_size(self, transfer_future): - # No-op because there is no way to do this short of reading the entire - # body into memory. - return - - def requires_multipart_upload(self, transfer_future, config): - # If the user has set the size, we can use that. - if transfer_future.meta.size is not None: - return transfer_future.meta.size >= config.multipart_threshold - - # This is tricky to determine in this case because we can't know how - # large the input is. So to figure it out, we read data into memory - # up until the threshold and compare how much data was actually read - # against the threshold. - fileobj = transfer_future.meta.call_args.fileobj - threshold = config.multipart_threshold - self._initial_data = self._read(fileobj, threshold, False) - if len(self._initial_data) < threshold: - return False - else: - return True - - def get_put_object_body(self, transfer_future): - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - fileobj = transfer_future.meta.call_args.fileobj - - body = self._wrap_data( - self._initial_data + fileobj.read(), callbacks, close_callbacks) - - # Zero out the stored data so we don't have additional copies - # hanging around in memory. - self._initial_data = None - return body - - def yield_upload_part_bodies(self, transfer_future, chunksize): - file_object = transfer_future.meta.call_args.fileobj - part_number = 0 - - # Continue reading parts from the file-like object until it is empty. - while True: - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - part_number += 1 - part_content = self._read(file_object, chunksize) - if not part_content: - break - part_object = self._wrap_data( - part_content, callbacks, close_callbacks) - - # Zero out part_content to avoid hanging on to additional data. - part_content = None - yield part_number, part_object - - def _read(self, fileobj, amount, truncate=True): - """ - Reads a specific amount of data from a stream and returns it. If there - is any data in initial_data, that will be popped out first. - - :type fileobj: A file-like object that implements read - :param fileobj: The stream to read from. - - :type amount: int - :param amount: The number of bytes to read from the stream. - - :type truncate: bool - :param truncate: Whether or not to truncate initial_data after - reading from it. - - :return: Generator which generates part bodies from the initial data. - """ - # If the the initial data is empty, we simply read from the fileobj - if len(self._initial_data) == 0: - return fileobj.read(amount) - + # meaning the BytesIO object has no knowledge of its start position + # relative the input source nor access to the rest of the input + # source. So we must treat it as its own standalone file. 
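# Illustrative sketch (editor's note, not from the s3transfer sources): for a
# stream that cannot seek, the multipart decision is made by reading ahead —
# buffer up to multipart_threshold bytes, and if the stream produced at least
# that much, switch to multipart (the buffered bytes are reused as the start
# of the upload rather than discarded).  Rough shape, with made-up sizes:
import io

threshold = 8 * 1024 ** 2                       # assumed 8 MiB threshold
stream = io.BytesIO(b'x' * (10 * 1024 ** 2))    # stand-in for a pipe/socket-like stream
initial_data = stream.read(threshold)           # kept for later re-use, not discarded
use_multipart = len(initial_data) >= threshold  # True here: at least 8 MiB was available
# (end of sketch)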
+ return six.BytesIO(data), len(data) + + def _get_put_object_fileobj_with_full_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + # The current position needs to be taken into account when retrieving + # the full size of the file. + size = fileobj.tell() + transfer_future.meta.size + return fileobj, size + + +class UploadNonSeekableInputManager(UploadInputManager): + """Upload utility for a file-like object that cannot seek.""" + def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): + super(UploadNonSeekableInputManager, self).__init__( + osutil, transfer_coordinator, bandwidth_limiter) + self._initial_data = b'' + + @classmethod + def is_compatible(cls, upload_source): + return readable(upload_source) + + def stores_body_in_memory(self, operation_name): + return True + + def provide_transfer_size(self, transfer_future): + # No-op because there is no way to do this short of reading the entire + # body into memory. + return + + def requires_multipart_upload(self, transfer_future, config): + # If the user has set the size, we can use that. + if transfer_future.meta.size is not None: + return transfer_future.meta.size >= config.multipart_threshold + + # This is tricky to determine in this case because we can't know how + # large the input is. So to figure it out, we read data into memory + # up until the threshold and compare how much data was actually read + # against the threshold. + fileobj = transfer_future.meta.call_args.fileobj + threshold = config.multipart_threshold + self._initial_data = self._read(fileobj, threshold, False) + if len(self._initial_data) < threshold: + return False + else: + return True + + def get_put_object_body(self, transfer_future): + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + fileobj = transfer_future.meta.call_args.fileobj + + body = self._wrap_data( + self._initial_data + fileobj.read(), callbacks, close_callbacks) + + # Zero out the stored data so we don't have additional copies + # hanging around in memory. + self._initial_data = None + return body + + def yield_upload_part_bodies(self, transfer_future, chunksize): + file_object = transfer_future.meta.call_args.fileobj + part_number = 0 + + # Continue reading parts from the file-like object until it is empty. + while True: + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + part_number += 1 + part_content = self._read(file_object, chunksize) + if not part_content: + break + part_object = self._wrap_data( + part_content, callbacks, close_callbacks) + + # Zero out part_content to avoid hanging on to additional data. + part_content = None + yield part_number, part_object + + def _read(self, fileobj, amount, truncate=True): + """ + Reads a specific amount of data from a stream and returns it. If there + is any data in initial_data, that will be popped out first. + + :type fileobj: A file-like object that implements read + :param fileobj: The stream to read from. + + :type amount: int + :param amount: The number of bytes to read from the stream. + + :type truncate: bool + :param truncate: Whether or not to truncate initial_data after + reading from it. + + :return: Generator which generates part bodies from the initial data. 
+ """ + # If the the initial data is empty, we simply read from the fileobj + if len(self._initial_data) == 0: + return fileobj.read(amount) + # If the requested number of bytes is less than the amount of - # initial data, pull entirely from initial data. - if amount <= len(self._initial_data): - data = self._initial_data[:amount] - # Truncate initial data so we don't hang onto the data longer - # than we need. - if truncate: - self._initial_data = self._initial_data[amount:] - return data - - # At this point there is some initial data left, but not enough to - # satisfy the number of bytes requested. Pull out the remaining - # initial data and read the rest from the fileobj. - amount_to_read = amount - len(self._initial_data) - data = self._initial_data + fileobj.read(amount_to_read) - - # Zero out initial data so we don't hang onto the data any more. - if truncate: - self._initial_data = b'' - return data - - def _wrap_data(self, data, callbacks, close_callbacks): - """ - Wraps data with the interrupt reader and the file chunk reader. - - :type data: bytes - :param data: The data to wrap. - - :type callbacks: list - :param callbacks: The callbacks associated with the transfer future. - - :type close_callbacks: list - :param close_callbacks: The callbacks to be called when closing the - wrapper for the data. - - :return: Fully wrapped data. - """ - fileobj = self._wrap_fileobj(six.BytesIO(data)) - return self._osutil.open_file_chunk_reader_from_fileobj( - fileobj=fileobj, chunk_size=len(data), full_file_size=len(data), - callbacks=callbacks, close_callbacks=close_callbacks) - - -class UploadSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute an upload""" - - UPLOAD_PART_ARGS = [ - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', + # initial data, pull entirely from initial data. + if amount <= len(self._initial_data): + data = self._initial_data[:amount] + # Truncate initial data so we don't hang onto the data longer + # than we need. + if truncate: + self._initial_data = self._initial_data[amount:] + return data + + # At this point there is some initial data left, but not enough to + # satisfy the number of bytes requested. Pull out the remaining + # initial data and read the rest from the fileobj. + amount_to_read = amount - len(self._initial_data) + data = self._initial_data + fileobj.read(amount_to_read) + + # Zero out initial data so we don't hang onto the data any more. + if truncate: + self._initial_data = b'' + return data + + def _wrap_data(self, data, callbacks, close_callbacks): + """ + Wraps data with the interrupt reader and the file chunk reader. + + :type data: bytes + :param data: The data to wrap. + + :type callbacks: list + :param callbacks: The callbacks associated with the transfer future. + + :type close_callbacks: list + :param close_callbacks: The callbacks to be called when closing the + wrapper for the data. + + :return: Fully wrapped data. 
+ """ + fileobj = self._wrap_fileobj(six.BytesIO(data)) + return self._osutil.open_file_chunk_reader_from_fileobj( + fileobj=fileobj, chunk_size=len(data), full_file_size=len(data), + callbacks=callbacks, close_callbacks=close_callbacks) + + +class UploadSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute an upload""" + + UPLOAD_PART_ARGS = [ + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', 'ExpectedBucketOwner' - ] - - COMPLETE_MULTIPART_ARGS = [ + ] + + COMPLETE_MULTIPART_ARGS = [ 'RequestPayer', 'ExpectedBucketOwner' - ] - - def _get_upload_input_manager_cls(self, transfer_future): + ] + + def _get_upload_input_manager_cls(self, transfer_future): """Retrieves a class for managing input for an upload based on file type - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future for the request - - :rtype: class of UploadInputManager - :returns: The appropriate class to use for managing a specific type of - input for uploads. - """ - upload_manager_resolver_chain = [ - UploadFilenameInputManager, - UploadSeekableInputManager, - UploadNonSeekableInputManager - ] - - fileobj = transfer_future.meta.call_args.fileobj - for upload_manager_cls in upload_manager_resolver_chain: - if upload_manager_cls.is_compatible(fileobj): - return upload_manager_cls - raise RuntimeError( - 'Input %s of type: %s is not supported.' % ( - fileobj, type(fileobj))) - - def _submit(self, client, config, osutil, request_executor, - transfer_future, bandwidth_limiter=None): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - upload_input_manager = self._get_upload_input_manager_cls( - transfer_future)( - osutil, self._transfer_coordinator, bandwidth_limiter) - - # Determine the size if it was not provided - if transfer_future.meta.size is None: - upload_input_manager.provide_transfer_size(transfer_future) - - # Do a multipart upload if needed, otherwise do a regular put object. - if not upload_input_manager.requires_multipart_upload( - transfer_future, config): - self._submit_upload_request( - client, config, osutil, request_executor, transfer_future, - upload_input_manager) - else: - self._submit_multipart_request( - client, config, osutil, request_executor, transfer_future, - upload_input_manager) - - def _submit_upload_request(self, client, config, osutil, request_executor, - transfer_future, upload_input_manager): - call_args = transfer_future.meta.call_args - - # Get any tags that need to be associated to the put object task - put_object_tag = self._get_upload_task_tag( - upload_input_manager, 'put_object') - - # Submit the request of a single upload. 
- self._transfer_coordinator.submit( - request_executor, - PutObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'fileobj': upload_input_manager.get_put_object_body( - transfer_future), - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args - }, - is_final=True - ), - tag=put_object_tag - ) - - def _submit_multipart_request(self, client, config, osutil, - request_executor, transfer_future, - upload_input_manager): - call_args = transfer_future.meta.call_args - - # Submit the request to create a multipart upload. - create_multipart_future = self._transfer_coordinator.submit( - request_executor, - CreateMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, - } - ) - ) - - # Submit requests to upload the parts of the file. - part_futures = [] - extra_part_args = self._extra_upload_part_args(call_args.extra_args) - - # Get any tags that need to be associated to the submitted task - # for upload the data - upload_part_tag = self._get_upload_task_tag( - upload_input_manager, 'upload_part') - - size = transfer_future.meta.size - adjuster = ChunksizeAdjuster() - chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size) - part_iterator = upload_input_manager.yield_upload_part_bodies( - transfer_future, chunksize) - - for part_number, fileobj in part_iterator: - part_futures.append( - self._transfer_coordinator.submit( - request_executor, - UploadPartTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'fileobj': fileobj, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'part_number': part_number, - 'extra_args': extra_part_args - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future - } - ), - tag=upload_part_tag - ) - ) - - complete_multipart_extra_args = self._extra_complete_multipart_args( - call_args.extra_args) - # Submit the request to complete the multipart upload. - self._transfer_coordinator.submit( - request_executor, - CompleteMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': complete_multipart_extra_args, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future, - 'parts': part_futures - }, - is_final=True - ) - ) - - def _extra_upload_part_args(self, extra_args): - # Only the args in UPLOAD_PART_ARGS actually need to be passed - # onto the upload_part calls. - return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS) - - def _extra_complete_multipart_args(self, extra_args): - return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) - - def _get_upload_task_tag(self, upload_input_manager, operation_name): - tag = None - if upload_input_manager.stores_body_in_memory(operation_name): - tag = IN_MEMORY_UPLOAD_TAG - return tag - - -class PutObjectTask(Task): - """Task to do a nonmultipart upload""" - def _main(self, client, fileobj, bucket, key, extra_args): - """ - :param client: The client to use when calling PutObject - :param fileobj: The file to upload. - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. 
- """ - with fileobj as body: - client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) - - -class UploadPartTask(Task): - """Task to upload a part in a multipart upload""" - def _main(self, client, fileobj, bucket, key, upload_id, part_number, - extra_args): - """ - :param client: The client to use when calling PutObject - :param fileobj: The file to upload. - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param part_number: The number representing the part of the multipart - upload - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - - :rtype: dict - :returns: A dictionary representing a part:: - - {'Etag': etag_value, 'PartNumber': part_number} - - This value can be appended to a list to be used to complete - the multipart upload. - """ - with fileobj as body: - response = client.upload_part( - Bucket=bucket, Key=key, - UploadId=upload_id, PartNumber=part_number, - Body=body, **extra_args) - etag = response['ETag'] - return {'ETag': etag, 'PartNumber': part_number} + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future for the request + + :rtype: class of UploadInputManager + :returns: The appropriate class to use for managing a specific type of + input for uploads. + """ + upload_manager_resolver_chain = [ + UploadFilenameInputManager, + UploadSeekableInputManager, + UploadNonSeekableInputManager + ] + + fileobj = transfer_future.meta.call_args.fileobj + for upload_manager_cls in upload_manager_resolver_chain: + if upload_manager_cls.is_compatible(fileobj): + return upload_manager_cls + raise RuntimeError( + 'Input %s of type: %s is not supported.' % ( + fileobj, type(fileobj))) + + def _submit(self, client, config, osutil, request_executor, + transfer_future, bandwidth_limiter=None): + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + upload_input_manager = self._get_upload_input_manager_cls( + transfer_future)( + osutil, self._transfer_coordinator, bandwidth_limiter) + + # Determine the size if it was not provided + if transfer_future.meta.size is None: + upload_input_manager.provide_transfer_size(transfer_future) + + # Do a multipart upload if needed, otherwise do a regular put object. 
+ if not upload_input_manager.requires_multipart_upload( + transfer_future, config): + self._submit_upload_request( + client, config, osutil, request_executor, transfer_future, + upload_input_manager) + else: + self._submit_multipart_request( + client, config, osutil, request_executor, transfer_future, + upload_input_manager) + + def _submit_upload_request(self, client, config, osutil, request_executor, + transfer_future, upload_input_manager): + call_args = transfer_future.meta.call_args + + # Get any tags that need to be associated to the put object task + put_object_tag = self._get_upload_task_tag( + upload_input_manager, 'put_object') + + # Submit the request of a single upload. + self._transfer_coordinator.submit( + request_executor, + PutObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'fileobj': upload_input_manager.get_put_object_body( + transfer_future), + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args + }, + is_final=True + ), + tag=put_object_tag + ) + + def _submit_multipart_request(self, client, config, osutil, + request_executor, transfer_future, + upload_input_manager): + call_args = transfer_future.meta.call_args + + # Submit the request to create a multipart upload. + create_multipart_future = self._transfer_coordinator.submit( + request_executor, + CreateMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, + } + ) + ) + + # Submit requests to upload the parts of the file. + part_futures = [] + extra_part_args = self._extra_upload_part_args(call_args.extra_args) + + # Get any tags that need to be associated to the submitted task + # for upload the data + upload_part_tag = self._get_upload_task_tag( + upload_input_manager, 'upload_part') + + size = transfer_future.meta.size + adjuster = ChunksizeAdjuster() + chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size) + part_iterator = upload_input_manager.yield_upload_part_bodies( + transfer_future, chunksize) + + for part_number, fileobj in part_iterator: + part_futures.append( + self._transfer_coordinator.submit( + request_executor, + UploadPartTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'fileobj': fileobj, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'part_number': part_number, + 'extra_args': extra_part_args + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future + } + ), + tag=upload_part_tag + ) + ) + + complete_multipart_extra_args = self._extra_complete_multipart_args( + call_args.extra_args) + # Submit the request to complete the multipart upload. + self._transfer_coordinator.submit( + request_executor, + CompleteMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': complete_multipart_extra_args, + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future, + 'parts': part_futures + }, + is_final=True + ) + ) + + def _extra_upload_part_args(self, extra_args): + # Only the args in UPLOAD_PART_ARGS actually need to be passed + # onto the upload_part calls. 
+ return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS) + + def _extra_complete_multipart_args(self, extra_args): + return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) + + def _get_upload_task_tag(self, upload_input_manager, operation_name): + tag = None + if upload_input_manager.stores_body_in_memory(operation_name): + tag = IN_MEMORY_UPLOAD_TAG + return tag + + +class PutObjectTask(Task): + """Task to do a nonmultipart upload""" + def _main(self, client, fileobj, bucket, key, extra_args): + """ + :param client: The client to use when calling PutObject + :param fileobj: The file to upload. + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + """ + with fileobj as body: + client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) + + +class UploadPartTask(Task): + """Task to upload a part in a multipart upload""" + def _main(self, client, fileobj, bucket, key, upload_id, part_number, + extra_args): + """ + :param client: The client to use when calling PutObject + :param fileobj: The file to upload. + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param part_number: The number representing the part of the multipart + upload + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + + :rtype: dict + :returns: A dictionary representing a part:: + + {'Etag': etag_value, 'PartNumber': part_number} + + This value can be appended to a list to be used to complete + the multipart upload. + """ + with fileobj as body: + response = client.upload_part( + Bucket=bucket, Key=key, + UploadId=upload_id, PartNumber=part_number, + Body=body, **extra_args) + etag = response['ETag'] + return {'ETag': etag, 'PartNumber': part_number} diff --git a/contrib/python/s3transfer/py2/s3transfer/utils.py b/contrib/python/s3transfer/py2/s3transfer/utils.py index b93cd5cf94..67f7e028fc 100644 --- a/contrib/python/s3transfer/py2/s3transfer/utils.py +++ b/contrib/python/s3transfer/py2/s3transfer/utils.py @@ -1,313 +1,313 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import random -import time -import functools -import math -import os +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import random +import time +import functools +import math +import os import socket -import stat -import string -import logging -import threading -import io -from collections import defaultdict - +import stat +import string +import logging +import threading +import io +from collections import defaultdict + from botocore.exceptions import IncompleteReadError from botocore.exceptions import ReadTimeoutError from s3transfer.compat import SOCKET_ERROR -from s3transfer.compat import rename_file -from s3transfer.compat import seekable +from s3transfer.compat import rename_file +from s3transfer.compat import seekable from s3transfer.compat import fallocate - - -MAX_PARTS = 10000 -# The maximum file size you can upload via S3 per request. -# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html -# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html -MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3) -MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2) -logger = logging.getLogger(__name__) - - + + +MAX_PARTS = 10000 +# The maximum file size you can upload via S3 per request. +# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html +# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html +MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3) +MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2) +logger = logging.getLogger(__name__) + + S3_RETRYABLE_DOWNLOAD_ERRORS = ( socket.timeout, SOCKET_ERROR, ReadTimeoutError, IncompleteReadError ) -def random_file_extension(num_digits=8): - return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) - - -def signal_not_transferring(request, operation_name, **kwargs): - if operation_name in ['PutObject', 'UploadPart'] and \ - hasattr(request.body, 'signal_not_transferring'): - request.body.signal_not_transferring() - - -def signal_transferring(request, operation_name, **kwargs): - if operation_name in ['PutObject', 'UploadPart'] and \ - hasattr(request.body, 'signal_transferring'): - request.body.signal_transferring() - - +def random_file_extension(num_digits=8): + return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) + + +def signal_not_transferring(request, operation_name, **kwargs): + if operation_name in ['PutObject', 'UploadPart'] and \ + hasattr(request.body, 'signal_not_transferring'): + request.body.signal_not_transferring() + + +def signal_transferring(request, operation_name, **kwargs): + if operation_name in ['PutObject', 'UploadPart'] and \ + hasattr(request.body, 'signal_transferring'): + request.body.signal_transferring() + + def calculate_num_parts(size, part_size): return int(math.ceil(size / float(part_size))) -def calculate_range_parameter(part_size, part_index, num_parts, - total_size=None): - """Calculate the range parameter for multipart downloads/copies - - :type part_size: int - :param part_size: The size of the part - - :type part_index: int - :param part_index: The index for which this parts starts. 
This index starts - at zero - - :type num_parts: int - :param num_parts: The total number of parts in the transfer - - :returns: The value to use for Range parameter on downloads or - the CopySourceRange parameter for copies - """ - # Used to calculate the Range parameter - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - if total_size is not None: - end_range = str(total_size - 1) - else: - end_range = start_range + part_size - 1 - range_param = 'bytes=%s-%s' % (start_range, end_range) - return range_param - - -def get_callbacks(transfer_future, callback_type): - """Retrieves callbacks from a subscriber - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future the subscriber is associated - to. - - :type callback_type: str - :param callback_type: The type of callback to retrieve from the subscriber. - Valid types include: - * 'queued' - * 'progress' - * 'done' - - :returns: A list of callbacks for the type specified. All callbacks are - preinjected with the transfer future. - """ - callbacks = [] - for subscriber in transfer_future.meta.call_args.subscribers: - callback_name = 'on_' + callback_type - if hasattr(subscriber, callback_name): - callbacks.append( - functools.partial( - getattr(subscriber, callback_name), - future=transfer_future - ) - ) - return callbacks - - -def invoke_progress_callbacks(callbacks, bytes_transferred): - """Calls all progress callbacks - - :param callbacks: A list of progress callbacks to invoke - :param bytes_transferred: The number of bytes transferred. This is passed - to the callbacks. If no bytes were transferred the callbacks will not - be invoked because no progress was achieved. It is also possible - to receive a negative amount which comes from retrying a transfer - request. - """ - # Only invoke the callbacks if bytes were actually transferred. - if bytes_transferred: - for callback in callbacks: - callback(bytes_transferred=bytes_transferred) - - -def get_filtered_dict(original_dict, whitelisted_keys): - """Gets a dictionary filtered by whitelisted keys - - :param original_dict: The original dictionary of arguments to source keys - and values. - :param whitelisted_key: A list of keys to include in the filtered - dictionary. - - :returns: A dictionary containing key/values from the original dictionary - whose key was included in the whitelist - """ - filtered_dict = {} - for key, value in original_dict.items(): - if key in whitelisted_keys: - filtered_dict[key] = value - return filtered_dict - - -class CallArgs(object): - def __init__(self, **kwargs): - """A class that records call arguments - - The call arguments must be passed as keyword arguments. It will set - each keyword argument as an attribute of the object along with its - associated value. - """ - for arg, value in kwargs.items(): - setattr(self, arg, value) - - -class FunctionContainer(object): - """An object that contains a function and any args or kwargs to call it - - When called the provided function will be called with provided args - and kwargs. 
- """ - def __init__(self, func, *args, **kwargs): - self._func = func - self._args = args - self._kwargs = kwargs - - def __repr__(self): - return 'Function: %s with args %s and kwargs %s' % ( - self._func, self._args, self._kwargs) - - def __call__(self): - return self._func(*self._args, **self._kwargs) - - -class CountCallbackInvoker(object): - """An abstraction to invoke a callback when a shared count reaches zero - - :param callback: Callback invoke when finalized count reaches zero - """ - def __init__(self, callback): - self._lock = threading.Lock() - self._callback = callback - self._count = 0 - self._is_finalized = False - - @property - def current_count(self): - with self._lock: - return self._count - - def increment(self): - """Increment the count by one""" - with self._lock: - if self._is_finalized: - raise RuntimeError( - 'Counter has been finalized it can no longer be ' - 'incremented.' - ) - self._count += 1 - - def decrement(self): - """Decrement the count by one""" - with self._lock: - if self._count == 0: - raise RuntimeError( - 'Counter is at zero. It cannot dip below zero') - self._count -= 1 - if self._is_finalized and self._count == 0: - self._callback() - - def finalize(self): - """Finalize the counter - - Once finalized, the counter never be incremented and the callback - can be invoked once the count reaches zero - """ - with self._lock: - self._is_finalized = True - if self._count == 0: - self._callback() - - -class OSUtils(object): +def calculate_range_parameter(part_size, part_index, num_parts, + total_size=None): + """Calculate the range parameter for multipart downloads/copies + + :type part_size: int + :param part_size: The size of the part + + :type part_index: int + :param part_index: The index for which this parts starts. This index starts + at zero + + :type num_parts: int + :param num_parts: The total number of parts in the transfer + + :returns: The value to use for Range parameter on downloads or + the CopySourceRange parameter for copies + """ + # Used to calculate the Range parameter + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + if total_size is not None: + end_range = str(total_size - 1) + else: + end_range = start_range + part_size - 1 + range_param = 'bytes=%s-%s' % (start_range, end_range) + return range_param + + +def get_callbacks(transfer_future, callback_type): + """Retrieves callbacks from a subscriber + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future the subscriber is associated + to. + + :type callback_type: str + :param callback_type: The type of callback to retrieve from the subscriber. + Valid types include: + * 'queued' + * 'progress' + * 'done' + + :returns: A list of callbacks for the type specified. All callbacks are + preinjected with the transfer future. + """ + callbacks = [] + for subscriber in transfer_future.meta.call_args.subscribers: + callback_name = 'on_' + callback_type + if hasattr(subscriber, callback_name): + callbacks.append( + functools.partial( + getattr(subscriber, callback_name), + future=transfer_future + ) + ) + return callbacks + + +def invoke_progress_callbacks(callbacks, bytes_transferred): + """Calls all progress callbacks + + :param callbacks: A list of progress callbacks to invoke + :param bytes_transferred: The number of bytes transferred. This is passed + to the callbacks. If no bytes were transferred the callbacks will not + be invoked because no progress was achieved. 
It is also possible + to receive a negative amount which comes from retrying a transfer + request. + """ + # Only invoke the callbacks if bytes were actually transferred. + if bytes_transferred: + for callback in callbacks: + callback(bytes_transferred=bytes_transferred) + + +def get_filtered_dict(original_dict, whitelisted_keys): + """Gets a dictionary filtered by whitelisted keys + + :param original_dict: The original dictionary of arguments to source keys + and values. + :param whitelisted_key: A list of keys to include in the filtered + dictionary. + + :returns: A dictionary containing key/values from the original dictionary + whose key was included in the whitelist + """ + filtered_dict = {} + for key, value in original_dict.items(): + if key in whitelisted_keys: + filtered_dict[key] = value + return filtered_dict + + +class CallArgs(object): + def __init__(self, **kwargs): + """A class that records call arguments + + The call arguments must be passed as keyword arguments. It will set + each keyword argument as an attribute of the object along with its + associated value. + """ + for arg, value in kwargs.items(): + setattr(self, arg, value) + + +class FunctionContainer(object): + """An object that contains a function and any args or kwargs to call it + + When called the provided function will be called with provided args + and kwargs. + """ + def __init__(self, func, *args, **kwargs): + self._func = func + self._args = args + self._kwargs = kwargs + + def __repr__(self): + return 'Function: %s with args %s and kwargs %s' % ( + self._func, self._args, self._kwargs) + + def __call__(self): + return self._func(*self._args, **self._kwargs) + + +class CountCallbackInvoker(object): + """An abstraction to invoke a callback when a shared count reaches zero + + :param callback: Callback invoke when finalized count reaches zero + """ + def __init__(self, callback): + self._lock = threading.Lock() + self._callback = callback + self._count = 0 + self._is_finalized = False + + @property + def current_count(self): + with self._lock: + return self._count + + def increment(self): + """Increment the count by one""" + with self._lock: + if self._is_finalized: + raise RuntimeError( + 'Counter has been finalized it can no longer be ' + 'incremented.' + ) + self._count += 1 + + def decrement(self): + """Decrement the count by one""" + with self._lock: + if self._count == 0: + raise RuntimeError( + 'Counter is at zero. 
It cannot dip below zero') + self._count -= 1 + if self._is_finalized and self._count == 0: + self._callback() + + def finalize(self): + """Finalize the counter + + Once finalized, the counter never be incremented and the callback + can be invoked once the count reaches zero + """ + with self._lock: + self._is_finalized = True + if self._count == 0: + self._callback() + + +class OSUtils(object): _MAX_FILENAME_LEN = 255 - def get_file_size(self, filename): - return os.path.getsize(filename) - - def open_file_chunk_reader(self, filename, start_byte, size, callbacks): - return ReadFileChunk.from_filename(filename, start_byte, - size, callbacks, - enable_callbacks=False) - - def open_file_chunk_reader_from_fileobj(self, fileobj, chunk_size, - full_file_size, callbacks, - close_callbacks=None): - return ReadFileChunk( - fileobj, chunk_size, full_file_size, - callbacks=callbacks, enable_callbacks=False, - close_callbacks=close_callbacks) - - def open(self, filename, mode): - return open(filename, mode) - - def remove_file(self, filename): - """Remove a file, noop if file does not exist.""" - # Unlike os.remove, if the file does not exist, - # then this method does nothing. - try: - os.remove(filename) - except OSError: - pass - - def rename_file(self, current_filename, new_filename): - rename_file(current_filename, new_filename) - - def is_special_file(cls, filename): - """Checks to see if a file is a special UNIX file. - - It checks if the file is a character special device, block special - device, FIFO, or socket. - - :param filename: Name of the file - - :returns: True if the file is a special file. False, if is not. - """ - # If it does not exist, it must be a new file so it cannot be - # a special file. - if not os.path.exists(filename): - return False - mode = os.stat(filename).st_mode - # Character special device. - if stat.S_ISCHR(mode): - return True - # Block special device - if stat.S_ISBLK(mode): - return True - # Named pipe / FIFO - if stat.S_ISFIFO(mode): - return True - # Socket. - if stat.S_ISSOCK(mode): - return True - return False - + def get_file_size(self, filename): + return os.path.getsize(filename) + + def open_file_chunk_reader(self, filename, start_byte, size, callbacks): + return ReadFileChunk.from_filename(filename, start_byte, + size, callbacks, + enable_callbacks=False) + + def open_file_chunk_reader_from_fileobj(self, fileobj, chunk_size, + full_file_size, callbacks, + close_callbacks=None): + return ReadFileChunk( + fileobj, chunk_size, full_file_size, + callbacks=callbacks, enable_callbacks=False, + close_callbacks=close_callbacks) + + def open(self, filename, mode): + return open(filename, mode) + + def remove_file(self, filename): + """Remove a file, noop if file does not exist.""" + # Unlike os.remove, if the file does not exist, + # then this method does nothing. + try: + os.remove(filename) + except OSError: + pass + + def rename_file(self, current_filename, new_filename): + rename_file(current_filename, new_filename) + + def is_special_file(cls, filename): + """Checks to see if a file is a special UNIX file. + + It checks if the file is a character special device, block special + device, FIFO, or socket. + + :param filename: Name of the file + + :returns: True if the file is a special file. False, if is not. + """ + # If it does not exist, it must be a new file so it cannot be + # a special file. + if not os.path.exists(filename): + return False + mode = os.stat(filename).st_mode + # Character special device. 
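# Illustrative sketch (editor's note, not from the s3transfer sources): usage
# pattern for CountCallbackInvoker shown above — increment once per piece of
# outstanding work, finalize after everything has been scheduled, and the
# callback fires when the last decrement lands:
invoker = CountCallbackInvoker(lambda: logger.debug('all parts released'))
for _ in range(3):
    invoker.increment()
invoker.finalize()          # no further increments are allowed after this point
for _ in range(3):
    invoker.decrement()     # the callback fires on the final decrement
# (end of sketch)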
+ if stat.S_ISCHR(mode): + return True + # Block special device + if stat.S_ISBLK(mode): + return True + # Named pipe / FIFO + if stat.S_ISFIFO(mode): + return True + # Socket. + if stat.S_ISSOCK(mode): + return True + return False + def get_temp_filename(self, filename): suffix = os.extsep + random_file_extension() path = os.path.dirname(filename) name = os.path.basename(filename) temp_filename = name[:self._MAX_FILENAME_LEN - len(suffix)] + suffix return os.path.join(path, temp_filename) - + def allocate(self, filename, size): try: with self.open(filename, 'wb') as f: @@ -317,191 +317,191 @@ class OSUtils(object): raise -class DeferredOpenFile(object): - def __init__(self, filename, start_byte=0, mode='rb', open_function=open): - """A class that defers the opening of a file till needed - - This is useful for deferring opening of a file till it is needed - in a separate thread, as there is a limit of how many open files - there can be in a single thread for most operating systems. The - file gets opened in the following methods: ``read()``, ``seek()``, - and ``__enter__()`` - - :type filename: str - :param filename: The name of the file to open - - :type start_byte: int - :param start_byte: The byte to seek to when the file is opened. - - :type mode: str - :param mode: The mode to use to open the file - - :type open_function: function - :param open_function: The function to use to open the file - """ - self._filename = filename - self._fileobj = None - self._start_byte = start_byte - self._mode = mode - self._open_function = open_function - - def _open_if_needed(self): - if self._fileobj is None: - self._fileobj = self._open_function(self._filename, self._mode) - if self._start_byte != 0: - self._fileobj.seek(self._start_byte) - - @property - def name(self): - return self._filename - - def read(self, amount=None): - self._open_if_needed() - return self._fileobj.read(amount) - - def write(self, data): - self._open_if_needed() - self._fileobj.write(data) - +class DeferredOpenFile(object): + def __init__(self, filename, start_byte=0, mode='rb', open_function=open): + """A class that defers the opening of a file till needed + + This is useful for deferring opening of a file till it is needed + in a separate thread, as there is a limit of how many open files + there can be in a single thread for most operating systems. The + file gets opened in the following methods: ``read()``, ``seek()``, + and ``__enter__()`` + + :type filename: str + :param filename: The name of the file to open + + :type start_byte: int + :param start_byte: The byte to seek to when the file is opened. 
+ + :type mode: str + :param mode: The mode to use to open the file + + :type open_function: function + :param open_function: The function to use to open the file + """ + self._filename = filename + self._fileobj = None + self._start_byte = start_byte + self._mode = mode + self._open_function = open_function + + def _open_if_needed(self): + if self._fileobj is None: + self._fileobj = self._open_function(self._filename, self._mode) + if self._start_byte != 0: + self._fileobj.seek(self._start_byte) + + @property + def name(self): + return self._filename + + def read(self, amount=None): + self._open_if_needed() + return self._fileobj.read(amount) + + def write(self, data): + self._open_if_needed() + self._fileobj.write(data) + def seek(self, where, whence=0): - self._open_if_needed() + self._open_if_needed() self._fileobj.seek(where, whence) - - def tell(self): - if self._fileobj is None: - return self._start_byte - return self._fileobj.tell() - - def close(self): - if self._fileobj: - self._fileobj.close() - - def __enter__(self): - self._open_if_needed() - return self - - def __exit__(self, *args, **kwargs): - self.close() - - -class ReadFileChunk(object): - def __init__(self, fileobj, chunk_size, full_file_size, - callbacks=None, enable_callbacks=True, close_callbacks=None): - """ - - Given a file object shown below:: - - |___________________________________________________| - 0 | | full_file_size - |----chunk_size---| - f.tell() - - :type fileobj: file - :param fileobj: File like object - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callbacks: A list of function(amount_read) - :param callbacks: Called whenever data is read from this object in the - order provided. - - :type enable_callbacks: boolean - :param enable_callbacks: True if to run callbacks. Otherwise, do not - run callbacks - - :type close_callbacks: A list of function() - :param close_callbacks: Called when close is called. The function - should take no arguments. - """ - self._fileobj = fileobj - self._start_byte = self._fileobj.tell() - self._size = self._calculate_file_size( - self._fileobj, requested_size=chunk_size, - start_byte=self._start_byte, actual_file_size=full_file_size) + + def tell(self): + if self._fileobj is None: + return self._start_byte + return self._fileobj.tell() + + def close(self): + if self._fileobj: + self._fileobj.close() + + def __enter__(self): + self._open_if_needed() + return self + + def __exit__(self, *args, **kwargs): + self.close() + + +class ReadFileChunk(object): + def __init__(self, fileobj, chunk_size, full_file_size, + callbacks=None, enable_callbacks=True, close_callbacks=None): + """ + + Given a file object shown below:: + + |___________________________________________________| + 0 | | full_file_size + |----chunk_size---| + f.tell() + + :type fileobj: file + :param fileobj: File like object + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callbacks: A list of function(amount_read) + :param callbacks: Called whenever data is read from this object in the + order provided. 
+ + :type enable_callbacks: boolean + :param enable_callbacks: True if to run callbacks. Otherwise, do not + run callbacks + + :type close_callbacks: A list of function() + :param close_callbacks: Called when close is called. The function + should take no arguments. + """ + self._fileobj = fileobj + self._start_byte = self._fileobj.tell() + self._size = self._calculate_file_size( + self._fileobj, requested_size=chunk_size, + start_byte=self._start_byte, actual_file_size=full_file_size) # _amount_read represents the position in the chunk and may exceed # the chunk size, but won't allow reads out of bounds. - self._amount_read = 0 - self._callbacks = callbacks - if callbacks is None: - self._callbacks = [] - self._callbacks_enabled = enable_callbacks - self._close_callbacks = close_callbacks - if close_callbacks is None: - self._close_callbacks = close_callbacks - - @classmethod - def from_filename(cls, filename, start_byte, chunk_size, callbacks=None, - enable_callbacks=True): - """Convenience factory function to create from a filename. - - :type start_byte: int - :param start_byte: The first byte from which to start reading. - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callbacks: function(amount_read) - :param callbacks: Called whenever data is read from this object. - - :type enable_callbacks: bool - :param enable_callbacks: Indicate whether to invoke callback - during read() calls. - - :rtype: ``ReadFileChunk`` - :return: A new instance of ``ReadFileChunk`` - - """ - f = open(filename, 'rb') - f.seek(start_byte) - file_size = os.fstat(f.fileno()).st_size - return cls(f, chunk_size, file_size, callbacks, enable_callbacks) - - def _calculate_file_size(self, fileobj, requested_size, start_byte, - actual_file_size): - max_chunk_size = actual_file_size - start_byte - return min(max_chunk_size, requested_size) - - def read(self, amount=None): + self._amount_read = 0 + self._callbacks = callbacks + if callbacks is None: + self._callbacks = [] + self._callbacks_enabled = enable_callbacks + self._close_callbacks = close_callbacks + if close_callbacks is None: + self._close_callbacks = close_callbacks + + @classmethod + def from_filename(cls, filename, start_byte, chunk_size, callbacks=None, + enable_callbacks=True): + """Convenience factory function to create from a filename. + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callbacks: function(amount_read) + :param callbacks: Called whenever data is read from this object. + + :type enable_callbacks: bool + :param enable_callbacks: Indicate whether to invoke callback + during read() calls. 
+ + :rtype: ``ReadFileChunk`` + :return: A new instance of ``ReadFileChunk`` + + """ + f = open(filename, 'rb') + f.seek(start_byte) + file_size = os.fstat(f.fileno()).st_size + return cls(f, chunk_size, file_size, callbacks, enable_callbacks) + + def _calculate_file_size(self, fileobj, requested_size, start_byte, + actual_file_size): + max_chunk_size = actual_file_size - start_byte + return min(max_chunk_size, requested_size) + + def read(self, amount=None): amount_left = max(self._size - self._amount_read, 0) - if amount is None: + if amount is None: amount_to_read = amount_left - else: + else: amount_to_read = min(amount_left, amount) - data = self._fileobj.read(amount_to_read) - self._amount_read += len(data) - if self._callbacks is not None and self._callbacks_enabled: - invoke_progress_callbacks(self._callbacks, len(data)) - return data - - def signal_transferring(self): - self.enable_callback() - if hasattr(self._fileobj, 'signal_transferring'): - self._fileobj.signal_transferring() - - def signal_not_transferring(self): - self.disable_callback() - if hasattr(self._fileobj, 'signal_not_transferring'): - self._fileobj.signal_not_transferring() - - def enable_callback(self): - self._callbacks_enabled = True - - def disable_callback(self): - self._callbacks_enabled = False - + data = self._fileobj.read(amount_to_read) + self._amount_read += len(data) + if self._callbacks is not None and self._callbacks_enabled: + invoke_progress_callbacks(self._callbacks, len(data)) + return data + + def signal_transferring(self): + self.enable_callback() + if hasattr(self._fileobj, 'signal_transferring'): + self._fileobj.signal_transferring() + + def signal_not_transferring(self): + self.disable_callback() + if hasattr(self._fileobj, 'signal_not_transferring'): + self._fileobj.signal_not_transferring() + + def enable_callback(self): + self._callbacks_enabled = True + + def disable_callback(self): + self._callbacks_enabled = False + def seek(self, where, whence=0): if whence not in (0, 1, 2): # Mimic io's error for invalid whence values @@ -517,247 +517,247 @@ class ReadFileChunk(object): where += self._size self._fileobj.seek(max(where, self._start_byte)) - if self._callbacks is not None and self._callbacks_enabled: - # To also rewind the callback() for an accurate progress report + if self._callbacks is not None and self._callbacks_enabled: + # To also rewind the callback() for an accurate progress report bounded_where = max(min(where - self._start_byte, self._size), 0) bounded_amount_read = min(self._amount_read, self._size) amount = bounded_where - bounded_amount_read - invoke_progress_callbacks( + invoke_progress_callbacks( self._callbacks, bytes_transferred=amount) self._amount_read = max(where - self._start_byte, 0) - - def close(self): - if self._close_callbacks is not None and self._callbacks_enabled: - for callback in self._close_callbacks: - callback() - self._fileobj.close() - - def tell(self): - return self._amount_read - - def __len__(self): - # __len__ is defined because requests will try to determine the length - # of the stream to set a content length. In the normal case - # of the file it will just stat the file, but we need to change that - # behavior. By providing a __len__, requests will use that instead - # of stat'ing the file. 
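# Illustrative sketch (editor's note, not from the s3transfer sources): typical
# ReadFileChunk usage — wrap a file so reads are bounded to one part and
# progress callbacks fire as bytes are consumed.  The path and sizes are
# made-up examples.
def on_progress(bytes_transferred):
    print('read %d bytes' % bytes_transferred)

chunk = ReadFileChunk.from_filename(
    '/tmp/myfile', start_byte=0, chunk_size=8 * 1024 ** 2,
    callbacks=[on_progress], enable_callbacks=True)
data = chunk.read(1024)     # invokes on_progress(bytes_transferred=1024)
chunk.close()
# (end of sketch)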
- return self._size - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - def __iter__(self): - # This is a workaround for http://bugs.python.org/issue17575 - # Basically httplib will try to iterate over the contents, even - # if its a file like object. This wasn't noticed because we've - # already exhausted the stream so iterating over the file immediately - # stops, which is what we're simulating here. - return iter([]) - - -class StreamReaderProgress(object): - """Wrapper for a read only stream that adds progress callbacks.""" - def __init__(self, stream, callbacks=None): - self._stream = stream - self._callbacks = callbacks - if callbacks is None: - self._callbacks = [] - - def read(self, *args, **kwargs): - value = self._stream.read(*args, **kwargs) - invoke_progress_callbacks(self._callbacks, len(value)) - return value - - -class NoResourcesAvailable(Exception): - pass - - -class TaskSemaphore(object): - def __init__(self, count): - """A semaphore for the purpose of limiting the number of tasks - - :param count: The size of semaphore - """ - self._semaphore = threading.Semaphore(count) - - def acquire(self, tag, blocking=True): - """Acquire the semaphore - - :param tag: A tag identifying what is acquiring the semaphore. Note - that this is not really needed to directly use this class but is - needed for API compatibility with the SlidingWindowSemaphore - implementation. - :param block: If True, block until it can be acquired. If False, - do not block and raise an exception if cannot be aquired. - - :returns: A token (can be None) to use when releasing the semaphore - """ - logger.debug("Acquiring %s", tag) - if not self._semaphore.acquire(blocking): - raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) - - def release(self, tag, acquire_token): - """Release the semaphore - - :param tag: A tag identifying what is releasing the semaphore - :param acquire_token: The token returned from when the semaphore was - acquired. Note that this is not really needed to directly use this - class but is needed for API compatibility with the - SlidingWindowSemaphore implementation. - """ - logger.debug("Releasing acquire %s/%s" % (tag, acquire_token)) - self._semaphore.release() - - -class SlidingWindowSemaphore(TaskSemaphore): - """A semaphore used to coordinate sequential resource access. - - This class is similar to the stdlib BoundedSemaphore: - - * It's initialized with a count. - * Each call to ``acquire()`` decrements the counter. - * If the count is at zero, then ``acquire()`` will either block until the - count increases, or if ``blocking=False``, then it will raise - a NoResourcesAvailable exception indicating that it failed to acquire the - semaphore. - - The main difference is that this semaphore is used to limit - access to a resource that requires sequential access. For example, - if I want to access resource R that has 20 subresources R_0 - R_19, - this semaphore can also enforce that you only have a max range of - 10 at any given point in time. You must also specify a tag name - when you acquire the semaphore. The sliding window semantics apply - on a per tag basis. The internal count will only be incremented - when the minimum sequence number for a tag is released. - - """ - def __init__(self, count): - self._count = count - # Dict[tag, next_sequence_number]. 
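# Illustrative sketch of TaskSemaphore as defined above (tag names are
# arbitrary): at most `count` tasks may hold the semaphore at once, and a
# non-blocking acquire on an exhausted semaphore raises NoResourcesAvailable.
sem = TaskSemaphore(2)
token_a = sem.acquire('upload-part-a')     # token is None for this class
token_b = sem.acquire('upload-part-b')
try:
    sem.acquire('upload-part-c', blocking=False)
except NoResourcesAvailable:
    pass                                   # no slots left; try again later
sem.release('upload-part-a', token_a)      # frees a slot for other tasks
sem.release('upload-part-b', token_b)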
- self._tag_sequences = defaultdict(int) - self._lowest_sequence = {} - self._lock = threading.Lock() - self._condition = threading.Condition(self._lock) - # Dict[tag, List[sequence_number]] - self._pending_release = {} - - def current_count(self): - with self._lock: - return self._count - - def acquire(self, tag, blocking=True): - logger.debug("Acquiring %s", tag) - self._condition.acquire() - try: - if self._count == 0: - if not blocking: - raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) - else: - while self._count == 0: - self._condition.wait() - # self._count is no longer zero. - # First, check if this is the first time we're seeing this tag. - sequence_number = self._tag_sequences[tag] - if sequence_number == 0: - # First time seeing the tag, so record we're at 0. - self._lowest_sequence[tag] = sequence_number - self._tag_sequences[tag] += 1 - self._count -= 1 - return sequence_number - finally: - self._condition.release() - - def release(self, tag, acquire_token): - sequence_number = acquire_token - logger.debug("Releasing acquire %s/%s", tag, sequence_number) - self._condition.acquire() - try: - if tag not in self._tag_sequences: - raise ValueError("Attempted to release unknown tag: %s" % tag) - max_sequence = self._tag_sequences[tag] - if self._lowest_sequence[tag] == sequence_number: - # We can immediately process this request and free up - # resources. - self._lowest_sequence[tag] += 1 - self._count += 1 - self._condition.notify() - queued = self._pending_release.get(tag, []) - while queued: - if self._lowest_sequence[tag] == queued[-1]: - queued.pop() - self._lowest_sequence[tag] += 1 - self._count += 1 - else: - break - elif self._lowest_sequence[tag] < sequence_number < max_sequence: - # We can't do anything right now because we're still waiting - # for the min sequence for the tag to be released. We have - # to queue this for pending release. - self._pending_release.setdefault( - tag, []).append(sequence_number) - self._pending_release[tag].sort(reverse=True) - else: - raise ValueError( - "Attempted to release unknown sequence number " - "%s for tag: %s" % (sequence_number, tag)) - finally: - self._condition.release() - - -class ChunksizeAdjuster(object): - def __init__(self, max_size=MAX_SINGLE_UPLOAD_SIZE, - min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS): - self.max_size = max_size - self.min_size = min_size - self.max_parts = max_parts - - def adjust_chunksize(self, current_chunksize, file_size=None): - """Get a chunksize close to current that fits within all S3 limits. - - :type current_chunksize: int - :param current_chunksize: The currently configured chunksize. - - :type file_size: int or None - :param file_size: The size of the file to upload. This might be None - if the object being transferred has an unknown size. - - :returns: A valid chunksize that fits within configured limits. - """ - chunksize = current_chunksize - if file_size is not None: - chunksize = self._adjust_for_max_parts(chunksize, file_size) - return self._adjust_for_chunksize_limits(chunksize) - - def _adjust_for_chunksize_limits(self, current_chunksize): - if current_chunksize > self.max_size: - logger.debug( - "Chunksize greater than maximum chunksize. " - "Setting to %s from %s." % (self.max_size, current_chunksize)) - return self.max_size - elif current_chunksize < self.min_size: - logger.debug( - "Chunksize less than minimum chunksize. " - "Setting to %s from %s." 
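# Illustrative sketch of the sliding-window semantics implemented above:
# acquire() hands out per-tag sequence numbers, and capacity only returns
# once the lowest outstanding sequence number for that tag is released.
window = SlidingWindowSemaphore(2)
seq0 = window.acquire('parts')      # sequence number 0
seq1 = window.acquire('parts')      # sequence number 1
window.release('parts', seq1)       # out of order: queued, window still full
assert window.current_count() == 0
window.release('parts', seq0)       # frees slot 0 and the queued slot 1
assert window.current_count() == 2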
% (self.min_size, current_chunksize)) - return self.min_size - else: - return current_chunksize - - def _adjust_for_max_parts(self, current_chunksize, file_size): - chunksize = current_chunksize - num_parts = int(math.ceil(file_size / float(chunksize))) - - while num_parts > self.max_parts: - chunksize *= 2 - num_parts = int(math.ceil(file_size / float(chunksize))) - - if chunksize != current_chunksize: - logger.debug( - "Chunksize would result in the number of parts exceeding the " - "maximum. Setting to %s from %s." % - (chunksize, current_chunksize)) - - return chunksize + + def close(self): + if self._close_callbacks is not None and self._callbacks_enabled: + for callback in self._close_callbacks: + callback() + self._fileobj.close() + + def tell(self): + return self._amount_read + + def __len__(self): + # __len__ is defined because requests will try to determine the length + # of the stream to set a content length. In the normal case + # of the file it will just stat the file, but we need to change that + # behavior. By providing a __len__, requests will use that instead + # of stat'ing the file. + return self._size + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def __iter__(self): + # This is a workaround for http://bugs.python.org/issue17575 + # Basically httplib will try to iterate over the contents, even + # if its a file like object. This wasn't noticed because we've + # already exhausted the stream so iterating over the file immediately + # stops, which is what we're simulating here. + return iter([]) + + +class StreamReaderProgress(object): + """Wrapper for a read only stream that adds progress callbacks.""" + def __init__(self, stream, callbacks=None): + self._stream = stream + self._callbacks = callbacks + if callbacks is None: + self._callbacks = [] + + def read(self, *args, **kwargs): + value = self._stream.read(*args, **kwargs) + invoke_progress_callbacks(self._callbacks, len(value)) + return value + + +class NoResourcesAvailable(Exception): + pass + + +class TaskSemaphore(object): + def __init__(self, count): + """A semaphore for the purpose of limiting the number of tasks + + :param count: The size of semaphore + """ + self._semaphore = threading.Semaphore(count) + + def acquire(self, tag, blocking=True): + """Acquire the semaphore + + :param tag: A tag identifying what is acquiring the semaphore. Note + that this is not really needed to directly use this class but is + needed for API compatibility with the SlidingWindowSemaphore + implementation. + :param block: If True, block until it can be acquired. If False, + do not block and raise an exception if cannot be aquired. + + :returns: A token (can be None) to use when releasing the semaphore + """ + logger.debug("Acquiring %s", tag) + if not self._semaphore.acquire(blocking): + raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) + + def release(self, tag, acquire_token): + """Release the semaphore + + :param tag: A tag identifying what is releasing the semaphore + :param acquire_token: The token returned from when the semaphore was + acquired. Note that this is not really needed to directly use this + class but is needed for API compatibility with the + SlidingWindowSemaphore implementation. + """ + logger.debug("Releasing acquire %s/%s" % (tag, acquire_token)) + self._semaphore.release() + + +class SlidingWindowSemaphore(TaskSemaphore): + """A semaphore used to coordinate sequential resource access. 
+ + This class is similar to the stdlib BoundedSemaphore: + + * It's initialized with a count. + * Each call to ``acquire()`` decrements the counter. + * If the count is at zero, then ``acquire()`` will either block until the + count increases, or if ``blocking=False``, then it will raise + a NoResourcesAvailable exception indicating that it failed to acquire the + semaphore. + + The main difference is that this semaphore is used to limit + access to a resource that requires sequential access. For example, + if I want to access resource R that has 20 subresources R_0 - R_19, + this semaphore can also enforce that you only have a max range of + 10 at any given point in time. You must also specify a tag name + when you acquire the semaphore. The sliding window semantics apply + on a per tag basis. The internal count will only be incremented + when the minimum sequence number for a tag is released. + + """ + def __init__(self, count): + self._count = count + # Dict[tag, next_sequence_number]. + self._tag_sequences = defaultdict(int) + self._lowest_sequence = {} + self._lock = threading.Lock() + self._condition = threading.Condition(self._lock) + # Dict[tag, List[sequence_number]] + self._pending_release = {} + + def current_count(self): + with self._lock: + return self._count + + def acquire(self, tag, blocking=True): + logger.debug("Acquiring %s", tag) + self._condition.acquire() + try: + if self._count == 0: + if not blocking: + raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) + else: + while self._count == 0: + self._condition.wait() + # self._count is no longer zero. + # First, check if this is the first time we're seeing this tag. + sequence_number = self._tag_sequences[tag] + if sequence_number == 0: + # First time seeing the tag, so record we're at 0. + self._lowest_sequence[tag] = sequence_number + self._tag_sequences[tag] += 1 + self._count -= 1 + return sequence_number + finally: + self._condition.release() + + def release(self, tag, acquire_token): + sequence_number = acquire_token + logger.debug("Releasing acquire %s/%s", tag, sequence_number) + self._condition.acquire() + try: + if tag not in self._tag_sequences: + raise ValueError("Attempted to release unknown tag: %s" % tag) + max_sequence = self._tag_sequences[tag] + if self._lowest_sequence[tag] == sequence_number: + # We can immediately process this request and free up + # resources. + self._lowest_sequence[tag] += 1 + self._count += 1 + self._condition.notify() + queued = self._pending_release.get(tag, []) + while queued: + if self._lowest_sequence[tag] == queued[-1]: + queued.pop() + self._lowest_sequence[tag] += 1 + self._count += 1 + else: + break + elif self._lowest_sequence[tag] < sequence_number < max_sequence: + # We can't do anything right now because we're still waiting + # for the min sequence for the tag to be released. We have + # to queue this for pending release. + self._pending_release.setdefault( + tag, []).append(sequence_number) + self._pending_release[tag].sort(reverse=True) + else: + raise ValueError( + "Attempted to release unknown sequence number " + "%s for tag: %s" % (sequence_number, tag)) + finally: + self._condition.release() + + +class ChunksizeAdjuster(object): + def __init__(self, max_size=MAX_SINGLE_UPLOAD_SIZE, + min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS): + self.max_size = max_size + self.min_size = min_size + self.max_parts = max_parts + + def adjust_chunksize(self, current_chunksize, file_size=None): + """Get a chunksize close to current that fits within all S3 limits. 
+ + :type current_chunksize: int + :param current_chunksize: The currently configured chunksize. + + :type file_size: int or None + :param file_size: The size of the file to upload. This might be None + if the object being transferred has an unknown size. + + :returns: A valid chunksize that fits within configured limits. + """ + chunksize = current_chunksize + if file_size is not None: + chunksize = self._adjust_for_max_parts(chunksize, file_size) + return self._adjust_for_chunksize_limits(chunksize) + + def _adjust_for_chunksize_limits(self, current_chunksize): + if current_chunksize > self.max_size: + logger.debug( + "Chunksize greater than maximum chunksize. " + "Setting to %s from %s." % (self.max_size, current_chunksize)) + return self.max_size + elif current_chunksize < self.min_size: + logger.debug( + "Chunksize less than minimum chunksize. " + "Setting to %s from %s." % (self.min_size, current_chunksize)) + return self.min_size + else: + return current_chunksize + + def _adjust_for_max_parts(self, current_chunksize, file_size): + chunksize = current_chunksize + num_parts = int(math.ceil(file_size / float(chunksize))) + + while num_parts > self.max_parts: + chunksize *= 2 + num_parts = int(math.ceil(file_size / float(chunksize))) + + if chunksize != current_chunksize: + logger.debug( + "Chunksize would result in the number of parts exceeding the " + "maximum. Setting to %s from %s." % + (chunksize, current_chunksize)) + + return chunksize diff --git a/contrib/python/s3transfer/py2/ya.make b/contrib/python/s3transfer/py2/ya.make index 4a40935092..45eb28f88c 100644 --- a/contrib/python/s3transfer/py2/ya.make +++ b/contrib/python/s3transfer/py2/ya.make @@ -1,51 +1,51 @@ # Generated by devtools/yamaker (pypi). PY2_LIBRARY() - + OWNER(gebetix g:python-contrib) VERSION(0.4.2) - + LICENSE(Apache-2.0) - -PEERDIR( - contrib/python/botocore + +PEERDIR( + contrib/python/botocore contrib/python/futures -) - +) + NO_LINT() NO_CHECK_IMPORTS( s3transfer.crt ) -PY_SRCS( - TOP_LEVEL - s3transfer/__init__.py - s3transfer/bandwidth.py - s3transfer/compat.py +PY_SRCS( + TOP_LEVEL + s3transfer/__init__.py + s3transfer/bandwidth.py + s3transfer/compat.py s3transfer/constants.py s3transfer/copies.py s3transfer/crt.py - s3transfer/delete.py - s3transfer/download.py - s3transfer/exceptions.py - s3transfer/futures.py - s3transfer/manager.py + s3transfer/delete.py + s3transfer/download.py + s3transfer/exceptions.py + s3transfer/futures.py + s3transfer/manager.py s3transfer/processpool.py - s3transfer/subscribers.py - s3transfer/tasks.py - s3transfer/upload.py - s3transfer/utils.py -) - + s3transfer/subscribers.py + s3transfer/tasks.py + s3transfer/upload.py + s3transfer/utils.py +) + RESOURCE_FILES( PREFIX contrib/python/s3transfer/py2/ .dist-info/METADATA .dist-info/top_level.txt ) -END() +END() RECURSE_FOR_TESTS( tests diff --git a/contrib/python/s3transfer/py3/s3transfer/__init__.py b/contrib/python/s3transfer/py3/s3transfer/__init__.py index 15db8ce5fc..1a749c712e 100644 --- a/contrib/python/s3transfer/py3/s3transfer/__init__.py +++ b/contrib/python/s3transfer/py3/s3transfer/__init__.py @@ -1,186 +1,186 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. 
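# Worked example of ChunksizeAdjuster (assuming the library's default limits,
# e.g. a MAX_PARTS of 10,000 as imposed by S3 multipart uploads): an 8 MiB
# chunksize would split a 100 GiB upload into 12,800 parts, so the chunksize
# is doubled until the part count fits.
adjuster = ChunksizeAdjuster()
file_size = 100 * 1024 * 1024 * 1024               # 100 GiB
chunksize = adjuster.adjust_chunksize(8 * 1024 * 1024, file_size)
# chunksize is now 16 MiB, which yields 6,400 parts.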
This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -"""Abstractions over S3's upload/download operations. - -This module provides high level abstractions for efficient -uploads/downloads. It handles several things for the user: - -* Automatically switching to multipart transfers when - a file is over a specific size threshold -* Uploading/downloading a file in parallel -* Throttling based on max bandwidth -* Progress callbacks to monitor transfers -* Retries. While botocore handles retries for streaming uploads, - it is not possible for it to handle retries for streaming - downloads. This module handles retries for both cases so - you don't need to implement any retry logic yourself. - -This module has a reasonable set of defaults. It also allows you -to configure many aspects of the transfer process including: - -* Multipart threshold size -* Max parallel downloads -* Max bandwidth -* Socket timeouts -* Retry amounts - -There is no support for s3->s3 multipart copies at this -time. - - -.. _ref_s3transfer_usage: - -Usage -===== - -The simplest way to use this module is: - -.. code-block:: python - - client = boto3.client('s3', 'us-west-2') - transfer = S3Transfer(client) - # Upload /tmp/myfile to s3://bucket/key - transfer.upload_file('/tmp/myfile', 'bucket', 'key') - - # Download s3://bucket/key to /tmp/myfile - transfer.download_file('bucket', 'key', '/tmp/myfile') - -The ``upload_file`` and ``download_file`` methods also accept -``**kwargs``, which will be forwarded through to the corresponding -client operation. Here are a few examples using ``upload_file``:: - - # Making the object public - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - extra_args={'ACL': 'public-read'}) - - # Setting metadata - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - extra_args={'Metadata': {'a': 'b', 'c': 'd'}}) - - # Setting content type - transfer.upload_file('/tmp/myfile.json', 'bucket', 'key', - extra_args={'ContentType': "application/json"}) - - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +"""Abstractions over S3's upload/download operations. + +This module provides high level abstractions for efficient +uploads/downloads. It handles several things for the user: + +* Automatically switching to multipart transfers when + a file is over a specific size threshold +* Uploading/downloading a file in parallel +* Throttling based on max bandwidth +* Progress callbacks to monitor transfers +* Retries. While botocore handles retries for streaming uploads, + it is not possible for it to handle retries for streaming + downloads. This module handles retries for both cases so + you don't need to implement any retry logic yourself. + +This module has a reasonable set of defaults. 
It also allows you +to configure many aspects of the transfer process including: + +* Multipart threshold size +* Max parallel downloads +* Max bandwidth +* Socket timeouts +* Retry amounts + +There is no support for s3->s3 multipart copies at this +time. + + +.. _ref_s3transfer_usage: + +Usage +===== + +The simplest way to use this module is: + +.. code-block:: python + + client = boto3.client('s3', 'us-west-2') + transfer = S3Transfer(client) + # Upload /tmp/myfile to s3://bucket/key + transfer.upload_file('/tmp/myfile', 'bucket', 'key') + + # Download s3://bucket/key to /tmp/myfile + transfer.download_file('bucket', 'key', '/tmp/myfile') + +The ``upload_file`` and ``download_file`` methods also accept +``**kwargs``, which will be forwarded through to the corresponding +client operation. Here are a few examples using ``upload_file``:: + + # Making the object public + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + extra_args={'ACL': 'public-read'}) + + # Setting metadata + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + extra_args={'Metadata': {'a': 'b', 'c': 'd'}}) + + # Setting content type + transfer.upload_file('/tmp/myfile.json', 'bucket', 'key', + extra_args={'ContentType': "application/json"}) + + The ``S3Transfer`` class also supports progress callbacks so you can -provide transfer progress to users. Both the ``upload_file`` and -``download_file`` methods take an optional ``callback`` parameter. -Here's an example of how to print a simple progress percentage -to the user: - -.. code-block:: python - - class ProgressPercentage(object): - def __init__(self, filename): - self._filename = filename - self._size = float(os.path.getsize(filename)) - self._seen_so_far = 0 - self._lock = threading.Lock() - - def __call__(self, bytes_amount): - # To simplify we'll assume this is hooked up - # to a single filename. - with self._lock: - self._seen_so_far += bytes_amount - percentage = (self._seen_so_far / self._size) * 100 - sys.stdout.write( - "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far, - self._size, percentage)) - sys.stdout.flush() - - - transfer = S3Transfer(boto3.client('s3', 'us-west-2')) - # Upload /tmp/myfile to s3://bucket/key and print upload progress. - transfer.upload_file('/tmp/myfile', 'bucket', 'key', - callback=ProgressPercentage('/tmp/myfile')) - - - -You can also provide a TransferConfig object to the S3Transfer -object that gives you more fine grained control over the -transfer. For example: - -.. code-block:: python - - client = boto3.client('s3', 'us-west-2') - config = TransferConfig( - multipart_threshold=8 * 1024 * 1024, - max_concurrency=10, - num_download_attempts=10, - ) - transfer = S3Transfer(client, config) - transfer.upload_file('/tmp/foo', 'bucket', 'key') - - -""" +provide transfer progress to users. Both the ``upload_file`` and +``download_file`` methods take an optional ``callback`` parameter. +Here's an example of how to print a simple progress percentage +to the user: + +.. code-block:: python + + class ProgressPercentage(object): + def __init__(self, filename): + self._filename = filename + self._size = float(os.path.getsize(filename)) + self._seen_so_far = 0 + self._lock = threading.Lock() + + def __call__(self, bytes_amount): + # To simplify we'll assume this is hooked up + # to a single filename. 
+ with self._lock: + self._seen_so_far += bytes_amount + percentage = (self._seen_so_far / self._size) * 100 + sys.stdout.write( + "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far, + self._size, percentage)) + sys.stdout.flush() + + + transfer = S3Transfer(boto3.client('s3', 'us-west-2')) + # Upload /tmp/myfile to s3://bucket/key and print upload progress. + transfer.upload_file('/tmp/myfile', 'bucket', 'key', + callback=ProgressPercentage('/tmp/myfile')) + + + +You can also provide a TransferConfig object to the S3Transfer +object that gives you more fine grained control over the +transfer. For example: + +.. code-block:: python + + client = boto3.client('s3', 'us-west-2') + config = TransferConfig( + multipart_threshold=8 * 1024 * 1024, + max_concurrency=10, + num_download_attempts=10, + ) + transfer = S3Transfer(client, config) + transfer.upload_file('/tmp/foo', 'bucket', 'key') + + +""" import concurrent.futures -import functools -import logging +import functools +import logging import math import os import queue import random -import socket +import socket import string -import threading - +import threading + from botocore.compat import six # noqa: F401 -from botocore.exceptions import IncompleteReadError +from botocore.exceptions import IncompleteReadError from botocore.vendored.requests.packages.urllib3.exceptions import ( ReadTimeoutError, ) - -import s3transfer.compat -from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError - -__author__ = 'Amazon Web Services' + +import s3transfer.compat +from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError + +__author__ = 'Amazon Web Services' __version__ = '0.5.1' - - -class NullHandler(logging.Handler): - def emit(self, record): - pass - - -logger = logging.getLogger(__name__) -logger.addHandler(NullHandler()) - -MB = 1024 * 1024 -SHUTDOWN_SENTINEL = object() - - -def random_file_extension(num_digits=8): - return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) - - -def disable_upload_callbacks(request, operation_name, **kwargs): + + +class NullHandler(logging.Handler): + def emit(self, record): + pass + + +logger = logging.getLogger(__name__) +logger.addHandler(NullHandler()) + +MB = 1024 * 1024 +SHUTDOWN_SENTINEL = object() + + +def random_file_extension(num_digits=8): + return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) + + +def disable_upload_callbacks(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and hasattr( request.body, 'disable_callback' ): - request.body.disable_callback() - - -def enable_upload_callbacks(request, operation_name, **kwargs): + request.body.disable_callback() + + +def enable_upload_callbacks(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and hasattr( request.body, 'enable_callback' ): - request.body.enable_callback() - - -class QueueShutdownError(Exception): - pass - - + request.body.enable_callback() + + +class QueueShutdownError(Exception): + pass + + class ReadFileChunk: def __init__( self, @@ -191,48 +191,48 @@ class ReadFileChunk: callback=None, enable_callback=True, ): - """ - - Given a file object shown below: - - |___________________________________________________| - 0 | | full_file_size - |----chunk_size---| - start_byte - - :type fileobj: file - :param fileobj: File like object - - :type start_byte: int - :param start_byte: The first byte from which to start reading. 
- - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callback: function(amount_read) - :param callback: Called whenever data is read from this object. - - """ - self._fileobj = fileobj - self._start_byte = start_byte - self._size = self._calculate_file_size( + """ + + Given a file object shown below: + + |___________________________________________________| + 0 | | full_file_size + |----chunk_size---| + start_byte + + :type fileobj: file + :param fileobj: File like object + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callback: function(amount_read) + :param callback: Called whenever data is read from this object. + + """ + self._fileobj = fileobj + self._start_byte = start_byte + self._size = self._calculate_file_size( self._fileobj, requested_size=chunk_size, start_byte=start_byte, actual_file_size=full_file_size, ) - self._fileobj.seek(self._start_byte) - self._amount_read = 0 - self._callback = callback - self._callback_enabled = enable_callback - - @classmethod + self._fileobj.seek(self._start_byte) + self._amount_read = 0 + self._callback = callback + self._callback_enabled = enable_callback + + @classmethod def from_filename( cls, filename, @@ -241,145 +241,145 @@ class ReadFileChunk: callback=None, enable_callback=True, ): - """Convenience factory function to create from a filename. - - :type start_byte: int - :param start_byte: The first byte from which to start reading. - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callback: function(amount_read) - :param callback: Called whenever data is read from this object. - - :type enable_callback: bool - :param enable_callback: Indicate whether to invoke callback - during read() calls. - - :rtype: ``ReadFileChunk`` - :return: A new instance of ``ReadFileChunk`` - - """ - f = open(filename, 'rb') - file_size = os.fstat(f.fileno()).st_size + """Convenience factory function to create from a filename. + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callback: function(amount_read) + :param callback: Called whenever data is read from this object. + + :type enable_callback: bool + :param enable_callback: Indicate whether to invoke callback + during read() calls. 
+ + :rtype: ``ReadFileChunk`` + :return: A new instance of ``ReadFileChunk`` + + """ + f = open(filename, 'rb') + file_size = os.fstat(f.fileno()).st_size return cls( f, start_byte, chunk_size, file_size, callback, enable_callback ) - + def _calculate_file_size( self, fileobj, requested_size, start_byte, actual_file_size ): - max_chunk_size = actual_file_size - start_byte - return min(max_chunk_size, requested_size) - - def read(self, amount=None): - if amount is None: - amount_to_read = self._size - self._amount_read - else: - amount_to_read = min(self._size - self._amount_read, amount) - data = self._fileobj.read(amount_to_read) - self._amount_read += len(data) - if self._callback is not None and self._callback_enabled: - self._callback(len(data)) - return data - - def enable_callback(self): - self._callback_enabled = True - - def disable_callback(self): - self._callback_enabled = False - - def seek(self, where): - self._fileobj.seek(self._start_byte + where) - if self._callback is not None and self._callback_enabled: - # To also rewind the callback() for an accurate progress report - self._callback(where - self._amount_read) - self._amount_read = where - - def close(self): - self._fileobj.close() - - def tell(self): - return self._amount_read - - def __len__(self): - # __len__ is defined because requests will try to determine the length - # of the stream to set a content length. In the normal case - # of the file it will just stat the file, but we need to change that - # behavior. By providing a __len__, requests will use that instead - # of stat'ing the file. - return self._size - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - def __iter__(self): - # This is a workaround for http://bugs.python.org/issue17575 - # Basically httplib will try to iterate over the contents, even - # if its a file like object. This wasn't noticed because we've - # already exhausted the stream so iterating over the file immediately - # stops, which is what we're simulating here. - return iter([]) - - + max_chunk_size = actual_file_size - start_byte + return min(max_chunk_size, requested_size) + + def read(self, amount=None): + if amount is None: + amount_to_read = self._size - self._amount_read + else: + amount_to_read = min(self._size - self._amount_read, amount) + data = self._fileobj.read(amount_to_read) + self._amount_read += len(data) + if self._callback is not None and self._callback_enabled: + self._callback(len(data)) + return data + + def enable_callback(self): + self._callback_enabled = True + + def disable_callback(self): + self._callback_enabled = False + + def seek(self, where): + self._fileobj.seek(self._start_byte + where) + if self._callback is not None and self._callback_enabled: + # To also rewind the callback() for an accurate progress report + self._callback(where - self._amount_read) + self._amount_read = where + + def close(self): + self._fileobj.close() + + def tell(self): + return self._amount_read + + def __len__(self): + # __len__ is defined because requests will try to determine the length + # of the stream to set a content length. In the normal case + # of the file it will just stat the file, but we need to change that + # behavior. By providing a __len__, requests will use that instead + # of stat'ing the file. 
+ return self._size + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def __iter__(self): + # This is a workaround for http://bugs.python.org/issue17575 + # Basically httplib will try to iterate over the contents, even + # if its a file like object. This wasn't noticed because we've + # already exhausted the stream so iterating over the file immediately + # stops, which is what we're simulating here. + return iter([]) + + class StreamReaderProgress: - """Wrapper for a read only stream that adds progress callbacks.""" - - def __init__(self, stream, callback=None): - self._stream = stream - self._callback = callback - - def read(self, *args, **kwargs): - value = self._stream.read(*args, **kwargs) - if self._callback is not None: - self._callback(len(value)) - return value - - + """Wrapper for a read only stream that adds progress callbacks.""" + + def __init__(self, stream, callback=None): + self._stream = stream + self._callback = callback + + def read(self, *args, **kwargs): + value = self._stream.read(*args, **kwargs) + if self._callback is not None: + self._callback(len(value)) + return value + + class OSUtils: - def get_file_size(self, filename): - return os.path.getsize(filename) - - def open_file_chunk_reader(self, filename, start_byte, size, callback): + def get_file_size(self, filename): + return os.path.getsize(filename) + + def open_file_chunk_reader(self, filename, start_byte, size, callback): return ReadFileChunk.from_filename( filename, start_byte, size, callback, enable_callback=False ) - - def open(self, filename, mode): - return open(filename, mode) - - def remove_file(self, filename): - """Remove a file, noop if file does not exist.""" - # Unlike os.remove, if the file does not exist, - # then this method does nothing. - try: - os.remove(filename) - except OSError: - pass - - def rename_file(self, current_filename, new_filename): - s3transfer.compat.rename_file(current_filename, new_filename) - - + + def open(self, filename, mode): + return open(filename, mode) + + def remove_file(self, filename): + """Remove a file, noop if file does not exist.""" + # Unlike os.remove, if the file does not exist, + # then this method does nothing. + try: + os.remove(filename) + except OSError: + pass + + def rename_file(self, current_filename, new_filename): + s3transfer.compat.rename_file(current_filename, new_filename) + + class MultipartUploader: - # These are the extra_args that need to be forwarded onto - # subsequent upload_parts. - UPLOAD_PART_ARGS = [ - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', - ] - + # These are the extra_args that need to be forwarded onto + # subsequent upload_parts. + UPLOAD_PART_ARGS = [ + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', + ] + def __init__( self, client, @@ -387,62 +387,62 @@ class MultipartUploader: osutil, executor_cls=concurrent.futures.ThreadPoolExecutor, ): - self._client = client - self._config = config - self._os = osutil - self._executor_cls = executor_cls - - def _extra_upload_part_args(self, extra_args): - # Only the args in UPLOAD_PART_ARGS actually need to be passed - # onto the upload_part calls. 
- upload_parts_args = {} - for key, value in extra_args.items(): - if key in self.UPLOAD_PART_ARGS: - upload_parts_args[key] = value - return upload_parts_args - - def upload_file(self, filename, bucket, key, callback, extra_args): + self._client = client + self._config = config + self._os = osutil + self._executor_cls = executor_cls + + def _extra_upload_part_args(self, extra_args): + # Only the args in UPLOAD_PART_ARGS actually need to be passed + # onto the upload_part calls. + upload_parts_args = {} + for key, value in extra_args.items(): + if key in self.UPLOAD_PART_ARGS: + upload_parts_args[key] = value + return upload_parts_args + + def upload_file(self, filename, bucket, key, callback, extra_args): response = self._client.create_multipart_upload( Bucket=bucket, Key=key, **extra_args ) - upload_id = response['UploadId'] - try: + upload_id = response['UploadId'] + try: parts = self._upload_parts( upload_id, filename, bucket, key, callback, extra_args ) - except Exception as e: + except Exception as e: logger.debug( "Exception raised while uploading parts, " "aborting multipart upload.", exc_info=True, ) - self._client.abort_multipart_upload( + self._client.abort_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id ) - raise S3UploadFailedError( + raise S3UploadFailedError( "Failed to upload {} to {}: {}".format( filename, '/'.join([bucket, key]), e ) ) - self._client.complete_multipart_upload( + self._client.complete_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id, MultipartUpload={'Parts': parts}, ) - + def _upload_parts( self, upload_id, filename, bucket, key, callback, extra_args ): - upload_parts_extra_args = self._extra_upload_part_args(extra_args) - parts = [] - part_size = self._config.multipart_chunksize - num_parts = int( + upload_parts_extra_args = self._extra_upload_part_args(extra_args) + parts = [] + part_size = self._config.multipart_chunksize + num_parts = int( math.ceil(self._os.get_file_size(filename) / float(part_size)) ) - max_workers = self._config.max_concurrency - with self._executor_cls(max_workers=max_workers) as executor: - upload_partial = functools.partial( + max_workers = self._config.max_concurrency + with self._executor_cls(max_workers=max_workers) as executor: + upload_partial = functools.partial( self._upload_one_part, filename, bucket, @@ -452,10 +452,10 @@ class MultipartUploader: upload_parts_extra_args, callback, ) - for part in executor.map(upload_partial, range(1, num_parts + 1)): - parts.append(part) - return parts - + for part in executor.map(upload_partial, range(1, num_parts + 1)): + parts.append(part) + return parts + def _upload_one_part( self, filename, @@ -467,11 +467,11 @@ class MultipartUploader: callback, part_number, ): - open_chunk_reader = self._os.open_file_chunk_reader + open_chunk_reader = self._os.open_file_chunk_reader with open_chunk_reader( filename, part_size * (part_number - 1), part_size, callback ) as body: - response = self._client.upload_part( + response = self._client.upload_part( Bucket=bucket, Key=key, UploadId=upload_id, @@ -479,44 +479,44 @@ class MultipartUploader: Body=body, **extra_args, ) - etag = response['ETag'] - return {'ETag': etag, 'PartNumber': part_number} - - -class ShutdownQueue(queue.Queue): - """A queue implementation that can be shutdown. - - Shutting down a queue means that this class adds a - trigger_shutdown method that will trigger all subsequent - calls to put() to fail with a ``QueueShutdownError``. 
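# Illustrative sketch of driving MultipartUploader directly (bucket, key and
# file path are hypothetical): it issues create_multipart_upload, uploads the
# parts concurrently, then completes the upload, aborting it if a part fails.
# In normal use S3Transfer._multipart_upload constructs this object for you.
import boto3

uploader = MultipartUploader(boto3.client('s3'), TransferConfig(), OSUtils())
uploader.upload_file('/tmp/bigfile', 'my-bucket', 'my-key',
                     callback=None, extra_args={})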
- - It purposefully deviates from queue.Queue, and is *not* meant - to be a drop in replacement for ``queue.Queue``. - - """ - - def _init(self, maxsize): - self._shutdown = False - self._shutdown_lock = threading.Lock() - # queue.Queue is an old style class so we don't use super(). - return queue.Queue._init(self, maxsize) - - def trigger_shutdown(self): - with self._shutdown_lock: - self._shutdown = True - logger.debug("The IO queue is now shutdown.") - - def put(self, item): - # Note: this is not sufficient, it's still possible to deadlock! - # Need to hook into the condition vars used by this class. - with self._shutdown_lock: - if self._shutdown: + etag = response['ETag'] + return {'ETag': etag, 'PartNumber': part_number} + + +class ShutdownQueue(queue.Queue): + """A queue implementation that can be shutdown. + + Shutting down a queue means that this class adds a + trigger_shutdown method that will trigger all subsequent + calls to put() to fail with a ``QueueShutdownError``. + + It purposefully deviates from queue.Queue, and is *not* meant + to be a drop in replacement for ``queue.Queue``. + + """ + + def _init(self, maxsize): + self._shutdown = False + self._shutdown_lock = threading.Lock() + # queue.Queue is an old style class so we don't use super(). + return queue.Queue._init(self, maxsize) + + def trigger_shutdown(self): + with self._shutdown_lock: + self._shutdown = True + logger.debug("The IO queue is now shutdown.") + + def put(self, item): + # Note: this is not sufficient, it's still possible to deadlock! + # Need to hook into the condition vars used by this class. + with self._shutdown_lock: + if self._shutdown: raise QueueShutdownError( "Cannot put item to queue when " "queue has been shutdown." ) - return queue.Queue.put(self, item) - - + return queue.Queue.put(self, item) + + class MultipartDownloader: def __init__( self, @@ -525,50 +525,50 @@ class MultipartDownloader: osutil, executor_cls=concurrent.futures.ThreadPoolExecutor, ): - self._client = client - self._config = config - self._os = osutil - self._executor_cls = executor_cls - self._ioqueue = ShutdownQueue(self._config.max_io_queue) - + self._client = client + self._config = config + self._os = osutil + self._executor_cls = executor_cls + self._ioqueue = ShutdownQueue(self._config.max_io_queue) + def download_file( self, bucket, key, filename, object_size, extra_args, callback=None ): - with self._executor_cls(max_workers=2) as controller: - # 1 thread for the future that manages the uploading of files - # 1 thread for the future that manages IO writes. - download_parts_handler = functools.partial( - self._download_file_as_future, + with self._executor_cls(max_workers=2) as controller: + # 1 thread for the future that manages the uploading of files + # 1 thread for the future that manages IO writes. 
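# Illustrative sketch of ShutdownQueue: after trigger_shutdown(), any further
# put() raises QueueShutdownError so producer threads stop promptly.
io_queue = ShutdownQueue(maxsize=100)
io_queue.put((0, b'first chunk'))
io_queue.trigger_shutdown()
try:
    io_queue.put((8192, b'late chunk'))
except QueueShutdownError:
    pass    # producers treat this as the signal to stop writing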
+ download_parts_handler = functools.partial( + self._download_file_as_future, bucket, key, filename, object_size, callback, ) - parts_future = controller.submit(download_parts_handler) - - io_writes_handler = functools.partial( + parts_future = controller.submit(download_parts_handler) + + io_writes_handler = functools.partial( self._perform_io_writes, filename ) - io_future = controller.submit(io_writes_handler) - results = concurrent.futures.wait( - [parts_future, io_future], + io_future = controller.submit(io_writes_handler) + results = concurrent.futures.wait( + [parts_future, io_future], return_when=concurrent.futures.FIRST_EXCEPTION, ) - self._process_future_results(results) - - def _process_future_results(self, futures): - finished, unfinished = futures - for future in finished: - future.result() - + self._process_future_results(results) + + def _process_future_results(self, futures): + finished, unfinished = futures + for future in finished: + future.result() + def _download_file_as_future( self, bucket, key, filename, object_size, callback ): - part_size = self._config.multipart_chunksize - num_parts = int(math.ceil(object_size / float(part_size))) - max_workers = self._config.max_concurrency - download_partial = functools.partial( + part_size = self._config.multipart_chunksize + num_parts = int(math.ceil(object_size / float(part_size))) + max_workers = self._config.max_concurrency + download_partial = functools.partial( self._download_range, bucket, key, @@ -577,48 +577,48 @@ class MultipartDownloader: num_parts, callback, ) - try: - with self._executor_cls(max_workers=max_workers) as executor: - list(executor.map(download_partial, range(num_parts))) - finally: - self._ioqueue.put(SHUTDOWN_SENTINEL) - - def _calculate_range_param(self, part_size, part_index, num_parts): - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - else: - end_range = start_range + part_size - 1 + try: + with self._executor_cls(max_workers=max_workers) as executor: + list(executor.map(download_partial, range(num_parts))) + finally: + self._ioqueue.put(SHUTDOWN_SENTINEL) + + def _calculate_range_param(self, part_size, part_index, num_parts): + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + else: + end_range = start_range + part_size - 1 range_param = f'bytes={start_range}-{end_range}' - return range_param - + return range_param + def _download_range( self, bucket, key, filename, part_size, num_parts, callback, part_index ): - try: - range_param = self._calculate_range_param( + try: + range_param = self._calculate_range_param( part_size, part_index, num_parts ) - - max_attempts = self._config.num_download_attempts - last_exception = None - for i in range(max_attempts): - try: - logger.debug("Making get_object call.") - response = self._client.get_object( + + max_attempts = self._config.num_download_attempts + last_exception = None + for i in range(max_attempts): + try: + logger.debug("Making get_object call.") + response = self._client.get_object( Bucket=bucket, Key=key, Range=range_param ) - streaming_body = StreamReaderProgress( + streaming_body = StreamReaderProgress( response['Body'], callback ) - buffer_size = 1024 * 16 - current_index = part_size * part_index + buffer_size = 1024 * 16 + current_index = part_size * part_index for chunk in iter( lambda: streaming_body.read(buffer_size), b'' ): - self._ioqueue.put((current_index, chunk)) - current_index += len(chunk) - return + self._ioqueue.put((current_index, chunk)) + 
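# Worked example of the Range header math in _calculate_range_param above
# (8 MiB parts, 3 parts in total; the final part leaves the end open):
part_size = 8 * 1024 * 1024
for part_index in range(3):
    start_range = part_index * part_size
    end_range = '' if part_index == 2 else start_range + part_size - 1
    print(f'bytes={start_range}-{end_range}')
# -> bytes=0-8388607, bytes=8388608-16777215, bytes=16777216-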
current_index += len(chunk) + return except ( socket.timeout, OSError, @@ -633,37 +633,37 @@ class MultipartDownloader: max_attempts, exc_info=True, ) - last_exception = e - continue - raise RetriesExceededError(last_exception) - finally: - logger.debug("EXITING _download_range for part: %s", part_index) - - def _perform_io_writes(self, filename): - with self._os.open(filename, 'wb') as f: - while True: - task = self._ioqueue.get() - if task is SHUTDOWN_SENTINEL: + last_exception = e + continue + raise RetriesExceededError(last_exception) + finally: + logger.debug("EXITING _download_range for part: %s", part_index) + + def _perform_io_writes(self, filename): + with self._os.open(filename, 'wb') as f: + while True: + task = self._ioqueue.get() + if task is SHUTDOWN_SENTINEL: logger.debug( "Shutdown sentinel received in IO handler, " "shutting down IO handler." ) - return - else: - try: - offset, data = task - f.seek(offset) - f.write(data) - except Exception as e: + return + else: + try: + offset, data = task + f.seek(offset) + f.write(data) + except Exception as e: logger.debug( "Caught exception in IO thread: %s", e, exc_info=True, ) - self._ioqueue.trigger_shutdown() - raise - - + self._ioqueue.trigger_shutdown() + raise + + class TransferConfig: def __init__( self, @@ -673,68 +673,68 @@ class TransferConfig: num_download_attempts=5, max_io_queue=100, ): - self.multipart_threshold = multipart_threshold - self.max_concurrency = max_concurrency - self.multipart_chunksize = multipart_chunksize - self.num_download_attempts = num_download_attempts - self.max_io_queue = max_io_queue - - + self.multipart_threshold = multipart_threshold + self.max_concurrency = max_concurrency + self.multipart_chunksize = multipart_chunksize + self.num_download_attempts = num_download_attempts + self.max_io_queue = max_io_queue + + class S3Transfer: - - ALLOWED_DOWNLOAD_ARGS = [ - 'VersionId', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'RequestPayer', - ] - - ALLOWED_UPLOAD_ARGS = [ - 'ACL', - 'CacheControl', - 'ContentDisposition', - 'ContentEncoding', - 'ContentLanguage', - 'ContentType', - 'Expires', - 'GrantFullControl', - 'GrantRead', - 'GrantReadACP', - 'GrantWriteACL', - 'Metadata', - 'RequestPayer', - 'ServerSideEncryption', - 'StorageClass', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'SSEKMSKeyId', + + ALLOWED_DOWNLOAD_ARGS = [ + 'VersionId', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'RequestPayer', + ] + + ALLOWED_UPLOAD_ARGS = [ + 'ACL', + 'CacheControl', + 'ContentDisposition', + 'ContentEncoding', + 'ContentLanguage', + 'ContentType', + 'Expires', + 'GrantFullControl', + 'GrantRead', + 'GrantReadACP', + 'GrantWriteACL', + 'Metadata', + 'RequestPayer', + 'ServerSideEncryption', + 'StorageClass', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'SSEKMSKeyId', 'SSEKMSEncryptionContext', 'Tagging', - ] - - def __init__(self, client, config=None, osutil=None): - self._client = client - if config is None: - config = TransferConfig() - self._config = config - if osutil is None: - osutil = OSUtils() - self._osutil = osutil - + ] + + def __init__(self, client, config=None, osutil=None): + self._client = client + if config is None: + config = TransferConfig() + self._config = config + if osutil is None: + osutil = OSUtils() + self._osutil = osutil + def upload_file( self, filename, bucket, key, callback=None, extra_args=None ): - """Upload a file to an S3 object. 
- - Variants have also been injected into S3 client, Bucket and Object. - You don't have to use S3Transfer.upload_file() directly. - """ - if extra_args is None: - extra_args = {} - self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) - events = self._client.meta.events + """Upload a file to an S3 object. + + Variants have also been injected into S3 client, Bucket and Object. + You don't have to use S3Transfer.upload_file() directly. + """ + if extra_args is None: + extra_args = {} + self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) + events = self._client.meta.events events.register_first( 'request-created.s3', disable_upload_callbacks, @@ -749,14 +749,14 @@ class S3Transfer: self._osutil.get_file_size(filename) >= self._config.multipart_threshold ): - self._multipart_upload(filename, bucket, key, callback, extra_args) - else: - self._put_object(filename, bucket, key, callback, extra_args) - - def _put_object(self, filename, bucket, key, callback, extra_args): - # We're using open_file_chunk_reader so we can take advantage of the - # progress callback functionality. - open_chunk_reader = self._osutil.open_file_chunk_reader + self._multipart_upload(filename, bucket, key, callback, extra_args) + else: + self._put_object(filename, bucket, key, callback, extra_args) + + def _put_object(self, filename, bucket, key, callback, extra_args): + # We're using open_file_chunk_reader so we can take advantage of the + # progress callback functionality. + open_chunk_reader = self._osutil.open_file_chunk_reader with open_chunk_reader( filename, 0, @@ -766,57 +766,57 @@ class S3Transfer: self._client.put_object( Bucket=bucket, Key=key, Body=body, **extra_args ) - + def download_file( self, bucket, key, filename, extra_args=None, callback=None ): - """Download an S3 object to a file. - - Variants have also been injected into S3 client, Bucket and Object. - You don't have to use S3Transfer.download_file() directly. - """ - # This method will issue a ``head_object`` request to determine - # the size of the S3 object. This is used to determine if the - # object is downloaded in parallel. - if extra_args is None: - extra_args = {} - self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) - object_size = self._object_size(bucket, key, extra_args) - temp_filename = filename + os.extsep + random_file_extension() - try: + """Download an S3 object to a file. + + Variants have also been injected into S3 client, Bucket and Object. + You don't have to use S3Transfer.download_file() directly. + """ + # This method will issue a ``head_object`` request to determine + # the size of the S3 object. This is used to determine if the + # object is downloaded in parallel. 
+ if extra_args is None: + extra_args = {} + self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) + object_size = self._object_size(bucket, key, extra_args) + temp_filename = filename + os.extsep + random_file_extension() + try: self._download_file( bucket, key, temp_filename, object_size, extra_args, callback ) - except Exception: + except Exception: logger.debug( "Exception caught in download_file, removing partial " "file: %s", temp_filename, exc_info=True, ) - self._osutil.remove_file(temp_filename) - raise - else: - self._osutil.rename_file(temp_filename, filename) - + self._osutil.remove_file(temp_filename) + raise + else: + self._osutil.rename_file(temp_filename, filename) + def _download_file( self, bucket, key, filename, object_size, extra_args, callback ): - if object_size >= self._config.multipart_threshold: + if object_size >= self._config.multipart_threshold: self._ranged_download( bucket, key, filename, object_size, extra_args, callback ) - else: - self._get_object(bucket, key, filename, extra_args, callback) - - def _validate_all_known_args(self, actual, allowed): - for kwarg in actual: - if kwarg not in allowed: - raise ValueError( - "Invalid extra_args key '%s', " + else: + self._get_object(bucket, key, filename, extra_args, callback) + + def _validate_all_known_args(self, actual, allowed): + for kwarg in actual: + if kwarg not in allowed: + raise ValueError( + "Invalid extra_args key '%s', " "must be one of: %s" % (kwarg, ', '.join(allowed)) ) - + def _ranged_download( self, bucket, key, filename, object_size, extra_args, callback ): @@ -826,13 +826,13 @@ class S3Transfer: downloader.download_file( bucket, key, filename, object_size, extra_args, callback ) - - def _get_object(self, bucket, key, filename, extra_args, callback): - # precondition: num_download_attempts > 0 - max_attempts = self._config.num_download_attempts - last_exception = None - for i in range(max_attempts): - try: + + def _get_object(self, bucket, key, filename, extra_args, callback): + # precondition: num_download_attempts > 0 + max_attempts = self._config.num_download_attempts + last_exception = None + for i in range(max_attempts): + try: return self._do_get_object( bucket, key, filename, extra_args, callback ) @@ -842,8 +842,8 @@ class S3Transfer: ReadTimeoutError, IncompleteReadError, ) as e: - # TODO: we need a way to reset the callback if the - # download failed. + # TODO: we need a way to reset the callback if the + # download failed. 
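# Illustrative sketch (hypothetical bucket, key and local path): download_file
# streams into a temporary sibling file and only renames it into place on
# success; if every attempt fails mid-stream, the partial file is removed and
# RetriesExceededError is raised to the caller.
import boto3

transfer = S3Transfer(boto3.client('s3', 'us-west-2'))
try:
    transfer.download_file('my-bucket', 'my-key', '/tmp/myfile')
except RetriesExceededError as exc:
    print('download kept failing mid-stream:', exc)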
logger.debug( "Retrying exception caught (%s), " "retrying request, (attempt %s / %s)", @@ -852,24 +852,24 @@ class S3Transfer: max_attempts, exc_info=True, ) - last_exception = e - continue - raise RetriesExceededError(last_exception) - - def _do_get_object(self, bucket, key, filename, extra_args, callback): + last_exception = e + continue + raise RetriesExceededError(last_exception) + + def _do_get_object(self, bucket, key, filename, extra_args, callback): response = self._client.get_object( Bucket=bucket, Key=key, **extra_args ) streaming_body = StreamReaderProgress(response['Body'], callback) - with self._osutil.open(filename, 'wb') as f: - for chunk in iter(lambda: streaming_body.read(8192), b''): - f.write(chunk) - - def _object_size(self, bucket, key, extra_args): + with self._osutil.open(filename, 'wb') as f: + for chunk in iter(lambda: streaming_body.read(8192), b''): + f.write(chunk) + + def _object_size(self, bucket, key, extra_args): return self._client.head_object(Bucket=bucket, Key=key, **extra_args)[ 'ContentLength' ] - - def _multipart_upload(self, filename, bucket, key, callback, extra_args): - uploader = MultipartUploader(self._client, self._config, self._osutil) - uploader.upload_file(filename, bucket, key, callback, extra_args) + + def _multipart_upload(self, filename, bucket, key, callback, extra_args): + uploader = MultipartUploader(self._client, self._config, self._osutil) + uploader.upload_file(filename, bucket, key, callback, extra_args) diff --git a/contrib/python/s3transfer/py3/s3transfer/bandwidth.py b/contrib/python/s3transfer/py3/s3transfer/bandwidth.py index e3ff9c2717..9bac5885e1 100644 --- a/contrib/python/s3transfer/py3/s3transfer/bandwidth.py +++ b/contrib/python/s3transfer/py3/s3transfer/bandwidth.py @@ -1,103 +1,103 @@ -# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. +# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. import threading -import time - - -class RequestExceededException(Exception): - def __init__(self, requested_amt, retry_time): - """Error when requested amount exceeds what is allowed - - The request that raised this error should be retried after waiting - the time specified by ``retry_time``. 
- - :type requested_amt: int - :param requested_amt: The originally requested byte amount - - :type retry_time: float - :param retry_time: The length in time to wait to retry for the - requested amount - """ - self.requested_amt = requested_amt - self.retry_time = retry_time +import time + + +class RequestExceededException(Exception): + def __init__(self, requested_amt, retry_time): + """Error when requested amount exceeds what is allowed + + The request that raised this error should be retried after waiting + the time specified by ``retry_time``. + + :type requested_amt: int + :param requested_amt: The originally requested byte amount + + :type retry_time: float + :param retry_time: The length in time to wait to retry for the + requested amount + """ + self.requested_amt = requested_amt + self.retry_time = retry_time msg = 'Request amount {} exceeded the amount available. Retry in {}'.format( requested_amt, retry_time - ) + ) super().__init__(msg) - - + + class RequestToken: - """A token to pass as an identifier when consuming from the LeakyBucket""" + """A token to pass as an identifier when consuming from the LeakyBucket""" + + pass + - pass - - class TimeUtils: - def time(self): - """Get the current time back - - :rtype: float - :returns: The current time in seconds - """ - return time.time() - - def sleep(self, value): - """Sleep for a designated time - - :type value: float - :param value: The time to sleep for in seconds - """ - return time.sleep(value) - - + def time(self): + """Get the current time back + + :rtype: float + :returns: The current time in seconds + """ + return time.time() + + def sleep(self, value): + """Sleep for a designated time + + :type value: float + :param value: The time to sleep for in seconds + """ + return time.sleep(value) + + class BandwidthLimiter: - def __init__(self, leaky_bucket, time_utils=None): - """Limits bandwidth for shared S3 transfers - - :type leaky_bucket: LeakyBucket - :param leaky_bucket: The leaky bucket to use limit bandwidth - - :type time_utils: TimeUtils - :param time_utils: Time utility to use for interacting with time. - """ - self._leaky_bucket = leaky_bucket - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - + def __init__(self, leaky_bucket, time_utils=None): + """Limits bandwidth for shared S3 transfers + + :type leaky_bucket: LeakyBucket + :param leaky_bucket: The leaky bucket to use limit bandwidth + + :type time_utils: TimeUtils + :param time_utils: Time utility to use for interacting with time. 
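``RequestExceededException`` carries the wait time, and the caller is expected to retry ``consume()`` with the same ``RequestToken`` after sleeping. A hedged sketch of that contract, assuming ``LeakyBucket`` takes its maximum rate in bytes per second under the keyword name ``max_rate``::

    import time

    from s3transfer.bandwidth import (
        LeakyBucket,
        RequestExceededException,
        RequestToken,
    )

    bucket = LeakyBucket(max_rate=1024 * 1024)  # ~1 MiB/s, assumed keyword
    token = RequestToken()
    amt = 256 * 1024
    while True:
        try:
            bucket.consume(amt, token)  # reuse the same token on retries
            break
        except RequestExceededException as e:
            time.sleep(e.retry_time)
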
+ """ + self._leaky_bucket = leaky_bucket + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + def get_bandwith_limited_stream( self, fileobj, transfer_coordinator, enabled=True ): - """Wraps a fileobj in a bandwidth limited stream wrapper - - :type fileobj: file-like obj - :param fileobj: The file-like obj to wrap - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - param transfer_coordinator: The coordinator for the general transfer - that the wrapped stream is a part of - - :type enabled: boolean - :param enabled: Whether bandwidth limiting should be enabled to start - """ - stream = BandwidthLimitedStream( + """Wraps a fileobj in a bandwidth limited stream wrapper + + :type fileobj: file-like obj + :param fileobj: The file-like obj to wrap + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + param transfer_coordinator: The coordinator for the general transfer + that the wrapped stream is a part of + + :type enabled: boolean + :param enabled: Whether bandwidth limiting should be enabled to start + """ + stream = BandwidthLimitedStream( fileobj, self._leaky_bucket, transfer_coordinator, self._time_utils ) - if not enabled: - stream.disable_bandwidth_limiting() - return stream - - + if not enabled: + stream.disable_bandwidth_limiting() + return stream + + class BandwidthLimitedStream: def __init__( self, @@ -107,110 +107,110 @@ class BandwidthLimitedStream: time_utils=None, bytes_threshold=256 * 1024, ): - """Limits bandwidth for reads on a wrapped stream - - :type fileobj: file-like object - :param fileobj: The file like object to wrap - - :type leaky_bucket: LeakyBucket - :param leaky_bucket: The leaky bucket to use to throttle reads on - the stream - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - param transfer_coordinator: The coordinator for the general transfer - that the wrapped stream is a part of - - :type time_utils: TimeUtils - :param time_utils: The time utility to use for interacting with time - """ - self._fileobj = fileobj - self._leaky_bucket = leaky_bucket - self._transfer_coordinator = transfer_coordinator - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - self._bandwidth_limiting_enabled = True - self._request_token = RequestToken() - self._bytes_seen = 0 - self._bytes_threshold = bytes_threshold - - def enable_bandwidth_limiting(self): - """Enable bandwidth limiting on reads to the stream""" - self._bandwidth_limiting_enabled = True - - def disable_bandwidth_limiting(self): - """Disable bandwidth limiting on reads to the stream""" - self._bandwidth_limiting_enabled = False - - def read(self, amount): - """Read a specified amount - - Reads will only be throttled if bandwidth limiting is enabled. - """ - if not self._bandwidth_limiting_enabled: - return self._fileobj.read(amount) - - # We do not want to be calling consume on every read as the read - # amounts can be small causing the lock of the leaky bucket to - # introduce noticeable overhead. So instead we keep track of - # how many bytes we have seen and only call consume once we pass a - # certain threshold. 
- self._bytes_seen += amount - if self._bytes_seen < self._bytes_threshold: - return self._fileobj.read(amount) - - self._consume_through_leaky_bucket() - return self._fileobj.read(amount) - - def _consume_through_leaky_bucket(self): + """Limits bandwidth for reads on a wrapped stream + + :type fileobj: file-like object + :param fileobj: The file like object to wrap + + :type leaky_bucket: LeakyBucket + :param leaky_bucket: The leaky bucket to use to throttle reads on + the stream + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + param transfer_coordinator: The coordinator for the general transfer + that the wrapped stream is a part of + + :type time_utils: TimeUtils + :param time_utils: The time utility to use for interacting with time + """ + self._fileobj = fileobj + self._leaky_bucket = leaky_bucket + self._transfer_coordinator = transfer_coordinator + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + self._bandwidth_limiting_enabled = True + self._request_token = RequestToken() + self._bytes_seen = 0 + self._bytes_threshold = bytes_threshold + + def enable_bandwidth_limiting(self): + """Enable bandwidth limiting on reads to the stream""" + self._bandwidth_limiting_enabled = True + + def disable_bandwidth_limiting(self): + """Disable bandwidth limiting on reads to the stream""" + self._bandwidth_limiting_enabled = False + + def read(self, amount): + """Read a specified amount + + Reads will only be throttled if bandwidth limiting is enabled. + """ + if not self._bandwidth_limiting_enabled: + return self._fileobj.read(amount) + + # We do not want to be calling consume on every read as the read + # amounts can be small causing the lock of the leaky bucket to + # introduce noticeable overhead. So instead we keep track of + # how many bytes we have seen and only call consume once we pass a + # certain threshold. + self._bytes_seen += amount + if self._bytes_seen < self._bytes_threshold: + return self._fileobj.read(amount) + + self._consume_through_leaky_bucket() + return self._fileobj.read(amount) + + def _consume_through_leaky_bucket(self): # NOTE: If the read amount on the stream are high, it will result - # in large bursty behavior as there is not an interface for partial - # reads. However given the read's on this abstraction are at most 256KB - # (via downloads), it reduces the burstiness to be small KB bursts at - # worst. - while not self._transfer_coordinator.exception: - try: - self._leaky_bucket.consume( + # in large bursty behavior as there is not an interface for partial + # reads. However given the read's on this abstraction are at most 256KB + # (via downloads), it reduces the burstiness to be small KB bursts at + # worst. 
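The threshold batching above exists purely to avoid taking the leaky bucket's lock on every small read. Rough arithmetic with the default 256 KiB threshold::

    # With 8 KiB reads and a 256 KiB threshold, consume() is called on the
    # shared bucket roughly once per 32 reads instead of on every read.
    read_size = 8 * 1024
    bytes_threshold = 256 * 1024
    reads_per_consume = bytes_threshold // read_size
    print(reads_per_consume)  # 32
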
+ while not self._transfer_coordinator.exception: + try: + self._leaky_bucket.consume( self._bytes_seen, self._request_token ) - self._bytes_seen = 0 - return - except RequestExceededException as e: - self._time_utils.sleep(e.retry_time) - else: - raise self._transfer_coordinator.exception - - def signal_transferring(self): - """Signal that data being read is being transferred to S3""" - self.enable_bandwidth_limiting() - - def signal_not_transferring(self): - """Signal that data being read is not being transferred to S3""" - self.disable_bandwidth_limiting() - + self._bytes_seen = 0 + return + except RequestExceededException as e: + self._time_utils.sleep(e.retry_time) + else: + raise self._transfer_coordinator.exception + + def signal_transferring(self): + """Signal that data being read is being transferred to S3""" + self.enable_bandwidth_limiting() + + def signal_not_transferring(self): + """Signal that data being read is not being transferred to S3""" + self.disable_bandwidth_limiting() + def seek(self, where, whence=0): self._fileobj.seek(where, whence) - - def tell(self): - return self._fileobj.tell() - - def close(self): - if self._bandwidth_limiting_enabled and self._bytes_seen: - # This handles the case where the file is small enough to never - # trigger the threshold and thus is never subjugated to the - # leaky bucket on read(). This specifically happens for small - # uploads. So instead to account for those bytes, have - # it go through the leaky bucket when the file gets closed. - self._consume_through_leaky_bucket() - self._fileobj.close() - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - + + def tell(self): + return self._fileobj.tell() + + def close(self): + if self._bandwidth_limiting_enabled and self._bytes_seen: + # This handles the case where the file is small enough to never + # trigger the threshold and thus is never subjugated to the + # leaky bucket on read(). This specifically happens for small + # uploads. So instead to account for those bytes, have + # it go through the leaky bucket when the file gets closed. + self._consume_through_leaky_bucket() + self._fileobj.close() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + class LeakyBucket: def __init__( self, @@ -219,221 +219,221 @@ class LeakyBucket: rate_tracker=None, consumption_scheduler=None, ): - """A leaky bucket abstraction to limit bandwidth consumption - - :type rate: int - :type rate: The maximum rate to allow. This rate is in terms of - bytes per second. 
- - :type time_utils: TimeUtils - :param time_utils: The time utility to use for interacting with time - - :type rate_tracker: BandwidthRateTracker - :param rate_tracker: Tracks bandwidth consumption - - :type consumption_scheduler: ConsumptionScheduler - :param consumption_scheduler: Schedules consumption retries when - necessary - """ - self._max_rate = float(max_rate) - self._time_utils = time_utils - if time_utils is None: - self._time_utils = TimeUtils() - self._lock = threading.Lock() - self._rate_tracker = rate_tracker - if rate_tracker is None: - self._rate_tracker = BandwidthRateTracker() - self._consumption_scheduler = consumption_scheduler - if consumption_scheduler is None: - self._consumption_scheduler = ConsumptionScheduler() - - def consume(self, amt, request_token): - """Consume an a requested amount - - :type amt: int - :param amt: The amount of bytes to request to consume - - :type request_token: RequestToken - :param request_token: The token associated to the consumption - request that is used to identify the request. So if a - RequestExceededException is raised the token should be used - in subsequent retry consume() request. - - :raises RequestExceededException: If the consumption amount would - exceed the maximum allocated bandwidth - - :rtype: int - :returns: The amount consumed - """ - with self._lock: - time_now = self._time_utils.time() - if self._consumption_scheduler.is_scheduled(request_token): - return self._release_requested_amt_for_scheduled_request( + """A leaky bucket abstraction to limit bandwidth consumption + + :type rate: int + :type rate: The maximum rate to allow. This rate is in terms of + bytes per second. + + :type time_utils: TimeUtils + :param time_utils: The time utility to use for interacting with time + + :type rate_tracker: BandwidthRateTracker + :param rate_tracker: Tracks bandwidth consumption + + :type consumption_scheduler: ConsumptionScheduler + :param consumption_scheduler: Schedules consumption retries when + necessary + """ + self._max_rate = float(max_rate) + self._time_utils = time_utils + if time_utils is None: + self._time_utils = TimeUtils() + self._lock = threading.Lock() + self._rate_tracker = rate_tracker + if rate_tracker is None: + self._rate_tracker = BandwidthRateTracker() + self._consumption_scheduler = consumption_scheduler + if consumption_scheduler is None: + self._consumption_scheduler = ConsumptionScheduler() + + def consume(self, amt, request_token): + """Consume an a requested amount + + :type amt: int + :param amt: The amount of bytes to request to consume + + :type request_token: RequestToken + :param request_token: The token associated to the consumption + request that is used to identify the request. So if a + RequestExceededException is raised the token should be used + in subsequent retry consume() request. 
+ + :raises RequestExceededException: If the consumption amount would + exceed the maximum allocated bandwidth + + :rtype: int + :returns: The amount consumed + """ + with self._lock: + time_now = self._time_utils.time() + if self._consumption_scheduler.is_scheduled(request_token): + return self._release_requested_amt_for_scheduled_request( amt, request_token, time_now ) - elif self._projected_to_exceed_max_rate(amt, time_now): - self._raise_request_exceeded_exception( + elif self._projected_to_exceed_max_rate(amt, time_now): + self._raise_request_exceeded_exception( amt, request_token, time_now ) - else: - return self._release_requested_amt(amt, time_now) - - def _projected_to_exceed_max_rate(self, amt, time_now): - projected_rate = self._rate_tracker.get_projected_rate(amt, time_now) - return projected_rate > self._max_rate - + else: + return self._release_requested_amt(amt, time_now) + + def _projected_to_exceed_max_rate(self, amt, time_now): + projected_rate = self._rate_tracker.get_projected_rate(amt, time_now) + return projected_rate > self._max_rate + def _release_requested_amt_for_scheduled_request( self, amt, request_token, time_now ): - self._consumption_scheduler.process_scheduled_consumption( + self._consumption_scheduler.process_scheduled_consumption( request_token ) - return self._release_requested_amt(amt, time_now) - - def _raise_request_exceeded_exception(self, amt, request_token, time_now): + return self._release_requested_amt(amt, time_now) + + def _raise_request_exceeded_exception(self, amt, request_token, time_now): allocated_time = amt / float(self._max_rate) - retry_time = self._consumption_scheduler.schedule_consumption( + retry_time = self._consumption_scheduler.schedule_consumption( amt, request_token, allocated_time ) - raise RequestExceededException( + raise RequestExceededException( requested_amt=amt, retry_time=retry_time ) - - def _release_requested_amt(self, amt, time_now): - self._rate_tracker.record_consumption_rate(amt, time_now) - return amt - - + + def _release_requested_amt(self, amt, time_now): + self._rate_tracker.record_consumption_rate(amt, time_now) + return amt + + class ConsumptionScheduler: - def __init__(self): - """Schedules when to consume a desired amount""" - self._tokens_to_scheduled_consumption = {} - self._total_wait = 0 - - def is_scheduled(self, token): - """Indicates if a consumption request has been scheduled - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - """ - return token in self._tokens_to_scheduled_consumption - - def schedule_consumption(self, amt, token, time_to_consume): - """Schedules a wait time to be able to consume an amount - - :type amt: int - :param amt: The amount of bytes scheduled to be consumed - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - - :type time_to_consume: float - :param time_to_consume: The desired time it should take for that - specific request amount to be consumed in regardless of previously - scheduled consumption requests - - :rtype: float - :returns: The amount of time to wait for the specific request before - actually consuming the specified amount. 
- """ - self._total_wait += time_to_consume - self._tokens_to_scheduled_consumption[token] = { - 'wait_duration': self._total_wait, - 'time_to_consume': time_to_consume, - } - return self._total_wait - - def process_scheduled_consumption(self, token): - """Processes a scheduled consumption request that has completed - - :type token: RequestToken - :param token: The token associated to the consumption - request that is used to identify the request. - """ - scheduled_retry = self._tokens_to_scheduled_consumption.pop(token) - self._total_wait = max( + def __init__(self): + """Schedules when to consume a desired amount""" + self._tokens_to_scheduled_consumption = {} + self._total_wait = 0 + + def is_scheduled(self, token): + """Indicates if a consumption request has been scheduled + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. + """ + return token in self._tokens_to_scheduled_consumption + + def schedule_consumption(self, amt, token, time_to_consume): + """Schedules a wait time to be able to consume an amount + + :type amt: int + :param amt: The amount of bytes scheduled to be consumed + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. + + :type time_to_consume: float + :param time_to_consume: The desired time it should take for that + specific request amount to be consumed in regardless of previously + scheduled consumption requests + + :rtype: float + :returns: The amount of time to wait for the specific request before + actually consuming the specified amount. + """ + self._total_wait += time_to_consume + self._tokens_to_scheduled_consumption[token] = { + 'wait_duration': self._total_wait, + 'time_to_consume': time_to_consume, + } + return self._total_wait + + def process_scheduled_consumption(self, token): + """Processes a scheduled consumption request that has completed + + :type token: RequestToken + :param token: The token associated to the consumption + request that is used to identify the request. + """ + scheduled_retry = self._tokens_to_scheduled_consumption.pop(token) + self._total_wait = max( self._total_wait - scheduled_retry['time_to_consume'], 0 ) - - + + class BandwidthRateTracker: - def __init__(self, alpha=0.8): - """Tracks the rate of bandwidth consumption - - :type a: float - :param a: The constant to use in calculating the exponentional moving - average of the bandwidth rate. Specifically it is used in the - following calculation: - - current_rate = alpha * new_rate + (1 - alpha) * current_rate - - This value of this constant should be between 0 and 1. 
- """ - self._alpha = alpha - self._last_time = None - self._current_rate = None - - @property - def current_rate(self): - """The current transfer rate - - :rtype: float - :returns: The current tracked transfer rate - """ - if self._last_time is None: - return 0.0 - return self._current_rate - - def get_projected_rate(self, amt, time_at_consumption): - """Get the projected rate using a provided amount and time - - :type amt: int - :param amt: The proposed amount to consume - - :type time_at_consumption: float - :param time_at_consumption: The proposed time to consume at - - :rtype: float - :returns: The consumption rate if that amt and time were consumed - """ - if self._last_time is None: - return 0.0 - return self._calculate_exponential_moving_average_rate( + def __init__(self, alpha=0.8): + """Tracks the rate of bandwidth consumption + + :type a: float + :param a: The constant to use in calculating the exponentional moving + average of the bandwidth rate. Specifically it is used in the + following calculation: + + current_rate = alpha * new_rate + (1 - alpha) * current_rate + + This value of this constant should be between 0 and 1. + """ + self._alpha = alpha + self._last_time = None + self._current_rate = None + + @property + def current_rate(self): + """The current transfer rate + + :rtype: float + :returns: The current tracked transfer rate + """ + if self._last_time is None: + return 0.0 + return self._current_rate + + def get_projected_rate(self, amt, time_at_consumption): + """Get the projected rate using a provided amount and time + + :type amt: int + :param amt: The proposed amount to consume + + :type time_at_consumption: float + :param time_at_consumption: The proposed time to consume at + + :rtype: float + :returns: The consumption rate if that amt and time were consumed + """ + if self._last_time is None: + return 0.0 + return self._calculate_exponential_moving_average_rate( amt, time_at_consumption ) - - def record_consumption_rate(self, amt, time_at_consumption): - """Record the consumption rate based off amount and time point - - :type amt: int - :param amt: The amount that got consumed - - :type time_at_consumption: float - :param time_at_consumption: The time at which the amount was consumed - """ - if self._last_time is None: - self._last_time = time_at_consumption - self._current_rate = 0.0 - return - self._current_rate = self._calculate_exponential_moving_average_rate( + + def record_consumption_rate(self, amt, time_at_consumption): + """Record the consumption rate based off amount and time point + + :type amt: int + :param amt: The amount that got consumed + + :type time_at_consumption: float + :param time_at_consumption: The time at which the amount was consumed + """ + if self._last_time is None: + self._last_time = time_at_consumption + self._current_rate = 0.0 + return + self._current_rate = self._calculate_exponential_moving_average_rate( amt, time_at_consumption ) - self._last_time = time_at_consumption - - def _calculate_rate(self, amt, time_at_consumption): - time_delta = time_at_consumption - self._last_time - if time_delta <= 0: + self._last_time = time_at_consumption + + def _calculate_rate(self, amt, time_at_consumption): + time_delta = time_at_consumption - self._last_time + if time_delta <= 0: # While it is really unlikely to see this in an actual transfer, - # we do not want to be returning back a negative rate or try to - # divide the amount by zero. So instead return back an infinite - # rate as the time delta is infinitesimally small. 
- return float('inf') - return amt / (time_delta) - + # we do not want to be returning back a negative rate or try to + # divide the amount by zero. So instead return back an infinite + # rate as the time delta is infinitesimally small. + return float('inf') + return amt / (time_delta) + def _calculate_exponential_moving_average_rate( self, amt, time_at_consumption ): - new_rate = self._calculate_rate(amt, time_at_consumption) - return self._alpha * new_rate + (1 - self._alpha) * self._current_rate + new_rate = self._calculate_rate(amt, time_at_consumption) + return self._alpha * new_rate + (1 - self._alpha) * self._current_rate diff --git a/contrib/python/s3transfer/py3/s3transfer/compat.py b/contrib/python/s3transfer/py3/s3transfer/compat.py index dc0c2fee3e..68267ad0e2 100644 --- a/contrib/python/s3transfer/py3/s3transfer/compat.py +++ b/contrib/python/s3transfer/py3/s3transfer/compat.py @@ -1,86 +1,86 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. import errno -import inspect -import os -import socket +import inspect +import os +import socket import sys - -from botocore.compat import six - -if sys.platform.startswith('win'): - def rename_file(current_filename, new_filename): - try: - os.remove(new_filename) - except OSError as e: - if not e.errno == errno.ENOENT: - # We only want to a ignore trying to remove - # a file that does not exist. If it fails - # for any other reason we should be propagating - # that exception. - raise - os.rename(current_filename, new_filename) -else: - rename_file = os.rename - - + +from botocore.compat import six + +if sys.platform.startswith('win'): + def rename_file(current_filename, new_filename): + try: + os.remove(new_filename) + except OSError as e: + if not e.errno == errno.ENOENT: + # We only want to a ignore trying to remove + # a file that does not exist. If it fails + # for any other reason we should be propagating + # that exception. + raise + os.rename(current_filename, new_filename) +else: + rename_file = os.rename + + def accepts_kwargs(func): return inspect.getfullargspec(func)[2] - - + + # In python 3, socket.error is OSError, which is too general # for what we want (i.e FileNotFoundError is a subclass of OSError). # In python 3, all the socket related errors are in a newly created # ConnectionError. 
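In the compat module, ``accepts_kwargs`` just checks whether a callable declares a ``**kwargs`` parameter, and ``SOCKET_ERROR`` is aliased to ``ConnectionError`` on Python 3. A small illustration of the helper::

    from s3transfer.compat import accepts_kwargs

    def plain(a, b):
        return a + b

    def flexible(a, b, **kwargs):
        return a + b

    assert not accepts_kwargs(plain)     # no **kwargs declared
    assert accepts_kwargs(flexible)      # varkw name is returned, truthy
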
SOCKET_ERROR = ConnectionError MAXINT = None - - -def seekable(fileobj): - """Backwards compat function to determine if a fileobj is seekable - - :param fileobj: The file-like object to determine if seekable - - :returns: True, if seekable. False, otherwise. - """ - # If the fileobj has a seekable attr, try calling the seekable() - # method on it. - if hasattr(fileobj, 'seekable'): - return fileobj.seekable() - # If there is no seekable attr, check if the object can be seeked - # or telled. If it can, try to seek to the current position. - elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'): - try: - fileobj.seek(0, 1) - return True + + +def seekable(fileobj): + """Backwards compat function to determine if a fileobj is seekable + + :param fileobj: The file-like object to determine if seekable + + :returns: True, if seekable. False, otherwise. + """ + # If the fileobj has a seekable attr, try calling the seekable() + # method on it. + if hasattr(fileobj, 'seekable'): + return fileobj.seekable() + # If there is no seekable attr, check if the object can be seeked + # or telled. If it can, try to seek to the current position. + elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'): + try: + fileobj.seek(0, 1) + return True except OSError: - # If an io related error was thrown then it is not seekable. - return False - # Else, the fileobj is not seekable - return False - - -def readable(fileobj): - """Determines whether or not a file-like object is readable. - - :param fileobj: The file-like object to determine if readable - - :returns: True, if readable. False otherwise. - """ - if hasattr(fileobj, 'readable'): - return fileobj.readable() - - return hasattr(fileobj, 'read') + # If an io related error was thrown then it is not seekable. + return False + # Else, the fileobj is not seekable + return False + + +def readable(fileobj): + """Determines whether or not a file-like object is readable. + + :param fileobj: The file-like object to determine if readable + + :returns: True, if readable. False otherwise. + """ + if hasattr(fileobj, 'readable'): + return fileobj.readable() + + return hasattr(fileobj, 'read') def fallocate(fileobj, size): diff --git a/contrib/python/s3transfer/py3/s3transfer/copies.py b/contrib/python/s3transfer/py3/s3transfer/copies.py index 2ef3d06b2d..a1dfdc8ba3 100644 --- a/contrib/python/s3transfer/py3/s3transfer/copies.py +++ b/contrib/python/s3transfer/py3/s3transfer/copies.py @@ -1,18 +1,18 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import math - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific +# language governing permissions and limitations under the License. +import copy +import math + from s3transfer.tasks import ( CompleteMultipartUploadTask, CreateMultipartUploadTask, @@ -25,300 +25,300 @@ from s3transfer.utils import ( get_callbacks, get_filtered_dict, ) - - -class CopySubmissionTask(SubmissionTask): - """Task for submitting tasks to execute a copy""" - - EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = { - 'CopySourceIfMatch': 'IfMatch', - 'CopySourceIfModifiedSince': 'IfModifiedSince', - 'CopySourceIfNoneMatch': 'IfNoneMatch', - 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince', - 'CopySourceSSECustomerKey': 'SSECustomerKey', - 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5', + + +class CopySubmissionTask(SubmissionTask): + """Task for submitting tasks to execute a copy""" + + EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = { + 'CopySourceIfMatch': 'IfMatch', + 'CopySourceIfModifiedSince': 'IfModifiedSince', + 'CopySourceIfNoneMatch': 'IfNoneMatch', + 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince', + 'CopySourceSSECustomerKey': 'SSECustomerKey', + 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5', 'RequestPayer': 'RequestPayer', 'ExpectedBucketOwner': 'ExpectedBucketOwner', - } - - UPLOAD_PART_COPY_ARGS = [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5', - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', + } + + UPLOAD_PART_COPY_ARGS = [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 'CopySourceSSECustomerKey', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5', + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', 'ExpectedBucketOwner', - ] - - CREATE_MULTIPART_ARGS_BLACKLIST = [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKeyMD5', + ] + + CREATE_MULTIPART_ARGS_BLACKLIST = [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 'CopySourceSSECustomerKey', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKeyMD5', 'MetadataDirective', 'TaggingDirective', - ] - + ] + COMPLETE_MULTIPART_ARGS = ['RequestPayer', 'ExpectedBucketOwner'] - + def _submit( self, client, config, osutil, request_executor, transfer_future ): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - # Determine the size if it was not provided - if transfer_future.meta.size is None: - # If a size was not provided figure out the size for the - # user. 
Note that we will only use the client provided to - # the TransferManager. If the object is outside of the region - # of the client, they may have to provide the file size themselves - # with a completely new client. - call_args = transfer_future.meta.call_args + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + # Determine the size if it was not provided + if transfer_future.meta.size is None: + # If a size was not provided figure out the size for the + # user. Note that we will only use the client provided to + # the TransferManager. If the object is outside of the region + # of the client, they may have to provide the file size themselves + # with a completely new client. + call_args = transfer_future.meta.call_args head_object_request = ( - self._get_head_object_request_from_copy_source( + self._get_head_object_request_from_copy_source( call_args.copy_source ) ) - extra_args = call_args.extra_args - - # Map any values that may be used in the head object that is - # used in the copy object - for param, value in extra_args.items(): - if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING: - head_object_request[ + extra_args = call_args.extra_args + + # Map any values that may be used in the head object that is + # used in the copy object + for param, value in extra_args.items(): + if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING: + head_object_request[ self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param] ] = value - - response = call_args.source_client.head_object( + + response = call_args.source_client.head_object( **head_object_request ) - transfer_future.meta.provide_transfer_size( + transfer_future.meta.provide_transfer_size( response['ContentLength'] ) - - # If it is greater than threshold do a multipart copy, otherwise - # do a regular copy object. - if transfer_future.meta.size < config.multipart_threshold: - self._submit_copy_request( + + # If it is greater than threshold do a multipart copy, otherwise + # do a regular copy object. + if transfer_future.meta.size < config.multipart_threshold: + self._submit_copy_request( client, config, osutil, request_executor, transfer_future ) - else: - self._submit_multipart_request( + else: + self._submit_multipart_request( client, config, osutil, request_executor, transfer_future ) - + def _submit_copy_request( self, client, config, osutil, request_executor, transfer_future ): - call_args = transfer_future.meta.call_args - - # Get the needed progress callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Submit the request of a single copy. 
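For copies, the submission task only issues the ``HeadObject`` shown above when the caller did not provide a transfer size, then picks the single-request or multipart path against ``multipart_threshold``. A hedged usage sketch through the higher-level ``TransferManager``, assuming its ``copy`` method takes a ``copy_source`` dict as validated further below::

    import boto3
    from s3transfer.manager import TransferManager

    client = boto3.client('s3', 'us-west-2')
    with TransferManager(client) as manager:
        future = manager.copy(
            copy_source={'Bucket': 'src-bucket', 'Key': 'src-key'},
            bucket='dst-bucket',
            key='dst-key',
        )
        future.result()  # blocks until the (possibly multipart) copy finishes
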
- self._transfer_coordinator.submit( - request_executor, - CopyObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'copy_source': call_args.copy_source, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, - 'callbacks': progress_callbacks, + call_args = transfer_future.meta.call_args + + # Get the needed progress callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Submit the request of a single copy. + self._transfer_coordinator.submit( + request_executor, + CopyObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'copy_source': call_args.copy_source, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, + 'callbacks': progress_callbacks, 'size': transfer_future.meta.size, - }, + }, is_final=True, ), - ) - + ) + def _submit_multipart_request( self, client, config, osutil, request_executor, transfer_future ): - call_args = transfer_future.meta.call_args - - # Submit the request to create a multipart upload and make sure it - # does not include any of the arguments used for copy part. - create_multipart_extra_args = {} - for param, val in call_args.extra_args.items(): - if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST: - create_multipart_extra_args[param] = val - - create_multipart_future = self._transfer_coordinator.submit( - request_executor, - CreateMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': create_multipart_extra_args, + call_args = transfer_future.meta.call_args + + # Submit the request to create a multipart upload and make sure it + # does not include any of the arguments used for copy part. + create_multipart_extra_args = {} + for param, val in call_args.extra_args.items(): + if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST: + create_multipart_extra_args[param] = val + + create_multipart_future = self._transfer_coordinator.submit( + request_executor, + CreateMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': create_multipart_extra_args, }, ), - ) - - # Determine how many parts are needed based on filesize and - # desired chunksize. - part_size = config.multipart_chunksize - adjuster = ChunksizeAdjuster() - part_size = adjuster.adjust_chunksize( + ) + + # Determine how many parts are needed based on filesize and + # desired chunksize. + part_size = config.multipart_chunksize + adjuster = ChunksizeAdjuster() + part_size = adjuster.adjust_chunksize( part_size, transfer_future.meta.size ) - num_parts = int( + num_parts = int( math.ceil(transfer_future.meta.size / float(part_size)) ) - - # Submit requests to upload the parts of the file. - part_futures = [] - progress_callbacks = get_callbacks(transfer_future, 'progress') - - for part_number in range(1, num_parts + 1): - extra_part_args = self._extra_upload_part_args( + + # Submit requests to upload the parts of the file. 
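Part count and byte ranges follow directly from the adjusted chunk size. Illustrative arithmetic only, mirroring how the ``CopySourceRange`` value is produced for each part::

    import math

    object_size = 25 * 1024 * 1024           # 25 MiB object
    part_size = 8 * 1024 * 1024              # 8 MiB parts after adjustment
    num_parts = int(math.ceil(object_size / float(part_size)))  # 4

    for part_index in range(num_parts):
        start = part_index * part_size
        end = min(start + part_size, object_size) - 1
        print(f'bytes={start}-{end}')        # e.g. bytes=0-8388607
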
+ part_futures = [] + progress_callbacks = get_callbacks(transfer_future, 'progress') + + for part_number in range(1, num_parts + 1): + extra_part_args = self._extra_upload_part_args( call_args.extra_args ) - # The part number for upload part starts at 1 while the - # range parameter starts at zero, so just subtract 1 off of - # the part number - extra_part_args['CopySourceRange'] = calculate_range_parameter( + # The part number for upload part starts at 1 while the + # range parameter starts at zero, so just subtract 1 off of + # the part number + extra_part_args['CopySourceRange'] = calculate_range_parameter( part_size, part_number - 1, num_parts, transfer_future.meta.size, ) - # Get the size of the part copy as well for the progress - # callbacks. - size = self._get_transfer_size( + # Get the size of the part copy as well for the progress + # callbacks. + size = self._get_transfer_size( part_size, part_number - 1, num_parts, transfer_future.meta.size, - ) - part_futures.append( - self._transfer_coordinator.submit( - request_executor, - CopyPartTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'copy_source': call_args.copy_source, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'part_number': part_number, - 'extra_args': extra_part_args, - 'callbacks': progress_callbacks, + ) + part_futures.append( + self._transfer_coordinator.submit( + request_executor, + CopyPartTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'copy_source': call_args.copy_source, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'part_number': part_number, + 'extra_args': extra_part_args, + 'callbacks': progress_callbacks, 'size': size, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future }, ), - ) - ) - - complete_multipart_extra_args = self._extra_complete_multipart_args( + ) + ) + + complete_multipart_extra_args = self._extra_complete_multipart_args( call_args.extra_args ) - # Submit the request to complete the multipart upload. - self._transfer_coordinator.submit( - request_executor, - CompleteMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': complete_multipart_extra_args, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future, + # Submit the request to complete the multipart upload. + self._transfer_coordinator.submit( + request_executor, + CompleteMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': complete_multipart_extra_args, + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future, 'parts': part_futures, - }, + }, is_final=True, ), - ) - - def _get_head_object_request_from_copy_source(self, copy_source): - if isinstance(copy_source, dict): - return copy.copy(copy_source) - else: - raise TypeError( - 'Expecting dictionary formatted: ' - '{"Bucket": bucket_name, "Key": key} ' + ) + + def _get_head_object_request_from_copy_source(self, copy_source): + if isinstance(copy_source, dict): + return copy.copy(copy_source) + else: + raise TypeError( + 'Expecting dictionary formatted: ' + '{"Bucket": bucket_name, "Key": key} ' 'but got %s or type %s.' 
% (copy_source, type(copy_source)) - ) - - def _extra_upload_part_args(self, extra_args): - # Only the args in COPY_PART_ARGS actually need to be passed - # onto the upload_part_copy calls. - return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS) - - def _extra_complete_multipart_args(self, extra_args): - return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) - + ) + + def _extra_upload_part_args(self, extra_args): + # Only the args in COPY_PART_ARGS actually need to be passed + # onto the upload_part_copy calls. + return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS) + + def _extra_complete_multipart_args(self, extra_args): + return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) + def _get_transfer_size( self, part_size, part_index, num_parts, total_transfer_size ): - if part_index == num_parts - 1: - # The last part may be different in size then the rest of the - # parts. - return total_transfer_size - (part_index * part_size) - return part_size - - -class CopyObjectTask(Task): - """Task to do a nonmultipart copy""" + if part_index == num_parts - 1: + # The last part may be different in size then the rest of the + # parts. + return total_transfer_size - (part_index * part_size) + return part_size + + +class CopyObjectTask(Task): + """Task to do a nonmultipart copy""" def _main( self, client, copy_source, bucket, key, extra_args, callbacks, size ): - """ - :param client: The client to use when calling PutObject - :param copy_source: The CopySource parameter to use - :param bucket: The name of the bucket to copy to - :param key: The name of the key to copy to - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - :param callbacks: List of callbacks to call after copy - :param size: The size of the transfer. This value is passed into - the callbacks - - """ - client.copy_object( + """ + :param client: The client to use when calling PutObject + :param copy_source: The CopySource parameter to use + :param bucket: The name of the bucket to copy to + :param key: The name of the key to copy to + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + :param callbacks: List of callbacks to call after copy + :param size: The size of the transfer. This value is passed into + the callbacks + + """ + client.copy_object( CopySource=copy_source, Bucket=bucket, Key=key, **extra_args ) - for callback in callbacks: - callback(bytes_transferred=size) - - -class CopyPartTask(Task): - """Task to upload a part in a multipart copy""" + for callback in callbacks: + callback(bytes_transferred=size) + + +class CopyPartTask(Task): + """Task to upload a part in a multipart copy""" def _main( self, @@ -332,29 +332,29 @@ class CopyPartTask(Task): callbacks, size, ): - """ - :param client: The client to use when calling PutObject - :param copy_source: The CopySource parameter to use - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param part_number: The number representing the part of the multipart - upload - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - :param callbacks: List of callbacks to call after copy part - :param size: The size of the transfer. 
This value is passed into - the callbacks - - :rtype: dict - :returns: A dictionary representing a part:: - - {'Etag': etag_value, 'PartNumber': part_number} - - This value can be appended to a list to be used to complete - the multipart upload. - """ - response = client.upload_part_copy( + """ + :param client: The client to use when calling PutObject + :param copy_source: The CopySource parameter to use + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param part_number: The number representing the part of the multipart + upload + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + :param callbacks: List of callbacks to call after copy part + :param size: The size of the transfer. This value is passed into + the callbacks + + :rtype: dict + :returns: A dictionary representing a part:: + + {'Etag': etag_value, 'PartNumber': part_number} + + This value can be appended to a list to be used to complete + the multipart upload. + """ + response = client.upload_part_copy( CopySource=copy_source, Bucket=bucket, Key=key, @@ -362,7 +362,7 @@ class CopyPartTask(Task): PartNumber=part_number, **extra_args ) - for callback in callbacks: - callback(bytes_transferred=size) - etag = response['CopyPartResult']['ETag'] - return {'ETag': etag, 'PartNumber': part_number} + for callback in callbacks: + callback(bytes_transferred=size) + etag = response['CopyPartResult']['ETag'] + return {'ETag': etag, 'PartNumber': part_number} diff --git a/contrib/python/s3transfer/py3/s3transfer/delete.py b/contrib/python/s3transfer/py3/s3transfer/delete.py index 3d8e6ec9b2..74084d312a 100644 --- a/contrib/python/s3transfer/py3/s3transfer/delete.py +++ b/contrib/python/s3transfer/py3/s3transfer/delete.py @@ -1,71 +1,71 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
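Each ``CopyPartTask`` returns an ``{'ETag': ..., 'PartNumber': ...}`` dict, and the collected list is what the completion call ultimately needs. A hedged sketch of the equivalent plain client calls; the bucket, key, and ETag values are placeholders::

    import boto3

    client = boto3.client('s3', 'us-west-2')
    mpu = client.create_multipart_upload(Bucket='dst-bucket', Key='dst-key')
    upload_id = mpu['UploadId']
    # upload_part_copy calls would populate this list, as the task above does.
    parts = [
        {'ETag': '"etag-1"', 'PartNumber': 1},
    ]
    client.complete_multipart_upload(
        Bucket='dst-bucket',
        Key='dst-key',
        UploadId=upload_id,
        MultipartUpload={'Parts': parts},
    )
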
from s3transfer.tasks import SubmissionTask, Task - - -class DeleteSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute an object deletion.""" - - def _submit(self, client, request_executor, transfer_future, **kwargs): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - call_args = transfer_future.meta.call_args - - self._transfer_coordinator.submit( - request_executor, - DeleteObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, - }, + + +class DeleteSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute an object deletion.""" + + def _submit(self, client, request_executor, transfer_future, **kwargs): + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + call_args = transfer_future.meta.call_args + + self._transfer_coordinator.submit( + request_executor, + DeleteObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, + }, is_final=True, ), - ) - - -class DeleteObjectTask(Task): - def _main(self, client, bucket, key, extra_args): - """ - - :param client: The S3 client to use when calling DeleteObject - - :type bucket: str - :param bucket: The name of the bucket. - - :type key: str - :param key: The name of the object to delete. - - :type extra_args: dict - :param extra_args: Extra arguments to pass to the DeleteObject call. - - """ - client.delete_object(Bucket=bucket, Key=key, **extra_args) + ) + + +class DeleteObjectTask(Task): + def _main(self, client, bucket, key, extra_args): + """ + + :param client: The S3 client to use when calling DeleteObject + + :type bucket: str + :param bucket: The name of the bucket. + + :type key: str + :param key: The name of the object to delete. + + :type extra_args: dict + :param extra_args: Extra arguments to pass to the DeleteObject call. 
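The deletion path is the simplest one: a single ``DeleteObjectTask`` wrapping ``delete_object``. A hedged usage sketch through the higher-level ``TransferManager``, assuming its ``delete`` method as defined in the manager module::

    import boto3
    from s3transfer.manager import TransferManager

    client = boto3.client('s3', 'us-west-2')
    with TransferManager(client) as manager:
        manager.delete('bucket', 'key').result()
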
+ + """ + client.delete_object(Bucket=bucket, Key=key, **extra_args) diff --git a/contrib/python/s3transfer/py3/s3transfer/download.py b/contrib/python/s3transfer/py3/s3transfer/download.py index e1f429065d..dc8980d4ed 100644 --- a/contrib/python/s3transfer/py3/s3transfer/download.py +++ b/contrib/python/s3transfer/py3/s3transfer/download.py @@ -1,22 +1,22 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. import heapq -import logging -import threading - -from s3transfer.compat import seekable -from s3transfer.exceptions import RetriesExceededError -from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG +import logging +import threading + +from s3transfer.compat import seekable +from s3transfer.exceptions import RetriesExceededError +from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.tasks import SubmissionTask, Task from s3transfer.utils import ( S3_RETRYABLE_DOWNLOAD_ERRORS, @@ -29,289 +29,289 @@ from s3transfer.utils import ( get_callbacks, invoke_progress_callbacks, ) - -logger = logging.getLogger(__name__) - - + +logger = logging.getLogger(__name__) + + class DownloadOutputManager: - """Base manager class for handling various types of files for downloads - - This class is typically used for the DownloadSubmissionTask class to help - determine the following: - - * Provides the fileobj to write to downloads to - * Get a task to complete once everything downloaded has been written - - The answers/implementations differ for the various types of file outputs - that may be accepted. All implementations must subclass and override - public methods from this class. - """ - - def __init__(self, osutil, transfer_coordinator, io_executor): - self._osutil = osutil - self._transfer_coordinator = transfer_coordinator - self._io_executor = io_executor - - @classmethod - def is_compatible(cls, download_target, osutil): - """Determines if the target for the download is compatible with manager - - :param download_target: The target for which the upload will write - data to. - - :param osutil: The os utility to be used for the transfer - - :returns: True if the manager can handle the type of target specified - otherwise returns False. 
- """ - raise NotImplementedError('must implement is_compatible()') - - def get_download_task_tag(self): - """Get the tag (if any) to associate all GetObjectTasks - - :rtype: s3transfer.futures.TaskTag - :returns: The tag to associate all GetObjectTasks with - """ - return None - - def get_fileobj_for_io_writes(self, transfer_future): - """Get file-like object to use for io writes in the io executor - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - returns: A file-like object to write to - """ - raise NotImplementedError('must implement get_fileobj_for_io_writes()') - - def queue_file_io_task(self, fileobj, data, offset): - """Queue IO write for submission to the IO executor. - - This method accepts an IO executor and information about the - downloaded data, and handles submitting this to the IO executor. - - This method may defer submission to the IO executor if necessary. - - """ - self._transfer_coordinator.submit( + """Base manager class for handling various types of files for downloads + + This class is typically used for the DownloadSubmissionTask class to help + determine the following: + + * Provides the fileobj to write to downloads to + * Get a task to complete once everything downloaded has been written + + The answers/implementations differ for the various types of file outputs + that may be accepted. All implementations must subclass and override + public methods from this class. + """ + + def __init__(self, osutil, transfer_coordinator, io_executor): + self._osutil = osutil + self._transfer_coordinator = transfer_coordinator + self._io_executor = io_executor + + @classmethod + def is_compatible(cls, download_target, osutil): + """Determines if the target for the download is compatible with manager + + :param download_target: The target for which the upload will write + data to. + + :param osutil: The os utility to be used for the transfer + + :returns: True if the manager can handle the type of target specified + otherwise returns False. + """ + raise NotImplementedError('must implement is_compatible()') + + def get_download_task_tag(self): + """Get the tag (if any) to associate all GetObjectTasks + + :rtype: s3transfer.futures.TaskTag + :returns: The tag to associate all GetObjectTasks with + """ + return None + + def get_fileobj_for_io_writes(self, transfer_future): + """Get file-like object to use for io writes in the io executor + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + returns: A file-like object to write to + """ + raise NotImplementedError('must implement get_fileobj_for_io_writes()') + + def queue_file_io_task(self, fileobj, data, offset): + """Queue IO write for submission to the IO executor. + + This method accepts an IO executor and information about the + downloaded data, and handles submitting this to the IO executor. + + This method may defer submission to the IO executor if necessary. + + """ + self._transfer_coordinator.submit( self._io_executor, self.get_io_write_task(fileobj, data, offset) ) - - def get_io_write_task(self, fileobj, data, offset): - """Get an IO write task for the requested set of data - - This task can be ran immediately or be submitted to the IO executor - for it to run. 
- - :type fileobj: file-like object - :param fileobj: The file-like object to write to - - :type data: bytes - :param data: The data to write out - - :type offset: integer - :param offset: The offset to write the data to in the file-like object - - :returns: An IO task to be used to write data to a file-like object - """ - return IOWriteTask( - self._transfer_coordinator, - main_kwargs={ - 'fileobj': fileobj, - 'data': data, - 'offset': offset, + + def get_io_write_task(self, fileobj, data, offset): + """Get an IO write task for the requested set of data + + This task can be ran immediately or be submitted to the IO executor + for it to run. + + :type fileobj: file-like object + :param fileobj: The file-like object to write to + + :type data: bytes + :param data: The data to write out + + :type offset: integer + :param offset: The offset to write the data to in the file-like object + + :returns: An IO task to be used to write data to a file-like object + """ + return IOWriteTask( + self._transfer_coordinator, + main_kwargs={ + 'fileobj': fileobj, + 'data': data, + 'offset': offset, }, - ) - - def get_final_io_task(self): - """Get the final io task to complete the download - - This is needed because based on the architecture of the TransferManager - the final tasks will be sent to the IO executor, but the executor - needs a final task for it to signal that the transfer is done and - all done callbacks can be run. - - :rtype: s3transfer.tasks.Task - :returns: A final task to completed in the io executor - """ + ) + + def get_final_io_task(self): + """Get the final io task to complete the download + + This is needed because based on the architecture of the TransferManager + the final tasks will be sent to the IO executor, but the executor + needs a final task for it to signal that the transfer is done and + all done callbacks can be run. + + :rtype: s3transfer.tasks.Task + :returns: A final task to completed in the io executor + """ raise NotImplementedError('must implement get_final_io_task()') - - def _get_fileobj_from_filename(self, filename): - f = DeferredOpenFile( + + def _get_fileobj_from_filename(self, filename): + f = DeferredOpenFile( filename, mode='wb', open_function=self._osutil.open ) - # Make sure the file gets closed and we remove the temporary file - # if anything goes wrong during the process. - self._transfer_coordinator.add_failure_cleanup(f.close) - return f - - -class DownloadFilenameOutputManager(DownloadOutputManager): - def __init__(self, osutil, transfer_coordinator, io_executor): + # Make sure the file gets closed and we remove the temporary file + # if anything goes wrong during the process. 
+ self._transfer_coordinator.add_failure_cleanup(f.close) + return f + + +class DownloadFilenameOutputManager(DownloadOutputManager): + def __init__(self, osutil, transfer_coordinator, io_executor): super().__init__(osutil, transfer_coordinator, io_executor) - self._final_filename = None - self._temp_filename = None - self._temp_fileobj = None - - @classmethod - def is_compatible(cls, download_target, osutil): + self._final_filename = None + self._temp_filename = None + self._temp_fileobj = None + + @classmethod + def is_compatible(cls, download_target, osutil): return isinstance(download_target, str) - - def get_fileobj_for_io_writes(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - self._final_filename = fileobj + + def get_fileobj_for_io_writes(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + self._final_filename = fileobj self._temp_filename = self._osutil.get_temp_filename(fileobj) - self._temp_fileobj = self._get_temp_fileobj() - return self._temp_fileobj - - def get_final_io_task(self): - # A task to rename the file from the temporary file to its final - # location is needed. This should be the last task needed to complete - # the download. - return IORenameFileTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'fileobj': self._temp_fileobj, - 'final_filename': self._final_filename, + self._temp_fileobj = self._get_temp_fileobj() + return self._temp_fileobj + + def get_final_io_task(self): + # A task to rename the file from the temporary file to its final + # location is needed. This should be the last task needed to complete + # the download. + return IORenameFileTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'fileobj': self._temp_fileobj, + 'final_filename': self._final_filename, 'osutil': self._osutil, - }, + }, is_final=True, - ) - - def _get_temp_fileobj(self): - f = self._get_fileobj_from_filename(self._temp_filename) - self._transfer_coordinator.add_failure_cleanup( + ) + + def _get_temp_fileobj(self): + f = self._get_fileobj_from_filename(self._temp_filename) + self._transfer_coordinator.add_failure_cleanup( self._osutil.remove_file, self._temp_filename ) - return f - - -class DownloadSeekableOutputManager(DownloadOutputManager): - @classmethod - def is_compatible(cls, download_target, osutil): - return seekable(download_target) - - def get_fileobj_for_io_writes(self, transfer_future): - # Return the fileobj provided to the future. - return transfer_future.meta.call_args.fileobj - - def get_final_io_task(self): - # This task will serve the purpose of signaling when all of the io - # writes have finished so done callbacks can be called. - return CompleteDownloadNOOPTask( + return f + + +class DownloadSeekableOutputManager(DownloadOutputManager): + @classmethod + def is_compatible(cls, download_target, osutil): + return seekable(download_target) + + def get_fileobj_for_io_writes(self, transfer_future): + # Return the fileobj provided to the future. + return transfer_future.meta.call_args.fileobj + + def get_final_io_task(self): + # This task will serve the purpose of signaling when all of the io + # writes have finished so done callbacks can be called. 
+ return CompleteDownloadNOOPTask( transfer_coordinator=self._transfer_coordinator ) - - -class DownloadNonSeekableOutputManager(DownloadOutputManager): + + +class DownloadNonSeekableOutputManager(DownloadOutputManager): def __init__( self, osutil, transfer_coordinator, io_executor, defer_queue=None ): super().__init__(osutil, transfer_coordinator, io_executor) - if defer_queue is None: - defer_queue = DeferQueue() - self._defer_queue = defer_queue - self._io_submit_lock = threading.Lock() - - @classmethod - def is_compatible(cls, download_target, osutil): - return hasattr(download_target, 'write') - - def get_download_task_tag(self): - return IN_MEMORY_DOWNLOAD_TAG - - def get_fileobj_for_io_writes(self, transfer_future): - return transfer_future.meta.call_args.fileobj - - def get_final_io_task(self): - return CompleteDownloadNOOPTask( + if defer_queue is None: + defer_queue = DeferQueue() + self._defer_queue = defer_queue + self._io_submit_lock = threading.Lock() + + @classmethod + def is_compatible(cls, download_target, osutil): + return hasattr(download_target, 'write') + + def get_download_task_tag(self): + return IN_MEMORY_DOWNLOAD_TAG + + def get_fileobj_for_io_writes(self, transfer_future): + return transfer_future.meta.call_args.fileobj + + def get_final_io_task(self): + return CompleteDownloadNOOPTask( transfer_coordinator=self._transfer_coordinator ) - - def queue_file_io_task(self, fileobj, data, offset): - with self._io_submit_lock: - writes = self._defer_queue.request_writes(offset, data) - for write in writes: - data = write['data'] + + def queue_file_io_task(self, fileobj, data, offset): + with self._io_submit_lock: + writes = self._defer_queue.request_writes(offset, data) + for write in writes: + data = write['data'] logger.debug( "Queueing IO offset %s for fileobj: %s", write['offset'], fileobj, ) super().queue_file_io_task(fileobj, data, offset) - - def get_io_write_task(self, fileobj, data, offset): - return IOStreamingWriteTask( - self._transfer_coordinator, - main_kwargs={ - 'fileobj': fileobj, - 'data': data, + + def get_io_write_task(self, fileobj, data, offset): + return IOStreamingWriteTask( + self._transfer_coordinator, + main_kwargs={ + 'fileobj': fileobj, + 'data': data, }, - ) - - -class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager): + ) + + +class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager): def __init__( self, osutil, transfer_coordinator, io_executor, defer_queue=None ): super().__init__( osutil, transfer_coordinator, io_executor, defer_queue ) - self._fileobj = None - - @classmethod - def is_compatible(cls, download_target, osutil): + self._fileobj = None + + @classmethod + def is_compatible(cls, download_target, osutil): return isinstance(download_target, str) and osutil.is_special_file( download_target ) - - def get_fileobj_for_io_writes(self, transfer_future): - filename = transfer_future.meta.call_args.fileobj - self._fileobj = self._get_fileobj_from_filename(filename) - return self._fileobj - - def get_final_io_task(self): - # Make sure the file gets closed once the transfer is done. - return IOCloseTask( - transfer_coordinator=self._transfer_coordinator, - is_final=True, + + def get_fileobj_for_io_writes(self, transfer_future): + filename = transfer_future.meta.call_args.fileobj + self._fileobj = self._get_fileobj_from_filename(filename) + return self._fileobj + + def get_final_io_task(self): + # Make sure the file gets closed once the transfer is done. 
+ return IOCloseTask( + transfer_coordinator=self._transfer_coordinator, + is_final=True, main_kwargs={'fileobj': self._fileobj}, ) - - -class DownloadSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute a download""" - - def _get_download_output_manager_cls(self, transfer_future, osutil): - """Retrieves a class for managing output for a download - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future for the request - - :type osutil: s3transfer.utils.OSUtils - :param osutil: The os utility associated to the transfer - - :rtype: class of DownloadOutputManager - :returns: The appropriate class to use for managing a specific type of - input for downloads. - """ - download_manager_resolver_chain = [ - DownloadSpecialFilenameOutputManager, - DownloadFilenameOutputManager, - DownloadSeekableOutputManager, - DownloadNonSeekableOutputManager, - ] - - fileobj = transfer_future.meta.call_args.fileobj - for download_manager_cls in download_manager_resolver_chain: - if download_manager_cls.is_compatible(fileobj, osutil): - return download_manager_cls - raise RuntimeError( + + +class DownloadSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute a download""" + + def _get_download_output_manager_cls(self, transfer_future, osutil): + """Retrieves a class for managing output for a download + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future for the request + + :type osutil: s3transfer.utils.OSUtils + :param osutil: The os utility associated to the transfer + + :rtype: class of DownloadOutputManager + :returns: The appropriate class to use for managing a specific type of + input for downloads. + """ + download_manager_resolver_chain = [ + DownloadSpecialFilenameOutputManager, + DownloadFilenameOutputManager, + DownloadSeekableOutputManager, + DownloadNonSeekableOutputManager, + ] + + fileobj = transfer_future.meta.call_args.fileobj + for download_manager_cls in download_manager_resolver_chain: + if download_manager_cls.is_compatible(fileobj, osutil): + return download_manager_cls + raise RuntimeError( 'Output {} of type: {} is not supported.'.format( fileobj, type(fileobj) ) ) - + def _submit( self, client, @@ -322,52 +322,52 @@ class DownloadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter=None, ): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type io_executor: s3transfer.futures.BoundedExecutor - :param io_executor: The io executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter - :param bandwidth_limiter: The bandwidth limiter to use when - downloading streams - """ - if transfer_future.meta.size is None: - # If a size was not provided figure out the size for the - # user. 
- response = client.head_object( - Bucket=transfer_future.meta.call_args.bucket, - Key=transfer_future.meta.call_args.key, + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type io_executor: s3transfer.futures.BoundedExecutor + :param io_executor: The io executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter + :param bandwidth_limiter: The bandwidth limiter to use when + downloading streams + """ + if transfer_future.meta.size is None: + # If a size was not provided figure out the size for the + # user. + response = client.head_object( + Bucket=transfer_future.meta.call_args.bucket, + Key=transfer_future.meta.call_args.key, **transfer_future.meta.call_args.extra_args, - ) - transfer_future.meta.provide_transfer_size( + ) + transfer_future.meta.provide_transfer_size( response['ContentLength'] ) - - download_output_manager = self._get_download_output_manager_cls( + + download_output_manager = self._get_download_output_manager_cls( transfer_future, osutil )(osutil, self._transfer_coordinator, io_executor) - - # If it is greater than threshold do a ranged download, otherwise - # do a regular GetObject download. - if transfer_future.meta.size < config.multipart_threshold: - self._submit_download_request( + + # If it is greater than threshold do a ranged download, otherwise + # do a regular GetObject download. + if transfer_future.meta.size < config.multipart_threshold: + self._submit_download_request( client, config, osutil, @@ -377,8 +377,8 @@ class DownloadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter, ) - else: - self._submit_ranged_download_request( + else: + self._submit_ranged_download_request( client, config, osutil, @@ -388,7 +388,7 @@ class DownloadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter, ) - + def _submit_download_request( self, client, @@ -400,45 +400,45 @@ class DownloadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter, ): - call_args = transfer_future.meta.call_args - - # Get a handle to the file that will be used for writing downloaded - # contents - fileobj = download_output_manager.get_fileobj_for_io_writes( + call_args = transfer_future.meta.call_args + + # Get a handle to the file that will be used for writing downloaded + # contents + fileobj = download_output_manager.get_fileobj_for_io_writes( transfer_future ) - - # Get the needed callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Get any associated tags for the get object task. - get_object_tag = download_output_manager.get_download_task_tag() - - # Get the final io task to run once the download is complete. - final_task = download_output_manager.get_final_io_task() - - # Submit the task to download the object. 
- self._transfer_coordinator.submit( - request_executor, - ImmediatelyWriteIOGetObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'fileobj': fileobj, - 'extra_args': call_args.extra_args, - 'callbacks': progress_callbacks, - 'max_attempts': config.num_download_attempts, - 'download_output_manager': download_output_manager, - 'io_chunksize': config.io_chunksize, + + # Get the needed callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Get any associated tags for the get object task. + get_object_tag = download_output_manager.get_download_task_tag() + + # Get the final io task to run once the download is complete. + final_task = download_output_manager.get_final_io_task() + + # Submit the task to download the object. + self._transfer_coordinator.submit( + request_executor, + ImmediatelyWriteIOGetObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'fileobj': fileobj, + 'extra_args': call_args.extra_args, + 'callbacks': progress_callbacks, + 'max_attempts': config.num_download_attempts, + 'download_output_manager': download_output_manager, + 'io_chunksize': config.io_chunksize, 'bandwidth_limiter': bandwidth_limiter, - }, + }, done_callbacks=[final_task], - ), + ), tag=get_object_tag, - ) - + ) + def _submit_ranged_download_request( self, client, @@ -450,86 +450,86 @@ class DownloadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter, ): - call_args = transfer_future.meta.call_args - - # Get the needed progress callbacks for the task - progress_callbacks = get_callbacks(transfer_future, 'progress') - - # Get a handle to the file that will be used for writing downloaded - # contents - fileobj = download_output_manager.get_fileobj_for_io_writes( + call_args = transfer_future.meta.call_args + + # Get the needed progress callbacks for the task + progress_callbacks = get_callbacks(transfer_future, 'progress') + + # Get a handle to the file that will be used for writing downloaded + # contents + fileobj = download_output_manager.get_fileobj_for_io_writes( transfer_future ) - - # Determine the number of parts - part_size = config.multipart_chunksize + + # Determine the number of parts + part_size = config.multipart_chunksize num_parts = calculate_num_parts(transfer_future.meta.size, part_size) - - # Get any associated tags for the get object task. - get_object_tag = download_output_manager.get_download_task_tag() - - # Callback invoker to submit the final io task once all downloads - # are complete. - finalize_download_invoker = CountCallbackInvoker( - self._get_final_io_task_submission_callback( - download_output_manager, io_executor - ) - ) - for i in range(num_parts): - # Calculate the range parameter - range_parameter = calculate_range_parameter( + + # Get any associated tags for the get object task. + get_object_tag = download_output_manager.get_download_task_tag() + + # Callback invoker to submit the final io task once all downloads + # are complete. 
+ finalize_download_invoker = CountCallbackInvoker( + self._get_final_io_task_submission_callback( + download_output_manager, io_executor + ) + ) + for i in range(num_parts): + # Calculate the range parameter + range_parameter = calculate_range_parameter( part_size, i, num_parts ) - - # Inject the Range parameter to the parameters to be passed in - # as extra args - extra_args = {'Range': range_parameter} - extra_args.update(call_args.extra_args) - finalize_download_invoker.increment() - # Submit the ranged downloads - self._transfer_coordinator.submit( - request_executor, - GetObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'fileobj': fileobj, - 'extra_args': extra_args, - 'callbacks': progress_callbacks, - 'max_attempts': config.num_download_attempts, - 'start_index': i * part_size, - 'download_output_manager': download_output_manager, - 'io_chunksize': config.io_chunksize, + + # Inject the Range parameter to the parameters to be passed in + # as extra args + extra_args = {'Range': range_parameter} + extra_args.update(call_args.extra_args) + finalize_download_invoker.increment() + # Submit the ranged downloads + self._transfer_coordinator.submit( + request_executor, + GetObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'fileobj': fileobj, + 'extra_args': extra_args, + 'callbacks': progress_callbacks, + 'max_attempts': config.num_download_attempts, + 'start_index': i * part_size, + 'download_output_manager': download_output_manager, + 'io_chunksize': config.io_chunksize, 'bandwidth_limiter': bandwidth_limiter, - }, + }, done_callbacks=[finalize_download_invoker.decrement], - ), + ), tag=get_object_tag, - ) - finalize_download_invoker.finalize() - + ) + finalize_download_invoker.finalize() + def _get_final_io_task_submission_callback( self, download_manager, io_executor ): - final_task = download_manager.get_final_io_task() - return FunctionContainer( + final_task = download_manager.get_final_io_task() + return FunctionContainer( self._transfer_coordinator.submit, io_executor, final_task ) - - def _calculate_range_param(self, part_size, part_index, num_parts): - # Used to calculate the Range parameter - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - else: - end_range = start_range + part_size - 1 + + def _calculate_range_param(self, part_size, part_index, num_parts): + # Used to calculate the Range parameter + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + else: + end_range = start_range + part_size - 1 range_param = f'bytes={start_range}-{end_range}' - return range_param - - -class GetObjectTask(Task): + return range_param + + +class GetObjectTask(Task): def _main( self, client, @@ -544,57 +544,57 @@ class GetObjectTask(Task): start_index=0, bandwidth_limiter=None, ): - """Downloads an object and places content into io queue - - :param client: The client to use when calling GetObject - :param bucket: The bucket to download from - :param key: The key to download from - :param fileobj: The file handle to write content to + """Downloads an object and places content into io queue + + :param client: The client to use when calling GetObject + :param bucket: The bucket to download from + :param key: The key to download from + :param fileobj: The file handle to write content to :param exta_args: Any 
extra arguments to include in GetObject request - :param callbacks: List of progress callbacks to invoke on download - :param max_attempts: The number of retries to do when downloading - :param download_output_manager: The download output manager associated - with the current download. - :param io_chunksize: The size of each io chunk to read from the - download stream and queue in the io queue. - :param start_index: The location in the file to start writing the - content of the key to. - :param bandwidth_limiter: The bandwidth limiter to use when throttling - the downloading of data in streams. - """ - last_exception = None - for i in range(max_attempts): - try: + :param callbacks: List of progress callbacks to invoke on download + :param max_attempts: The number of retries to do when downloading + :param download_output_manager: The download output manager associated + with the current download. + :param io_chunksize: The size of each io chunk to read from the + download stream and queue in the io queue. + :param start_index: The location in the file to start writing the + content of the key to. + :param bandwidth_limiter: The bandwidth limiter to use when throttling + the downloading of data in streams. + """ + last_exception = None + for i in range(max_attempts): + try: current_index = start_index - response = client.get_object( + response = client.get_object( Bucket=bucket, Key=key, **extra_args ) - streaming_body = StreamReaderProgress( + streaming_body = StreamReaderProgress( response['Body'], callbacks ) - if bandwidth_limiter: + if bandwidth_limiter: streaming_body = ( - bandwidth_limiter.get_bandwith_limited_stream( + bandwidth_limiter.get_bandwith_limited_stream( streaming_body, self._transfer_coordinator ) ) - - chunks = DownloadChunkIterator(streaming_body, io_chunksize) - for chunk in chunks: - # If the transfer is done because of a cancellation - # or error somewhere else, stop trying to submit more - # data to be written and break out of the download. - if not self._transfer_coordinator.done(): - self._handle_io( + + chunks = DownloadChunkIterator(streaming_body, io_chunksize) + for chunk in chunks: + # If the transfer is done because of a cancellation + # or error somewhere else, stop trying to submit more + # data to be written and break out of the download. + if not self._transfer_coordinator.done(): + self._handle_io( download_output_manager, fileobj, chunk, current_index, - ) - current_index += len(chunk) - else: - return - return + ) + current_index += len(chunk) + else: + return + return except S3_RETRYABLE_DOWNLOAD_ERRORS as e: logger.debug( "Retrying exception caught (%s), " @@ -604,91 +604,91 @@ class GetObjectTask(Task): max_attempts, exc_info=True, ) - last_exception = e - # Also invoke the progress callbacks to indicate that we - # are trying to download the stream again and all progress - # for this GetObject has been lost. - invoke_progress_callbacks( + last_exception = e + # Also invoke the progress callbacks to indicate that we + # are trying to download the stream again and all progress + # for this GetObject has been lost. 
+ invoke_progress_callbacks( callbacks, start_index - current_index ) - continue - raise RetriesExceededError(last_exception) - - def _handle_io(self, download_output_manager, fileobj, chunk, index): - download_output_manager.queue_file_io_task(fileobj, chunk, index) - - -class ImmediatelyWriteIOGetObjectTask(GetObjectTask): - """GetObjectTask that immediately writes to the provided file object - - This is useful for downloads where it is known only one thread is - downloading the object so there is no reason to go through the - overhead of using an IO queue and executor. - """ - - def _handle_io(self, download_output_manager, fileobj, chunk, index): - task = download_output_manager.get_io_write_task(fileobj, chunk, index) - task() - - -class IOWriteTask(Task): - def _main(self, fileobj, data, offset): - """Pulls off an io queue to write contents to a file - - :param fileobj: The file handle to write content to - :param data: The data to write - :param offset: The offset to write the data to. - """ - fileobj.seek(offset) - fileobj.write(data) - - -class IOStreamingWriteTask(Task): - """Task for writing data to a non-seekable stream.""" - - def _main(self, fileobj, data): - """Write data to a fileobj. - + continue + raise RetriesExceededError(last_exception) + + def _handle_io(self, download_output_manager, fileobj, chunk, index): + download_output_manager.queue_file_io_task(fileobj, chunk, index) + + +class ImmediatelyWriteIOGetObjectTask(GetObjectTask): + """GetObjectTask that immediately writes to the provided file object + + This is useful for downloads where it is known only one thread is + downloading the object so there is no reason to go through the + overhead of using an IO queue and executor. + """ + + def _handle_io(self, download_output_manager, fileobj, chunk, index): + task = download_output_manager.get_io_write_task(fileobj, chunk, index) + task() + + +class IOWriteTask(Task): + def _main(self, fileobj, data, offset): + """Pulls off an io queue to write contents to a file + + :param fileobj: The file handle to write content to + :param data: The data to write + :param offset: The offset to write the data to. + """ + fileobj.seek(offset) + fileobj.write(data) + + +class IOStreamingWriteTask(Task): + """Task for writing data to a non-seekable stream.""" + + def _main(self, fileobj, data): + """Write data to a fileobj. + Data will be written directly to the fileobj without - any prior seeking. - - :param fileobj: The fileobj to write content to - :param data: The data to write - - """ - fileobj.write(data) - - -class IORenameFileTask(Task): - """A task to rename a temporary file to its final filename - - :param fileobj: The file handle that content was written to. - :param final_filename: The final name of the file to rename to - upon completion of writing the contents. - :param osutil: OS utility - """ - - def _main(self, fileobj, final_filename, osutil): - fileobj.close() - osutil.rename_file(fileobj.name, final_filename) - - -class IOCloseTask(Task): - """A task to close out a file once the download is complete. - - :param fileobj: The fileobj to close. - """ - - def _main(self, fileobj): - fileobj.close() - - -class CompleteDownloadNOOPTask(Task): - """A NOOP task to serve as an indicator that the download is complete - - Note that the default for is_final is set to True because this should - always be the last task. - """ + any prior seeking. 
+ + :param fileobj: The fileobj to write content to + :param data: The data to write + + """ + fileobj.write(data) + + +class IORenameFileTask(Task): + """A task to rename a temporary file to its final filename + + :param fileobj: The file handle that content was written to. + :param final_filename: The final name of the file to rename to + upon completion of writing the contents. + :param osutil: OS utility + """ + + def _main(self, fileobj, final_filename, osutil): + fileobj.close() + osutil.rename_file(fileobj.name, final_filename) + + +class IOCloseTask(Task): + """A task to close out a file once the download is complete. + + :param fileobj: The fileobj to close. + """ + + def _main(self, fileobj): + fileobj.close() + + +class CompleteDownloadNOOPTask(Task): + """A NOOP task to serve as an indicator that the download is complete + + Note that the default for is_final is set to True because this should + always be the last task. + """ def __init__( self, @@ -699,92 +699,92 @@ class CompleteDownloadNOOPTask(Task): is_final=True, ): super().__init__( - transfer_coordinator=transfer_coordinator, - main_kwargs=main_kwargs, - pending_main_kwargs=pending_main_kwargs, - done_callbacks=done_callbacks, + transfer_coordinator=transfer_coordinator, + main_kwargs=main_kwargs, + pending_main_kwargs=pending_main_kwargs, + done_callbacks=done_callbacks, is_final=is_final, - ) - - def _main(self): - pass - - + ) + + def _main(self): + pass + + class DownloadChunkIterator: - def __init__(self, body, chunksize): - """Iterator to chunk out a downloaded S3 stream - - :param body: A readable file-like object - :param chunksize: The amount to read each time - """ - self._body = body - self._chunksize = chunksize - self._num_reads = 0 - - def __iter__(self): - return self - - def __next__(self): - chunk = self._body.read(self._chunksize) - self._num_reads += 1 - if chunk: - return chunk - elif self._num_reads == 1: - # Even though the response may have not had any - # content, we still want to account for an empty object's + def __init__(self, body, chunksize): + """Iterator to chunk out a downloaded S3 stream + + :param body: A readable file-like object + :param chunksize: The amount to read each time + """ + self._body = body + self._chunksize = chunksize + self._num_reads = 0 + + def __iter__(self): + return self + + def __next__(self): + chunk = self._body.read(self._chunksize) + self._num_reads += 1 + if chunk: + return chunk + elif self._num_reads == 1: + # Even though the response may have not had any + # content, we still want to account for an empty object's # existence so return the empty chunk for that initial - # read. - return chunk - raise StopIteration() - - next = __next__ - - + # read. + return chunk + raise StopIteration() + + next = __next__ + + class DeferQueue: - """IO queue that defers write requests until they are queued sequentially. - - This class is used to track IO data for a *single* fileobj. - - You can send data to this queue, and it will defer any IO write requests - until it has the next contiguous block available (starting at 0). - - """ - - def __init__(self): - self._writes = [] - self._pending_offsets = set() - self._next_offset = 0 - - def request_writes(self, offset, data): - """Request any available writes given new incoming data. - - You call this method by providing new data along with the - offset associated with the data. If that new data unlocks - any contiguous writes that can now be submitted, this - method will return all applicable writes. 
- - This is done with 1 method call so you don't have to - make two method calls (put(), get()) which acquires a lock - each method call. - - """ - if offset < self._next_offset: - # This is a request for a write that we've already - # seen. This can happen in the event of a retry - # where if we retry at at offset N/2, we'll requeue - # offsets 0-N/2 again. - return [] - writes = [] - if offset in self._pending_offsets: - # We've already queued this offset so this request is - # a duplicate. In this case we should ignore - # this request and prefer what's already queued. - return [] - heapq.heappush(self._writes, (offset, data)) - self._pending_offsets.add(offset) - while self._writes and self._writes[0][0] == self._next_offset: - next_write = heapq.heappop(self._writes) - writes.append({'offset': next_write[0], 'data': next_write[1]}) - self._pending_offsets.remove(next_write[0]) - self._next_offset += len(next_write[1]) - return writes + """IO queue that defers write requests until they are queued sequentially. + + This class is used to track IO data for a *single* fileobj. + + You can send data to this queue, and it will defer any IO write requests + until it has the next contiguous block available (starting at 0). + + """ + + def __init__(self): + self._writes = [] + self._pending_offsets = set() + self._next_offset = 0 + + def request_writes(self, offset, data): + """Request any available writes given new incoming data. + + You call this method by providing new data along with the + offset associated with the data. If that new data unlocks + any contiguous writes that can now be submitted, this + method will return all applicable writes. + + This is done with 1 method call so you don't have to + make two method calls (put(), get()) which acquires a lock + each method call. + + """ + if offset < self._next_offset: + # This is a request for a write that we've already + # seen. This can happen in the event of a retry + # where if we retry at at offset N/2, we'll requeue + # offsets 0-N/2 again. + return [] + writes = [] + if offset in self._pending_offsets: + # We've already queued this offset so this request is + # a duplicate. In this case we should ignore + # this request and prefer what's already queued. + return [] + heapq.heappush(self._writes, (offset, data)) + self._pending_offsets.add(offset) + while self._writes and self._writes[0][0] == self._next_offset: + next_write = heapq.heappop(self._writes) + writes.append({'offset': next_write[0], 'data': next_write[1]}) + self._pending_offsets.remove(next_write[0]) + self._next_offset += len(next_write[1]) + return writes diff --git a/contrib/python/s3transfer/py3/s3transfer/exceptions.py b/contrib/python/s3transfer/py3/s3transfer/exceptions.py index ef48766f5f..6150fe650d 100644 --- a/contrib/python/s3transfer/py3/s3transfer/exceptions.py +++ b/contrib/python/s3transfer/py3/s3transfer/exceptions.py @@ -1,37 +1,37 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
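# A minimal usage sketch (illustrative only, not part of the library code in
# this diff) of the DeferQueue contract documented above: request_writes()
# releases writes only once the contiguous block starting at offset 0 is
# available, and ignores duplicate or already-flushed offsets. The offsets
# and payloads below are made up for illustration.
from s3transfer.download import DeferQueue

q = DeferQueue()
print(q.request_writes(5, b'world'))  # [] -- offset 0 has not arrived yet
print(q.request_writes(0, b'hello'))  # now contiguous from offset 0:
                                      # [{'offset': 0, 'data': b'hello'},
                                      #  {'offset': 5, 'data': b'world'}]
print(q.request_writes(0, b'hello'))  # [] -- already flushed (the retry case)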
-from concurrent.futures import CancelledError - - -class RetriesExceededError(Exception): - def __init__(self, last_exception, msg='Max Retries Exceeded'): +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from concurrent.futures import CancelledError + + +class RetriesExceededError(Exception): + def __init__(self, last_exception, msg='Max Retries Exceeded'): super().__init__(msg) - self.last_exception = last_exception - - -class S3UploadFailedError(Exception): - pass - - -class InvalidSubscriberMethodError(Exception): - pass - - -class TransferNotDoneError(Exception): - pass - - -class FatalError(CancelledError): - """A CancelledError raised from an error in the TransferManager""" - - pass + self.last_exception = last_exception + + +class S3UploadFailedError(Exception): + pass + + +class InvalidSubscriberMethodError(Exception): + pass + + +class TransferNotDoneError(Exception): + pass + + +class FatalError(CancelledError): + """A CancelledError raised from an error in the TransferManager""" + + pass diff --git a/contrib/python/s3transfer/py3/s3transfer/futures.py b/contrib/python/s3transfer/py3/s3transfer/futures.py index a56c9e3215..39e071fb60 100644 --- a/contrib/python/s3transfer/py3/s3transfer/futures.py +++ b/contrib/python/s3transfer/py3/s3transfer/futures.py @@ -1,29 +1,29 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import logging -import sys -import threading +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
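# A hedged sketch (illustrative only, not part of the library code in this
# diff) of how the RetriesExceededError defined above surfaces to callers of
# the py3 transfer manager: GetObjectTask raises it after num_download_attempts
# failed reads, and the original streaming error is preserved on
# .last_exception. The bucket, key, and local path below are placeholders.
import boto3
from s3transfer.exceptions import RetriesExceededError
from s3transfer.manager import TransferManager

client = boto3.client('s3', 'us-west-2')
with TransferManager(client) as manager:
    future = manager.download('my-bucket', 'my-key', '/tmp/myfile')
    try:
        future.result()
    except RetriesExceededError as e:
        print('download failed after retries:', e.last_exception)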
+import copy +import logging +import sys +import threading from collections import namedtuple from concurrent import futures - -from s3transfer.compat import MAXINT -from s3transfer.exceptions import CancelledError, TransferNotDoneError + +from s3transfer.compat import MAXINT +from s3transfer.exceptions import CancelledError, TransferNotDoneError from s3transfer.utils import FunctionContainer, TaskSemaphore - -logger = logging.getLogger(__name__) - - + +logger = logging.getLogger(__name__) + + class BaseTransferFuture: @property def meta(self): @@ -69,538 +69,538 @@ class BaseTransferMeta: class TransferFuture(BaseTransferFuture): - def __init__(self, meta=None, coordinator=None): - """The future associated to a submitted transfer request - - :type meta: TransferMeta - :param meta: The metadata associated to the request. This object - is visible to the requester. - - :type coordinator: TransferCoordinator - :param coordinator: The coordinator associated to the request. This - object is not visible to the requester. - """ - self._meta = meta - if meta is None: - self._meta = TransferMeta() - - self._coordinator = coordinator - if coordinator is None: - self._coordinator = TransferCoordinator() - - @property - def meta(self): - return self._meta - - def done(self): - return self._coordinator.done() - - def result(self): - try: - # Usually the result() method blocks until the transfer is done, - # however if a KeyboardInterrupt is raised we want want to exit + def __init__(self, meta=None, coordinator=None): + """The future associated to a submitted transfer request + + :type meta: TransferMeta + :param meta: The metadata associated to the request. This object + is visible to the requester. + + :type coordinator: TransferCoordinator + :param coordinator: The coordinator associated to the request. This + object is not visible to the requester. + """ + self._meta = meta + if meta is None: + self._meta = TransferMeta() + + self._coordinator = coordinator + if coordinator is None: + self._coordinator = TransferCoordinator() + + @property + def meta(self): + return self._meta + + def done(self): + return self._coordinator.done() + + def result(self): + try: + # Usually the result() method blocks until the transfer is done, + # however if a KeyboardInterrupt is raised we want want to exit # out of this and propagate the exception. - return self._coordinator.result() - except KeyboardInterrupt as e: - self.cancel() - raise e - - def cancel(self): - self._coordinator.cancel() - - def set_exception(self, exception): - """Sets the exception on the future.""" - if not self.done(): - raise TransferNotDoneError( - 'set_exception can only be called once the transfer is ' + return self._coordinator.result() + except KeyboardInterrupt as e: + self.cancel() + raise e + + def cancel(self): + self._coordinator.cancel() + + def set_exception(self, exception): + """Sets the exception on the future.""" + if not self.done(): + raise TransferNotDoneError( + 'set_exception can only be called once the transfer is ' 'complete.' 
) - self._coordinator.set_exception(exception, override=True) - - + self._coordinator.set_exception(exception, override=True) + + class TransferMeta(BaseTransferMeta): - """Holds metadata about the TransferFuture""" - - def __init__(self, call_args=None, transfer_id=None): - self._call_args = call_args - self._transfer_id = transfer_id - self._size = None - self._user_context = {} - - @property - def call_args(self): - """The call args used in the transfer request""" - return self._call_args - - @property - def transfer_id(self): - """The unique id of the transfer""" - return self._transfer_id - - @property - def size(self): - """The size of the transfer request if known""" - return self._size - - @property - def user_context(self): - """A dictionary that requesters can store data in""" - return self._user_context - - def provide_transfer_size(self, size): - """A method to provide the size of a transfer request - - By providing this value, the TransferManager will not try to - call HeadObject or use the use OS to determine the size of the - transfer. - """ - self._size = size - - + """Holds metadata about the TransferFuture""" + + def __init__(self, call_args=None, transfer_id=None): + self._call_args = call_args + self._transfer_id = transfer_id + self._size = None + self._user_context = {} + + @property + def call_args(self): + """The call args used in the transfer request""" + return self._call_args + + @property + def transfer_id(self): + """The unique id of the transfer""" + return self._transfer_id + + @property + def size(self): + """The size of the transfer request if known""" + return self._size + + @property + def user_context(self): + """A dictionary that requesters can store data in""" + return self._user_context + + def provide_transfer_size(self, size): + """A method to provide the size of a transfer request + + By providing this value, the TransferManager will not try to + call HeadObject or use the use OS to determine the size of the + transfer. + """ + self._size = size + + class TransferCoordinator: - """A helper class for managing TransferFuture""" - - def __init__(self, transfer_id=None): - self.transfer_id = transfer_id - self._status = 'not-started' - self._result = None - self._exception = None - self._associated_futures = set() - self._failure_cleanups = [] - self._done_callbacks = [] - self._done_event = threading.Event() - self._lock = threading.Lock() - self._associated_futures_lock = threading.Lock() - self._done_callbacks_lock = threading.Lock() - self._failure_cleanups_lock = threading.Lock() - - def __repr__(self): + """A helper class for managing TransferFuture""" + + def __init__(self, transfer_id=None): + self.transfer_id = transfer_id + self._status = 'not-started' + self._result = None + self._exception = None + self._associated_futures = set() + self._failure_cleanups = [] + self._done_callbacks = [] + self._done_event = threading.Event() + self._lock = threading.Lock() + self._associated_futures_lock = threading.Lock() + self._done_callbacks_lock = threading.Lock() + self._failure_cleanups_lock = threading.Lock() + + def __repr__(self): return '{}(transfer_id={})'.format( self.__class__.__name__, self.transfer_id ) - - @property - def exception(self): - return self._exception - - @property - def associated_futures(self): - """The list of futures associated to the inprogress TransferFuture - - Once the transfer finishes this list becomes empty as the transfer - is considered done and there should be no running futures left. 
- """ - with self._associated_futures_lock: - # We return a copy of the list because we do not want to - # processing the returned list while another thread is adding - # more futures to the actual list. - return copy.copy(self._associated_futures) - - @property - def failure_cleanups(self): - """The list of callbacks to call when the TransferFuture fails""" - return self._failure_cleanups - - @property - def status(self): - """The status of the TransferFuture - - The currently supported states are: - * not-started - Has yet to start. If in this state, a transfer - can be canceled immediately and nothing will happen. - * queued - SubmissionTask is about to submit tasks - * running - Is inprogress. In-progress as of now means that - the SubmissionTask that runs the transfer is being executed. So - there is no guarantee any transfer requests had been made to - S3 if this state is reached. - * cancelled - Was cancelled - * failed - An exception other than CancelledError was thrown - * success - No exceptions were thrown and is done. - """ - return self._status - - def set_result(self, result): - """Set a result for the TransferFuture - - Implies that the TransferFuture succeeded. This will always set a - result because it is invoked on the final task where there is only - ever one final task and it is ran at the very end of a transfer - process. So if a result is being set for this final task, the transfer - succeeded even if something came a long and canceled the transfer - on the final task. - """ - with self._lock: - self._exception = None - self._result = result - self._status = 'success' - - def set_exception(self, exception, override=False): - """Set an exception for the TransferFuture - - Implies the TransferFuture failed. - - :param exception: The exception that cause the transfer to fail. - :param override: If True, override any existing state. - """ - with self._lock: - if not self.done() or override: - self._exception = exception - self._status = 'failed' - - def result(self): - """Waits until TransferFuture is done and returns the result - - If the TransferFuture succeeded, it will return the result. If the - TransferFuture failed, it will raise the exception associated to the - failure. - """ - # Doing a wait() with no timeout cannot be interrupted in python2 but - # can be interrupted in python3 so we just wait with the largest - # possible value integer value, which is on the scale of billions of - # years... - self._done_event.wait(MAXINT) - - # Once done waiting, raise an exception if present or return the - # final result. 
- if self._exception: - raise self._exception - return self._result - - def cancel(self, msg='', exc_type=CancelledError): - """Cancels the TransferFuture - - :param msg: The message to attach to the cancellation - :param exc_type: The type of exception to set for the cancellation - """ - with self._lock: - if not self.done(): - should_announce_done = False - logger.debug('%s cancel(%s) called', self, msg) - self._exception = exc_type(msg) - if self._status == 'not-started': - should_announce_done = True - self._status = 'cancelled' - if should_announce_done: - self.announce_done() - - def set_status_to_queued(self): - """Sets the TransferFutrue's status to running""" - self._transition_to_non_done_state('queued') - - def set_status_to_running(self): - """Sets the TransferFuture's status to running""" - self._transition_to_non_done_state('running') - - def _transition_to_non_done_state(self, desired_state): - with self._lock: - if self.done(): - raise RuntimeError( - 'Unable to transition from done state %s to non-done ' + + @property + def exception(self): + return self._exception + + @property + def associated_futures(self): + """The list of futures associated to the inprogress TransferFuture + + Once the transfer finishes this list becomes empty as the transfer + is considered done and there should be no running futures left. + """ + with self._associated_futures_lock: + # We return a copy of the list because we do not want to + # processing the returned list while another thread is adding + # more futures to the actual list. + return copy.copy(self._associated_futures) + + @property + def failure_cleanups(self): + """The list of callbacks to call when the TransferFuture fails""" + return self._failure_cleanups + + @property + def status(self): + """The status of the TransferFuture + + The currently supported states are: + * not-started - Has yet to start. If in this state, a transfer + can be canceled immediately and nothing will happen. + * queued - SubmissionTask is about to submit tasks + * running - Is inprogress. In-progress as of now means that + the SubmissionTask that runs the transfer is being executed. So + there is no guarantee any transfer requests had been made to + S3 if this state is reached. + * cancelled - Was cancelled + * failed - An exception other than CancelledError was thrown + * success - No exceptions were thrown and is done. + """ + return self._status + + def set_result(self, result): + """Set a result for the TransferFuture + + Implies that the TransferFuture succeeded. This will always set a + result because it is invoked on the final task where there is only + ever one final task and it is ran at the very end of a transfer + process. So if a result is being set for this final task, the transfer + succeeded even if something came a long and canceled the transfer + on the final task. + """ + with self._lock: + self._exception = None + self._result = result + self._status = 'success' + + def set_exception(self, exception, override=False): + """Set an exception for the TransferFuture + + Implies the TransferFuture failed. + + :param exception: The exception that cause the transfer to fail. + :param override: If True, override any existing state. + """ + with self._lock: + if not self.done() or override: + self._exception = exception + self._status = 'failed' + + def result(self): + """Waits until TransferFuture is done and returns the result + + If the TransferFuture succeeded, it will return the result. 
If the + TransferFuture failed, it will raise the exception associated to the + failure. + """ + # Doing a wait() with no timeout cannot be interrupted in python2 but + # can be interrupted in python3 so we just wait with the largest + # possible value integer value, which is on the scale of billions of + # years... + self._done_event.wait(MAXINT) + + # Once done waiting, raise an exception if present or return the + # final result. + if self._exception: + raise self._exception + return self._result + + def cancel(self, msg='', exc_type=CancelledError): + """Cancels the TransferFuture + + :param msg: The message to attach to the cancellation + :param exc_type: The type of exception to set for the cancellation + """ + with self._lock: + if not self.done(): + should_announce_done = False + logger.debug('%s cancel(%s) called', self, msg) + self._exception = exc_type(msg) + if self._status == 'not-started': + should_announce_done = True + self._status = 'cancelled' + if should_announce_done: + self.announce_done() + + def set_status_to_queued(self): + """Sets the TransferFutrue's status to running""" + self._transition_to_non_done_state('queued') + + def set_status_to_running(self): + """Sets the TransferFuture's status to running""" + self._transition_to_non_done_state('running') + + def _transition_to_non_done_state(self, desired_state): + with self._lock: + if self.done(): + raise RuntimeError( + 'Unable to transition from done state %s to non-done ' 'state %s.' % (self.status, desired_state) ) - self._status = desired_state - - def submit(self, executor, task, tag=None): - """Submits a task to a provided executor - - :type executor: s3transfer.futures.BoundedExecutor - :param executor: The executor to submit the callable to - - :type task: s3transfer.tasks.Task - :param task: The task to submit to the executor - - :type tag: s3transfer.futures.TaskTag - :param tag: A tag to associate to the submitted task - - :rtype: concurrent.futures.Future - :returns: A future representing the submitted task - """ - logger.debug( + self._status = desired_state + + def submit(self, executor, task, tag=None): + """Submits a task to a provided executor + + :type executor: s3transfer.futures.BoundedExecutor + :param executor: The executor to submit the callable to + + :type task: s3transfer.tasks.Task + :param task: The task to submit to the executor + + :type tag: s3transfer.futures.TaskTag + :param tag: A tag to associate to the submitted task + + :rtype: concurrent.futures.Future + :returns: A future representing the submitted task + """ + logger.debug( "Submitting task {} to executor {} for transfer request: {}.".format( task, executor, self.transfer_id ) - ) - future = executor.submit(task, tag=tag) - # Add this created future to the list of associated future just - # in case it is needed during cleanups. - self.add_associated_future(future) - future.add_done_callback( + ) + future = executor.submit(task, tag=tag) + # Add this created future to the list of associated future just + # in case it is needed during cleanups. + self.add_associated_future(future) + future.add_done_callback( FunctionContainer(self.remove_associated_future, future) ) - return future - - def done(self): - """Determines if a TransferFuture has completed - - :returns: False if status is equal to 'failed', 'cancelled', or - 'success'. 
True, otherwise - """ - return self.status in ['failed', 'cancelled', 'success'] - - def add_associated_future(self, future): - """Adds a future to be associated with the TransferFuture""" - with self._associated_futures_lock: - self._associated_futures.add(future) - - def remove_associated_future(self, future): - """Removes a future's association to the TransferFuture""" - with self._associated_futures_lock: - self._associated_futures.remove(future) - - def add_done_callback(self, function, *args, **kwargs): - """Add a done callback to be invoked when transfer is done""" - with self._done_callbacks_lock: - self._done_callbacks.append( - FunctionContainer(function, *args, **kwargs) - ) - - def add_failure_cleanup(self, function, *args, **kwargs): - """Adds a callback to call upon failure""" - with self._failure_cleanups_lock: - self._failure_cleanups.append( + return future + + def done(self): + """Determines if a TransferFuture has completed + + :returns: False if status is equal to 'failed', 'cancelled', or + 'success'. True, otherwise + """ + return self.status in ['failed', 'cancelled', 'success'] + + def add_associated_future(self, future): + """Adds a future to be associated with the TransferFuture""" + with self._associated_futures_lock: + self._associated_futures.add(future) + + def remove_associated_future(self, future): + """Removes a future's association to the TransferFuture""" + with self._associated_futures_lock: + self._associated_futures.remove(future) + + def add_done_callback(self, function, *args, **kwargs): + """Add a done callback to be invoked when transfer is done""" + with self._done_callbacks_lock: + self._done_callbacks.append( + FunctionContainer(function, *args, **kwargs) + ) + + def add_failure_cleanup(self, function, *args, **kwargs): + """Adds a callback to call upon failure""" + with self._failure_cleanups_lock: + self._failure_cleanups.append( FunctionContainer(function, *args, **kwargs) ) - - def announce_done(self): - """Announce that future is done running and run associated callbacks - - This will run any failure cleanups if the transfer failed if not - they have not been run, allows the result() to be unblocked, and will - run any done callbacks associated to the TransferFuture if they have - not already been ran. - """ - if self.status != 'success': - self._run_failure_cleanups() - self._done_event.set() - self._run_done_callbacks() - - def _run_done_callbacks(self): - # Run the callbacks and remove the callbacks from the internal - # list so they do not get ran again if done is announced more than - # once. - with self._done_callbacks_lock: - self._run_callbacks(self._done_callbacks) - self._done_callbacks = [] - - def _run_failure_cleanups(self): - # Run the cleanup callbacks and remove the callbacks from the internal - # list so they do not get ran again if done is announced more than - # once. 
- with self._failure_cleanups_lock: - self._run_callbacks(self.failure_cleanups) - self._failure_cleanups = [] - - def _run_callbacks(self, callbacks): - for callback in callbacks: - self._run_callback(callback) - - def _run_callback(self, callback): - try: - callback() - # We do not want a callback interrupting the process, especially + + def announce_done(self): + """Announce that future is done running and run associated callbacks + + This will run any failure cleanups if the transfer failed if not + they have not been run, allows the result() to be unblocked, and will + run any done callbacks associated to the TransferFuture if they have + not already been ran. + """ + if self.status != 'success': + self._run_failure_cleanups() + self._done_event.set() + self._run_done_callbacks() + + def _run_done_callbacks(self): + # Run the callbacks and remove the callbacks from the internal + # list so they do not get ran again if done is announced more than + # once. + with self._done_callbacks_lock: + self._run_callbacks(self._done_callbacks) + self._done_callbacks = [] + + def _run_failure_cleanups(self): + # Run the cleanup callbacks and remove the callbacks from the internal + # list so they do not get ran again if done is announced more than + # once. + with self._failure_cleanups_lock: + self._run_callbacks(self.failure_cleanups) + self._failure_cleanups = [] + + def _run_callbacks(self, callbacks): + for callback in callbacks: + self._run_callback(callback) + + def _run_callback(self, callback): + try: + callback() + # We do not want a callback interrupting the process, especially # in the failure cleanups. So log and catch, the exception. - except Exception: - logger.debug("Exception raised in %s." % callback, exc_info=True) - - + except Exception: + logger.debug("Exception raised in %s." % callback, exc_info=True) + + class BoundedExecutor: - EXECUTOR_CLS = futures.ThreadPoolExecutor - + EXECUTOR_CLS = futures.ThreadPoolExecutor + def __init__( self, max_size, max_num_threads, tag_semaphores=None, executor_cls=None ): """An executor implementation that has a maximum queued up tasks - - The executor will block if the number of tasks that have been - submitted and is currently working on is past its maximum. - - :params max_size: The maximum number of inflight futures. An inflight - future means that the task is either queued up or is currently - being executed. A size of None or 0 means that the executor will - have no bound in terms of the number of inflight futures. - - :params max_num_threads: The maximum number of threads the executor - uses. - - :type tag_semaphores: dict - :params tag_semaphores: A dictionary where the key is the name of the - tag and the value is the semaphore to use when limiting the - number of tasks the executor is processing at a time. - - :type executor_cls: BaseExecutor - :param underlying_executor_cls: The executor class that - get bounded by this executor. If None is provided, the - concurrent.futures.ThreadPoolExecutor class is used. - """ - self._max_num_threads = max_num_threads - if executor_cls is None: - executor_cls = self.EXECUTOR_CLS - self._executor = executor_cls(max_workers=self._max_num_threads) - self._semaphore = TaskSemaphore(max_size) - self._tag_semaphores = tag_semaphores - - def submit(self, task, tag=None, block=True): - """Submit a task to complete - - :type task: s3transfer.tasks.Task - :param task: The task to run __call__ on - - - :type tag: s3transfer.futures.TaskTag - :param tag: An optional tag to associate to the task. 
This - is used to override which semaphore to use. - - :type block: boolean - :param block: True if to wait till it is possible to submit a task. - False, if not to wait and raise an error if not able to submit - a task. - + + The executor will block if the number of tasks that have been + submitted and is currently working on is past its maximum. + + :params max_size: The maximum number of inflight futures. An inflight + future means that the task is either queued up or is currently + being executed. A size of None or 0 means that the executor will + have no bound in terms of the number of inflight futures. + + :params max_num_threads: The maximum number of threads the executor + uses. + + :type tag_semaphores: dict + :params tag_semaphores: A dictionary where the key is the name of the + tag and the value is the semaphore to use when limiting the + number of tasks the executor is processing at a time. + + :type executor_cls: BaseExecutor + :param underlying_executor_cls: The executor class that + get bounded by this executor. If None is provided, the + concurrent.futures.ThreadPoolExecutor class is used. + """ + self._max_num_threads = max_num_threads + if executor_cls is None: + executor_cls = self.EXECUTOR_CLS + self._executor = executor_cls(max_workers=self._max_num_threads) + self._semaphore = TaskSemaphore(max_size) + self._tag_semaphores = tag_semaphores + + def submit(self, task, tag=None, block=True): + """Submit a task to complete + + :type task: s3transfer.tasks.Task + :param task: The task to run __call__ on + + + :type tag: s3transfer.futures.TaskTag + :param tag: An optional tag to associate to the task. This + is used to override which semaphore to use. + + :type block: boolean + :param block: True if to wait till it is possible to submit a task. + False, if not to wait and raise an error if not able to submit + a task. + :returns: The future associated to the submitted task - """ - semaphore = self._semaphore - # If a tag was provided, use the semaphore associated to that - # tag. - if tag: - semaphore = self._tag_semaphores[tag] - - # Call acquire on the semaphore. - acquire_token = semaphore.acquire(task.transfer_id, block) - # Create a callback to invoke when task is done in order to call - # release on the semaphore. - release_callback = FunctionContainer( + """ + semaphore = self._semaphore + # If a tag was provided, use the semaphore associated to that + # tag. + if tag: + semaphore = self._tag_semaphores[tag] + + # Call acquire on the semaphore. + acquire_token = semaphore.acquire(task.transfer_id, block) + # Create a callback to invoke when task is done in order to call + # release on the semaphore. + release_callback = FunctionContainer( semaphore.release, task.transfer_id, acquire_token ) - # Submit the task to the underlying executor. - future = ExecutorFuture(self._executor.submit(task)) - # Add the Semaphore.release() callback to the future such that - # it is invoked once the future completes. - future.add_done_callback(release_callback) - return future - - def shutdown(self, wait=True): - self._executor.shutdown(wait) - - + # Submit the task to the underlying executor. + future = ExecutorFuture(self._executor.submit(task)) + # Add the Semaphore.release() callback to the future such that + # it is invoked once the future completes. 
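For orientation, a small sketch of driving this executor directly; the TinyTask class below is a hypothetical stand-in for s3transfer.tasks.Task, since submit() only needs a callable that carries a transfer_id attribute (in practice the TransferManager submits real tasks):

.. code-block:: python

    from s3transfer.futures import BoundedExecutor

    class TinyTask:
        # Hypothetical stand-in: a callable with a transfer_id attribute,
        # which submit() uses as the key when acquiring the semaphore.
        transfer_id = 0

        def __call__(self):
            return 'hello from a task'

    # At most 10 in-flight futures, executed by 2 worker threads.
    executor = BoundedExecutor(max_size=10, max_num_threads=2)
    future = executor.submit(TinyTask())
    print(future.result())
    executor.shutdown()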
+ future.add_done_callback(release_callback) + return future + + def shutdown(self, wait=True): + self._executor.shutdown(wait) + + class ExecutorFuture: - def __init__(self, future): - """A future returned from the executor - - Currently, it is just a wrapper around a concurrent.futures.Future. - However, this can eventually grow to implement the needed functionality - of concurrent.futures.Future if we move off of the library and not - affect the rest of the codebase. - - :type future: concurrent.futures.Future - :param future: The underlying future - """ - self._future = future - - def result(self): - return self._future.result() - - def add_done_callback(self, fn): - """Adds a callback to be completed once future is done - + def __init__(self, future): + """A future returned from the executor + + Currently, it is just a wrapper around a concurrent.futures.Future. + However, this can eventually grow to implement the needed functionality + of concurrent.futures.Future if we move off of the library and not + affect the rest of the codebase. + + :type future: concurrent.futures.Future + :param future: The underlying future + """ + self._future = future + + def result(self): + return self._future.result() + + def add_done_callback(self, fn): + """Adds a callback to be completed once future is done + :param fn: A callable that takes no arguments. Note that is different - than concurrent.futures.Future.add_done_callback that requires - a single argument for the future. - """ - # The done callback for concurrent.futures.Future will always pass a - # the future in as the only argument. So we need to create the - # proper signature wrapper that will invoke the callback provided. - def done_callback(future_passed_to_callback): - return fn() - - self._future.add_done_callback(done_callback) - - def done(self): - return self._future.done() - - + than concurrent.futures.Future.add_done_callback that requires + a single argument for the future. + """ + # The done callback for concurrent.futures.Future will always pass a + # the future in as the only argument. So we need to create the + # proper signature wrapper that will invoke the callback provided. 
+ def done_callback(future_passed_to_callback): + return fn() + + self._future.add_done_callback(done_callback) + + def done(self): + return self._future.done() + + class BaseExecutor: - """Base Executor class implementation needed to work with s3transfer""" - - def __init__(self, max_workers=None): - pass - - def submit(self, fn, *args, **kwargs): - raise NotImplementedError('submit()') - - def shutdown(self, wait=True): - raise NotImplementedError('shutdown()') - - -class NonThreadedExecutor(BaseExecutor): - """A drop-in replacement non-threaded version of ThreadPoolExecutor""" - - def submit(self, fn, *args, **kwargs): - future = NonThreadedExecutorFuture() - try: - result = fn(*args, **kwargs) - future.set_result(result) - except Exception: - e, tb = sys.exc_info()[1:] - logger.debug( - 'Setting exception for %s to %s with traceback %s', + """Base Executor class implementation needed to work with s3transfer""" + + def __init__(self, max_workers=None): + pass + + def submit(self, fn, *args, **kwargs): + raise NotImplementedError('submit()') + + def shutdown(self, wait=True): + raise NotImplementedError('shutdown()') + + +class NonThreadedExecutor(BaseExecutor): + """A drop-in replacement non-threaded version of ThreadPoolExecutor""" + + def submit(self, fn, *args, **kwargs): + future = NonThreadedExecutorFuture() + try: + result = fn(*args, **kwargs) + future.set_result(result) + except Exception: + e, tb = sys.exc_info()[1:] + logger.debug( + 'Setting exception for %s to %s with traceback %s', future, e, tb, - ) - future.set_exception_info(e, tb) - return future - - def shutdown(self, wait=True): - pass - - + ) + future.set_exception_info(e, tb) + return future + + def shutdown(self, wait=True): + pass + + class NonThreadedExecutorFuture: - """The Future returned from NonThreadedExecutor - - Note that this future is **not** thread-safe as it is being used - from the context of a non-threaded environment. - """ - - def __init__(self): - self._result = None - self._exception = None - self._traceback = None - self._done = False - self._done_callbacks = [] - - def set_result(self, result): - self._result = result - self._set_done() - - def set_exception_info(self, exception, traceback): - self._exception = exception - self._traceback = traceback - self._set_done() - - def result(self, timeout=None): - if self._exception: + """The Future returned from NonThreadedExecutor + + Note that this future is **not** thread-safe as it is being used + from the context of a non-threaded environment. 
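As a quick illustration of these semantics (a sketch, not documented API usage): submitting to a NonThreadedExecutor runs the callable inline and hands back an already-completed future.

.. code-block:: python

    from s3transfer.futures import NonThreadedExecutor

    executor = NonThreadedExecutor()
    future = executor.submit(sum, [1, 2, 3])
    print(future.done())    # True: the function already ran inside submit()
    print(future.result())  # 6
    executor.shutdown()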
+ """ + + def __init__(self): + self._result = None + self._exception = None + self._traceback = None + self._done = False + self._done_callbacks = [] + + def set_result(self, result): + self._result = result + self._set_done() + + def set_exception_info(self, exception, traceback): + self._exception = exception + self._traceback = traceback + self._set_done() + + def result(self, timeout=None): + if self._exception: raise self._exception.with_traceback(self._traceback) - return self._result - - def _set_done(self): - self._done = True - for done_callback in self._done_callbacks: - self._invoke_done_callback(done_callback) - self._done_callbacks = [] - - def _invoke_done_callback(self, done_callback): - return done_callback(self) - - def done(self): - return self._done - - def add_done_callback(self, fn): - if self._done: - self._invoke_done_callback(fn) - else: - self._done_callbacks.append(fn) - - -TaskTag = namedtuple('TaskTag', ['name']) - -IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload') -IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download') + return self._result + + def _set_done(self): + self._done = True + for done_callback in self._done_callbacks: + self._invoke_done_callback(done_callback) + self._done_callbacks = [] + + def _invoke_done_callback(self, done_callback): + return done_callback(self) + + def done(self): + return self._done + + def add_done_callback(self, fn): + if self._done: + self._invoke_done_callback(fn) + else: + self._done_callbacks.append(fn) + + +TaskTag = namedtuple('TaskTag', ['name']) + +IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload') +IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download') diff --git a/contrib/python/s3transfer/py3/s3transfer/manager.py b/contrib/python/s3transfer/py3/s3transfer/manager.py index 6ed7ec8711..ff6afa12c1 100644 --- a/contrib/python/s3transfer/py3/s3transfer/manager.py +++ b/contrib/python/s3transfer/py3/s3transfer/manager.py @@ -1,25 +1,25 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import logging +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import copy +import logging import re -import threading - +import threading + from s3transfer.bandwidth import BandwidthLimiter, LeakyBucket from s3transfer.constants import ALLOWED_DOWNLOAD_ARGS, KB, MB from s3transfer.copies import CopySubmissionTask from s3transfer.delete import DeleteSubmissionTask -from s3transfer.download import DownloadSubmissionTask +from s3transfer.download import DownloadSubmissionTask from s3transfer.exceptions import CancelledError, FatalError from s3transfer.futures import ( IN_MEMORY_DOWNLOAD_TAG, @@ -29,7 +29,7 @@ from s3transfer.futures import ( TransferFuture, TransferMeta, ) -from s3transfer.upload import UploadSubmissionTask +from s3transfer.upload import UploadSubmissionTask from s3transfer.utils import ( CallArgs, OSUtils, @@ -39,10 +39,10 @@ from s3transfer.utils import ( signal_not_transferring, signal_transferring, ) - -logger = logging.getLogger(__name__) - - + +logger = logging.getLogger(__name__) + + class TransferConfig: def __init__( self, @@ -60,147 +60,147 @@ class TransferConfig: max_bandwidth=None, ): """Configurations for the transfer manager - - :param multipart_threshold: The threshold for which multipart - transfers occur. - - :param max_request_concurrency: The maximum number of S3 API - transfer-related requests that can happen at a time. - - :param max_submission_concurrency: The maximum number of threads - processing a call to a TransferManager method. Processing a - call usually entails determining which S3 API requests that need - to be enqueued, but does **not** entail making any of the + + :param multipart_threshold: The threshold for which multipart + transfers occur. + + :param max_request_concurrency: The maximum number of S3 API + transfer-related requests that can happen at a time. + + :param max_submission_concurrency: The maximum number of threads + processing a call to a TransferManager method. Processing a + call usually entails determining which S3 API requests that need + to be enqueued, but does **not** entail making any of the S3 API data transferring requests needed to perform the transfer. - The threads controlled by ``max_request_concurrency`` is - responsible for that. - - :param multipart_chunksize: The size of each transfer if a request - becomes a multipart transfer. - - :param max_request_queue_size: The maximum amount of S3 API requests + The threads controlled by ``max_request_concurrency`` is + responsible for that. + + :param multipart_chunksize: The size of each transfer if a request + becomes a multipart transfer. + + :param max_request_queue_size: The maximum amount of S3 API requests that can be queued at a time. - - :param max_submission_queue_size: The maximum amount of + + :param max_submission_queue_size: The maximum amount of TransferManager method calls that can be queued at a time. - - :param max_io_queue_size: The maximum amount of read parts that + + :param max_io_queue_size: The maximum amount of read parts that can be queued to be written to disk per download. The default size for each elementin this queue is 8 KB. - - :param io_chunksize: The max size of each chunk in the io queue. - Currently, this is size used when reading from the downloaded - stream as well. - - :param num_download_attempts: The number of download attempts that - will be tried upon errors with downloading an object in S3. Note + + :param io_chunksize: The max size of each chunk in the io queue. + Currently, this is size used when reading from the downloaded + stream as well. 
+ + :param num_download_attempts: The number of download attempts that + will be tried upon errors with downloading an object in S3. Note that these retries account for errors that occur when streaming - down the data from s3 (i.e. socket errors and read timeouts that + down the data from s3 (i.e. socket errors and read timeouts that occur after receiving an OK response from s3). - Other retryable exceptions such as throttling errors and 5xx errors - are already retried by botocore (this default is 5). The - ``num_download_attempts`` does not take into account the - number of exceptions retried by botocore. - - :param max_in_memory_upload_chunks: The number of chunks that can - be stored in memory at a time for all ongoing upload requests. - This pertains to chunks of data that need to be stored in memory - during an upload if the data is sourced from a file-like object. - The total maximum memory footprint due to a in-memory upload - chunks is roughly equal to: - - max_in_memory_upload_chunks * multipart_chunksize - + max_submission_concurrency * multipart_chunksize - - ``max_submission_concurrency`` has an affect on this value because - for each thread pulling data off of a file-like object, they may - be waiting with a single read chunk to be submitted for upload - because the ``max_in_memory_upload_chunks`` value has been reached - by the threads making the upload request. - - :param max_in_memory_download_chunks: The number of chunks that can - be buffered in memory and **not** in the io queue at a time for all + Other retryable exceptions such as throttling errors and 5xx errors + are already retried by botocore (this default is 5). The + ``num_download_attempts`` does not take into account the + number of exceptions retried by botocore. + + :param max_in_memory_upload_chunks: The number of chunks that can + be stored in memory at a time for all ongoing upload requests. + This pertains to chunks of data that need to be stored in memory + during an upload if the data is sourced from a file-like object. + The total maximum memory footprint due to a in-memory upload + chunks is roughly equal to: + + max_in_memory_upload_chunks * multipart_chunksize + + max_submission_concurrency * multipart_chunksize + + ``max_submission_concurrency`` has an affect on this value because + for each thread pulling data off of a file-like object, they may + be waiting with a single read chunk to be submitted for upload + because the ``max_in_memory_upload_chunks`` value has been reached + by the threads making the upload request. + + :param max_in_memory_download_chunks: The number of chunks that can + be buffered in memory and **not** in the io queue at a time for all ongoing download requests. This pertains specifically to file-like - objects that cannot be seeked. The total maximum memory footprint - due to a in-memory download chunks is roughly equal to: - - max_in_memory_download_chunks * multipart_chunksize - - :param max_bandwidth: The maximum bandwidth that will be consumed - in uploading and downloading file content. The value is in terms of - bytes per second. 
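Taken together, a minimal sketch of building a config that touches a few of these knobs; the values are purely illustrative:

.. code-block:: python

    from s3transfer.constants import MB
    from s3transfer.manager import TransferConfig

    config = TransferConfig(
        multipart_threshold=16 * MB,   # switch to multipart above 16 MB
        multipart_chunksize=16 * MB,   # size of each multipart part
        max_request_concurrency=10,    # concurrent S3 API transfer requests
        max_bandwidth=50 * MB,         # cap in bytes per second
    )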
- """ - self.multipart_threshold = multipart_threshold - self.multipart_chunksize = multipart_chunksize - self.max_request_concurrency = max_request_concurrency - self.max_submission_concurrency = max_submission_concurrency - self.max_request_queue_size = max_request_queue_size - self.max_submission_queue_size = max_submission_queue_size - self.max_io_queue_size = max_io_queue_size - self.io_chunksize = io_chunksize - self.num_download_attempts = num_download_attempts - self.max_in_memory_upload_chunks = max_in_memory_upload_chunks - self.max_in_memory_download_chunks = max_in_memory_download_chunks - self.max_bandwidth = max_bandwidth - self._validate_attrs_are_nonzero() - - def _validate_attrs_are_nonzero(self): + objects that cannot be seeked. The total maximum memory footprint + due to a in-memory download chunks is roughly equal to: + + max_in_memory_download_chunks * multipart_chunksize + + :param max_bandwidth: The maximum bandwidth that will be consumed + in uploading and downloading file content. The value is in terms of + bytes per second. + """ + self.multipart_threshold = multipart_threshold + self.multipart_chunksize = multipart_chunksize + self.max_request_concurrency = max_request_concurrency + self.max_submission_concurrency = max_submission_concurrency + self.max_request_queue_size = max_request_queue_size + self.max_submission_queue_size = max_submission_queue_size + self.max_io_queue_size = max_io_queue_size + self.io_chunksize = io_chunksize + self.num_download_attempts = num_download_attempts + self.max_in_memory_upload_chunks = max_in_memory_upload_chunks + self.max_in_memory_download_chunks = max_in_memory_download_chunks + self.max_bandwidth = max_bandwidth + self._validate_attrs_are_nonzero() + + def _validate_attrs_are_nonzero(self): for attr, attr_val in self.__dict__.items(): - if attr_val is not None and attr_val <= 0: - raise ValueError( - 'Provided parameter %s of value %s must be greater than ' + if attr_val is not None and attr_val <= 0: + raise ValueError( + 'Provided parameter %s of value %s must be greater than ' '0.' 
% (attr, attr_val) ) - - + + class TransferManager: ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS - - ALLOWED_UPLOAD_ARGS = [ - 'ACL', - 'CacheControl', - 'ContentDisposition', - 'ContentEncoding', - 'ContentLanguage', - 'ContentType', + + ALLOWED_UPLOAD_ARGS = [ + 'ACL', + 'CacheControl', + 'ContentDisposition', + 'ContentEncoding', + 'ContentLanguage', + 'ContentType', 'ExpectedBucketOwner', - 'Expires', - 'GrantFullControl', - 'GrantRead', - 'GrantReadACP', - 'GrantWriteACP', - 'Metadata', - 'RequestPayer', - 'ServerSideEncryption', - 'StorageClass', - 'SSECustomerAlgorithm', - 'SSECustomerKey', - 'SSECustomerKeyMD5', - 'SSEKMSKeyId', + 'Expires', + 'GrantFullControl', + 'GrantRead', + 'GrantReadACP', + 'GrantWriteACP', + 'Metadata', + 'RequestPayer', + 'ServerSideEncryption', + 'StorageClass', + 'SSECustomerAlgorithm', + 'SSECustomerKey', + 'SSECustomerKeyMD5', + 'SSEKMSKeyId', 'SSEKMSEncryptionContext', 'Tagging', 'WebsiteRedirectLocation', - ] - - ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [ - 'CopySourceIfMatch', - 'CopySourceIfModifiedSince', - 'CopySourceIfNoneMatch', - 'CopySourceIfUnmodifiedSince', - 'CopySourceSSECustomerAlgorithm', - 'CopySourceSSECustomerKey', - 'CopySourceSSECustomerKeyMD5', + ] + + ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [ + 'CopySourceIfMatch', + 'CopySourceIfModifiedSince', + 'CopySourceIfNoneMatch', + 'CopySourceIfUnmodifiedSince', + 'CopySourceSSECustomerAlgorithm', + 'CopySourceSSECustomerKey', + 'CopySourceSSECustomerKeyMD5', 'MetadataDirective', 'TaggingDirective', - ] - - ALLOWED_DELETE_ARGS = [ - 'MFA', - 'VersionId', - 'RequestPayer', + ] + + ALLOWED_DELETE_ARGS = [ + 'MFA', + 'VersionId', + 'RequestPayer', 'ExpectedBucketOwner', - ] - + ] + VALIDATE_SUPPORTED_BUCKET_VALUES = True _UNSUPPORTED_BUCKET_PATTERNS = { @@ -210,72 +210,72 @@ class TransferManager: ), } - def __init__(self, client, config=None, osutil=None, executor_cls=None): - """A transfer manager interface for Amazon S3 - - :param client: Client to be used by the manager - :param config: TransferConfig to associate specific configurations - :param osutil: OSUtils object to use for os-related behavior when - using with transfer manager. - - :type executor_cls: s3transfer.futures.BaseExecutor - :param executor_cls: The class of executor to use with the transfer - manager. By default, concurrent.futures.ThreadPoolExecutor is used. - """ - self._client = client - self._config = config - if config is None: - self._config = TransferConfig() - self._osutil = osutil - if osutil is None: - self._osutil = OSUtils() - self._coordinator_controller = TransferCoordinatorController() - # A counter to create unique id's for each transfer submitted. - self._id_counter = 0 - - # The executor responsible for making S3 API transfer requests - self._request_executor = BoundedExecutor( - max_size=self._config.max_request_queue_size, - max_num_threads=self._config.max_request_concurrency, - tag_semaphores={ - IN_MEMORY_UPLOAD_TAG: TaskSemaphore( + def __init__(self, client, config=None, osutil=None, executor_cls=None): + """A transfer manager interface for Amazon S3 + + :param client: Client to be used by the manager + :param config: TransferConfig to associate specific configurations + :param osutil: OSUtils object to use for os-related behavior when + using with transfer manager. + + :type executor_cls: s3transfer.futures.BaseExecutor + :param executor_cls: The class of executor to use with the transfer + manager. By default, concurrent.futures.ThreadPoolExecutor is used. 
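A minimal construction sketch; the region name and the choice of NonThreadedExecutor are illustrative, and executor_cls may simply be omitted to get the default thread pool:

.. code-block:: python

    import boto3

    from s3transfer.futures import NonThreadedExecutor
    from s3transfer.manager import TransferConfig, TransferManager

    client = boto3.client('s3', 'us-west-2')
    config = TransferConfig(max_request_concurrency=10)

    # Passing NonThreadedExecutor keeps all work on the calling thread
    # instead of using thread pools.
    manager = TransferManager(client, config=config,
                              executor_cls=NonThreadedExecutor)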
+ """ + self._client = client + self._config = config + if config is None: + self._config = TransferConfig() + self._osutil = osutil + if osutil is None: + self._osutil = OSUtils() + self._coordinator_controller = TransferCoordinatorController() + # A counter to create unique id's for each transfer submitted. + self._id_counter = 0 + + # The executor responsible for making S3 API transfer requests + self._request_executor = BoundedExecutor( + max_size=self._config.max_request_queue_size, + max_num_threads=self._config.max_request_concurrency, + tag_semaphores={ + IN_MEMORY_UPLOAD_TAG: TaskSemaphore( self._config.max_in_memory_upload_chunks ), - IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore( + IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore( self._config.max_in_memory_download_chunks ), - }, + }, executor_cls=executor_cls, - ) - - # The executor responsible for submitting the necessary tasks to - # perform the desired transfer - self._submission_executor = BoundedExecutor( - max_size=self._config.max_submission_queue_size, - max_num_threads=self._config.max_submission_concurrency, + ) + + # The executor responsible for submitting the necessary tasks to + # perform the desired transfer + self._submission_executor = BoundedExecutor( + max_size=self._config.max_submission_queue_size, + max_num_threads=self._config.max_submission_concurrency, executor_cls=executor_cls, - ) - - # There is one thread available for writing to disk. It will handle - # downloads for all files. - self._io_executor = BoundedExecutor( - max_size=self._config.max_io_queue_size, - max_num_threads=1, + ) + + # There is one thread available for writing to disk. It will handle + # downloads for all files. + self._io_executor = BoundedExecutor( + max_size=self._config.max_io_queue_size, + max_num_threads=1, executor_cls=executor_cls, - ) - - # The component responsible for limiting bandwidth usage if it - # is configured. - self._bandwidth_limiter = None - if self._config.max_bandwidth is not None: - logger.debug( + ) + + # The component responsible for limiting bandwidth usage if it + # is configured. + self._bandwidth_limiter = None + if self._config.max_bandwidth is not None: + logger.debug( 'Setting max_bandwidth to %s', self._config.max_bandwidth ) - leaky_bucket = LeakyBucket(self._config.max_bandwidth) - self._bandwidth_limiter = BandwidthLimiter(leaky_bucket) - - self._register_handlers() - + leaky_bucket = LeakyBucket(self._config.max_bandwidth) + self._bandwidth_limiter = BandwidthLimiter(leaky_bucket) + + self._register_handlers() + @property def client(self): return self._client @@ -284,100 +284,100 @@ class TransferManager: def config(self): return self._config - def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): - """Uploads a file to S3 - - :type fileobj: str or seekable file-like object - :param fileobj: The name of a file to upload or a seekable file-like - object to upload. It is recommended to use a filename because - file-like objects may result in higher memory usage. - - :type bucket: str - :param bucket: The name of the bucket to upload to - - :type key: str - :param key: The name of the key to upload to - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: list(s3transfer.subscribers.BaseSubscriber) - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. 
- - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the upload - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) + def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): + """Uploads a file to S3 + + :type fileobj: str or seekable file-like object + :param fileobj: The name of a file to upload or a seekable file-like + object to upload. It is recommended to use a filename because + file-like objects may result in higher memory usage. + + :type bucket: str + :param bucket: The name of the bucket to upload to + + :type key: str + :param key: The name of the key to upload to + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: list(s3transfer.subscribers.BaseSubscriber) + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the upload + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( + call_args = CallArgs( fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers, - ) - extra_main_kwargs = {} - if self._bandwidth_limiter: - extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter - return self._submit_transfer( + ) + extra_main_kwargs = {} + if self._bandwidth_limiter: + extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter + return self._submit_transfer( call_args, UploadSubmissionTask, extra_main_kwargs ) - + def download( self, bucket, key, fileobj, extra_args=None, subscribers=None ): - """Downloads a file from S3 - - :type bucket: str - :param bucket: The name of the bucket to download from - - :type key: str - :param key: The name of the key to download from - - :type fileobj: str or seekable file-like object - :param fileobj: The name of a file to download or a seekable file-like - object to download. It is recommended to use a filename because - file-like objects may result in higher memory usage. - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: list(s3transfer.subscribers.BaseSubscriber) - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. - - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the download - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) + """Downloads a file from S3 + + :type bucket: str + :param bucket: The name of the bucket to download from + + :type key: str + :param key: The name of the key to download from + + :type fileobj: str or seekable file-like object + :param fileobj: The name of a file to download or a seekable file-like + object to download. It is recommended to use a filename because + file-like objects may result in higher memory usage. 
+ + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: list(s3transfer.subscribers.BaseSubscriber) + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the download + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( + call_args = CallArgs( bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args, subscribers=subscribers, - ) - extra_main_kwargs = {'io_executor': self._io_executor} - if self._bandwidth_limiter: - extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter - return self._submit_transfer( + ) + extra_main_kwargs = {'io_executor': self._io_executor} + if self._bandwidth_limiter: + extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter + return self._submit_transfer( call_args, DownloadSubmissionTask, extra_main_kwargs ) - + def copy( self, copy_source, @@ -387,96 +387,96 @@ class TransferManager: subscribers=None, source_client=None, ): - """Copies a file in S3 - - :type copy_source: dict - :param copy_source: The name of the source bucket, key name of the - source object, and optional version ID of the source object. The - dictionary format is: - ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note - that the ``VersionId`` key is optional and may be omitted. - - :type bucket: str - :param bucket: The name of the bucket to copy to - - :type key: str - :param key: The name of the key to copy to - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - client operation - - :type subscribers: a list of subscribers - :param subscribers: The list of subscribers to be invoked in the - order provided based on the event emit during the process of - the transfer request. - - :type source_client: botocore or boto3 Client - :param source_client: The client to be used for operation that - may happen at the source object. For example, this client is - used for the head_object that determines the size of the copy. - If no client is provided, the transfer manager's client is used - as the client for the source object. - - :rtype: s3transfer.futures.TransferFuture - :returns: Transfer future representing the copy - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - if source_client is None: - source_client = self._client - self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS) + """Copies a file in S3 + + :type copy_source: dict + :param copy_source: The name of the source bucket, key name of the + source object, and optional version ID of the source object. The + dictionary format is: + ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note + that the ``VersionId`` key is optional and may be omitted. 
+ + :type bucket: str + :param bucket: The name of the bucket to copy to + + :type key: str + :param key: The name of the key to copy to + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + client operation + + :type subscribers: a list of subscribers + :param subscribers: The list of subscribers to be invoked in the + order provided based on the event emit during the process of + the transfer request. + + :type source_client: botocore or boto3 Client + :param source_client: The client to be used for operation that + may happen at the source object. For example, this client is + used for the head_object that determines the size of the copy. + If no client is provided, the transfer manager's client is used + as the client for the source object. + + :rtype: s3transfer.futures.TransferFuture + :returns: Transfer future representing the copy + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + if source_client is None: + source_client = self._client + self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS) if isinstance(copy_source, dict): self._validate_if_bucket_supported(copy_source.get('Bucket')) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( + call_args = CallArgs( copy_source=copy_source, bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers, source_client=source_client, - ) - return self._submit_transfer(call_args, CopySubmissionTask) - - def delete(self, bucket, key, extra_args=None, subscribers=None): - """Delete an S3 object. - - :type bucket: str - :param bucket: The name of the bucket. - - :type key: str - :param key: The name of the S3 object to delete. - - :type extra_args: dict - :param extra_args: Extra arguments that may be passed to the - DeleteObject call. - - :type subscribers: list - :param subscribers: A list of subscribers to be invoked during the - process of the transfer request. Note that the ``on_progress`` - callback is not invoked during object deletion. - - :rtype: s3transfer.futures.TransferFuture - :return: Transfer future representing the deletion. - - """ - if extra_args is None: - extra_args = {} - if subscribers is None: - subscribers = [] - self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS) + ) + return self._submit_transfer(call_args, CopySubmissionTask) + + def delete(self, bucket, key, extra_args=None, subscribers=None): + """Delete an S3 object. + + :type bucket: str + :param bucket: The name of the bucket. + + :type key: str + :param key: The name of the S3 object to delete. + + :type extra_args: dict + :param extra_args: Extra arguments that may be passed to the + DeleteObject call. + + :type subscribers: list + :param subscribers: A list of subscribers to be invoked during the + process of the transfer request. Note that the ``on_progress`` + callback is not invoked during object deletion. + + :rtype: s3transfer.futures.TransferFuture + :return: Transfer future representing the deletion. 
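A short usage sketch covering the four operations above; the bucket and key names are placeholders and ``manager`` is a TransferManager constructed as earlier:

.. code-block:: python

    # Upload, forwarding an extra argument to the underlying client call.
    future = manager.upload(
        '/tmp/myfile', 'bucket', 'key', extra_args={'ACL': 'public-read'}
    )
    future.result()  # block until the transfer finishes (raises on failure)

    # Download, copy within S3, then delete the original object.
    manager.download('bucket', 'key', '/tmp/myfile-copy').result()
    manager.copy({'Bucket': 'bucket', 'Key': 'key'},
                 'bucket', 'key-copy').result()
    manager.delete('bucket', 'key').result()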
+ + """ + if extra_args is None: + extra_args = {} + if subscribers is None: + subscribers = [] + self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS) self._validate_if_bucket_supported(bucket) - call_args = CallArgs( + call_args = CallArgs( bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers, - ) - return self._submit_transfer(call_args, DeleteSubmissionTask) - + ) + return self._submit_transfer(call_args, DeleteSubmissionTask) + def _validate_if_bucket_supported(self, bucket): # s3 high level operations don't support some resources # (eg. S3 Object Lambda) only direct API calls are available @@ -490,238 +490,238 @@ class TransferManager: 'resource. Use direct client calls instead.' % resource ) - def _validate_all_known_args(self, actual, allowed): - for kwarg in actual: - if kwarg not in allowed: - raise ValueError( - "Invalid extra_args key '%s', " + def _validate_all_known_args(self, actual, allowed): + for kwarg in actual: + if kwarg not in allowed: + raise ValueError( + "Invalid extra_args key '%s', " "must be one of: %s" % (kwarg, ', '.join(allowed)) ) - + def _submit_transfer( self, call_args, submission_task_cls, extra_main_kwargs=None ): - if not extra_main_kwargs: - extra_main_kwargs = {} - - # Create a TransferFuture to return back to the user - transfer_future, components = self._get_future_with_components( + if not extra_main_kwargs: + extra_main_kwargs = {} + + # Create a TransferFuture to return back to the user + transfer_future, components = self._get_future_with_components( call_args ) - - # Add any provided done callbacks to the created transfer future - # to be invoked on the transfer future being complete. - for callback in get_callbacks(transfer_future, 'done'): - components['coordinator'].add_done_callback(callback) - - # Get the main kwargs needed to instantiate the submission task - main_kwargs = self._get_submission_task_main_kwargs( + + # Add any provided done callbacks to the created transfer future + # to be invoked on the transfer future being complete. + for callback in get_callbacks(transfer_future, 'done'): + components['coordinator'].add_done_callback(callback) + + # Get the main kwargs needed to instantiate the submission task + main_kwargs = self._get_submission_task_main_kwargs( transfer_future, extra_main_kwargs ) - - # Submit a SubmissionTask that will submit all of the necessary - # tasks needed to complete the S3 transfer. - self._submission_executor.submit( - submission_task_cls( - transfer_coordinator=components['coordinator'], + + # Submit a SubmissionTask that will submit all of the necessary + # tasks needed to complete the S3 transfer. + self._submission_executor.submit( + submission_task_cls( + transfer_coordinator=components['coordinator'], main_kwargs=main_kwargs, - ) - ) - - # Increment the unique id counter for future transfer requests - self._id_counter += 1 - - return transfer_future - - def _get_future_with_components(self, call_args): - transfer_id = self._id_counter - # Creates a new transfer future along with its components - transfer_coordinator = TransferCoordinator(transfer_id=transfer_id) - # Track the transfer coordinator for transfers to manage. 
- self._coordinator_controller.add_transfer_coordinator( + ) + ) + + # Increment the unique id counter for future transfer requests + self._id_counter += 1 + + return transfer_future + + def _get_future_with_components(self, call_args): + transfer_id = self._id_counter + # Creates a new transfer future along with its components + transfer_coordinator = TransferCoordinator(transfer_id=transfer_id) + # Track the transfer coordinator for transfers to manage. + self._coordinator_controller.add_transfer_coordinator( transfer_coordinator ) - # Also make sure that the transfer coordinator is removed once - # the transfer completes so it does not stick around in memory. - transfer_coordinator.add_done_callback( - self._coordinator_controller.remove_transfer_coordinator, + # Also make sure that the transfer coordinator is removed once + # the transfer completes so it does not stick around in memory. + transfer_coordinator.add_done_callback( + self._coordinator_controller.remove_transfer_coordinator, transfer_coordinator, ) - components = { - 'meta': TransferMeta(call_args, transfer_id=transfer_id), + components = { + 'meta': TransferMeta(call_args, transfer_id=transfer_id), 'coordinator': transfer_coordinator, - } - transfer_future = TransferFuture(**components) - return transfer_future, components - - def _get_submission_task_main_kwargs( + } + transfer_future = TransferFuture(**components) + return transfer_future, components + + def _get_submission_task_main_kwargs( self, transfer_future, extra_main_kwargs ): - main_kwargs = { - 'client': self._client, - 'config': self._config, - 'osutil': self._osutil, - 'request_executor': self._request_executor, + main_kwargs = { + 'client': self._client, + 'config': self._config, + 'osutil': self._osutil, + 'request_executor': self._request_executor, 'transfer_future': transfer_future, - } - main_kwargs.update(extra_main_kwargs) - return main_kwargs - - def _register_handlers(self): - # Register handlers to enable/disable callbacks on uploads. - event_name = 'request-created.s3' - self._client.meta.events.register_first( + } + main_kwargs.update(extra_main_kwargs) + return main_kwargs + + def _register_handlers(self): + # Register handlers to enable/disable callbacks on uploads. + event_name = 'request-created.s3' + self._client.meta.events.register_first( event_name, signal_not_transferring, unique_id='s3upload-not-transferring', ) - self._client.meta.events.register_last( + self._client.meta.events.register_last( event_name, signal_transferring, unique_id='s3upload-transferring' ) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, *args): - cancel = False - cancel_msg = '' - cancel_exc_type = FatalError - # If a exception was raised in the context handler, signal to cancel - # all of the inprogress futures in the shutdown. - if exc_type: - cancel = True + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, *args): + cancel = False + cancel_msg = '' + cancel_exc_type = FatalError + # If a exception was raised in the context handler, signal to cancel + # all of the inprogress futures in the shutdown. + if exc_type: + cancel = True cancel_msg = str(exc_value) - if not cancel_msg: - cancel_msg = repr(exc_value) - # If it was a KeyboardInterrupt, the cancellation was initiated - # by the user. 
- if isinstance(exc_value, KeyboardInterrupt): - cancel_exc_type = CancelledError - self._shutdown(cancel, cancel_msg, cancel_exc_type) - - def shutdown(self, cancel=False, cancel_msg=''): - """Shutdown the TransferManager - - It will wait till all transfers complete before it completely shuts - down. - - :type cancel: boolean - :param cancel: If True, calls TransferFuture.cancel() for - all in-progress in transfers. This is useful if you want the - shutdown to happen quicker. - - :type cancel_msg: str - :param cancel_msg: The message to specify if canceling all in-progress - transfers. - """ - self._shutdown(cancel, cancel, cancel_msg) - - def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError): - if cancel: - # Cancel all in-flight transfers if requested, before waiting - # for them to complete. - self._coordinator_controller.cancel(cancel_msg, exc_type) - try: - # Wait until there are no more in-progress transfers. This is - # wrapped in a try statement because this can be interrupted - # with a KeyboardInterrupt that needs to be caught. - self._coordinator_controller.wait() - except KeyboardInterrupt: - # If not errors were raised in the try block, the cancel should - # have no coordinators it needs to run cancel on. If there was - # an error raised in the try statement we want to cancel all of - # the inflight transfers before shutting down to speed that - # process up. - self._coordinator_controller.cancel('KeyboardInterrupt()') - raise - finally: - # Shutdown all of the executors. - self._submission_executor.shutdown() - self._request_executor.shutdown() - self._io_executor.shutdown() - - + if not cancel_msg: + cancel_msg = repr(exc_value) + # If it was a KeyboardInterrupt, the cancellation was initiated + # by the user. + if isinstance(exc_value, KeyboardInterrupt): + cancel_exc_type = CancelledError + self._shutdown(cancel, cancel_msg, cancel_exc_type) + + def shutdown(self, cancel=False, cancel_msg=''): + """Shutdown the TransferManager + + It will wait till all transfers complete before it completely shuts + down. + + :type cancel: boolean + :param cancel: If True, calls TransferFuture.cancel() for + all in-progress in transfers. This is useful if you want the + shutdown to happen quicker. + + :type cancel_msg: str + :param cancel_msg: The message to specify if canceling all in-progress + transfers. + """ + self._shutdown(cancel, cancel, cancel_msg) + + def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError): + if cancel: + # Cancel all in-flight transfers if requested, before waiting + # for them to complete. + self._coordinator_controller.cancel(cancel_msg, exc_type) + try: + # Wait until there are no more in-progress transfers. This is + # wrapped in a try statement because this can be interrupted + # with a KeyboardInterrupt that needs to be caught. + self._coordinator_controller.wait() + except KeyboardInterrupt: + # If not errors were raised in the try block, the cancel should + # have no coordinators it needs to run cancel on. If there was + # an error raised in the try statement we want to cancel all of + # the inflight transfers before shutting down to speed that + # process up. + self._coordinator_controller.cancel('KeyboardInterrupt()') + raise + finally: + # Shutdown all of the executors. 
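In sketch form, the context-manager behaviour described above (``client`` as before):

.. code-block:: python

    with TransferManager(client) as manager:
        manager.download('bucket', 'key', '/tmp/myfile').result()
    # On a clean exit, __exit__ simply waits for outstanding transfers.
    # If an exception escaped the block, in-progress transfers are cancelled
    # first (CancelledError for KeyboardInterrupt, FatalError otherwise)
    # before the executors shut down.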
+ self._submission_executor.shutdown() + self._request_executor.shutdown() + self._io_executor.shutdown() + + class TransferCoordinatorController: - def __init__(self): - """Abstraction to control all transfer coordinators - - This abstraction allows the manager to wait for inprogress transfers - to complete and cancel all inprogress transfers. - """ - self._lock = threading.Lock() - self._tracked_transfer_coordinators = set() - - @property - def tracked_transfer_coordinators(self): - """The set of transfer coordinators being tracked""" - with self._lock: - # We return a copy because the set is mutable and if you were to - # iterate over the set, it may be changing in length due to - # additions and removals of transfer coordinators. - return copy.copy(self._tracked_transfer_coordinators) - - def add_transfer_coordinator(self, transfer_coordinator): - """Adds a transfer coordinator of a transfer to be canceled if needed - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator for the - particular transfer - """ - with self._lock: - self._tracked_transfer_coordinators.add(transfer_coordinator) - - def remove_transfer_coordinator(self, transfer_coordinator): + def __init__(self): + """Abstraction to control all transfer coordinators + + This abstraction allows the manager to wait for inprogress transfers + to complete and cancel all inprogress transfers. + """ + self._lock = threading.Lock() + self._tracked_transfer_coordinators = set() + + @property + def tracked_transfer_coordinators(self): + """The set of transfer coordinators being tracked""" + with self._lock: + # We return a copy because the set is mutable and if you were to + # iterate over the set, it may be changing in length due to + # additions and removals of transfer coordinators. + return copy.copy(self._tracked_transfer_coordinators) + + def add_transfer_coordinator(self, transfer_coordinator): + """Adds a transfer coordinator of a transfer to be canceled if needed + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator for the + particular transfer + """ + with self._lock: + self._tracked_transfer_coordinators.add(transfer_coordinator) + + def remove_transfer_coordinator(self, transfer_coordinator): """Remove a transfer coordinator from cancellation consideration - - Typically, this method is invoked by the transfer coordinator itself - to remove its self when it completes its transfer. - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator for the - particular transfer - """ - with self._lock: - self._tracked_transfer_coordinators.remove(transfer_coordinator) - - def cancel(self, msg='', exc_type=CancelledError): - """Cancels all inprogress transfers - - This cancels the inprogress transfers by calling cancel() on all - tracked transfer coordinators. - - :param msg: The message to pass on to each transfer coordinator that - gets cancelled. - - :param exc_type: The type of exception to set for the cancellation - """ - for transfer_coordinator in self.tracked_transfer_coordinators: - transfer_coordinator.cancel(msg, exc_type) - - def wait(self): - """Wait until there are no more inprogress transfers - + + Typically, this method is invoked by the transfer coordinator itself + to remove its self when it completes its transfer. 
+ + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator for the + particular transfer + """ + with self._lock: + self._tracked_transfer_coordinators.remove(transfer_coordinator) + + def cancel(self, msg='', exc_type=CancelledError): + """Cancels all inprogress transfers + + This cancels the inprogress transfers by calling cancel() on all + tracked transfer coordinators. + + :param msg: The message to pass on to each transfer coordinator that + gets cancelled. + + :param exc_type: The type of exception to set for the cancellation + """ + for transfer_coordinator in self.tracked_transfer_coordinators: + transfer_coordinator.cancel(msg, exc_type) + + def wait(self): + """Wait until there are no more inprogress transfers + This will not stop when failures are encountered and not propagate any - of these errors from failed transfers, but it can be interrupted with - a KeyboardInterrupt. - """ - try: - transfer_coordinator = None - for transfer_coordinator in self.tracked_transfer_coordinators: - transfer_coordinator.result() - except KeyboardInterrupt: - logger.debug('Received KeyboardInterrupt in wait()') - # If Keyboard interrupt is raised while waiting for - # the result, then exit out of the wait and raise the - # exception - if transfer_coordinator: - logger.debug( - 'On KeyboardInterrupt was waiting for %s', + of these errors from failed transfers, but it can be interrupted with + a KeyboardInterrupt. + """ + try: + transfer_coordinator = None + for transfer_coordinator in self.tracked_transfer_coordinators: + transfer_coordinator.result() + except KeyboardInterrupt: + logger.debug('Received KeyboardInterrupt in wait()') + # If Keyboard interrupt is raised while waiting for + # the result, then exit out of the wait and raise the + # exception + if transfer_coordinator: + logger.debug( + 'On KeyboardInterrupt was waiting for %s', transfer_coordinator, ) - raise - except Exception: - # A general exception could have been thrown because - # of result(). We just want to ignore this and continue - # because we at least know that the transfer coordinator - # has completed. - pass + raise + except Exception: + # A general exception could have been thrown because + # of result(). We just want to ignore this and continue + # because we at least know that the transfer coordinator + # has completed. + pass diff --git a/contrib/python/s3transfer/py3/s3transfer/subscribers.py b/contrib/python/s3transfer/py3/s3transfer/subscribers.py index f7697aef3b..cf0dbaa0d7 100644 --- a/contrib/python/s3transfer/py3/s3transfer/subscribers.py +++ b/contrib/python/s3transfer/py3/s3transfer/subscribers.py @@ -1,92 +1,92 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -from s3transfer.compat import accepts_kwargs -from s3transfer.exceptions import InvalidSubscriberMethodError - - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). 
You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from s3transfer.compat import accepts_kwargs +from s3transfer.exceptions import InvalidSubscriberMethodError + + class BaseSubscriber: - """The base subscriber class - - It is recommended that all subscriber implementations subclass and then - override the subscription methods (i.e. on_{subsribe_type}() methods). - """ - + """The base subscriber class + + It is recommended that all subscriber implementations subclass and then + override the subscription methods (i.e. on_{subsribe_type}() methods). + """ + VALID_SUBSCRIBER_TYPES = ['queued', 'progress', 'done'] - def __new__(cls, *args, **kwargs): - cls._validate_subscriber_methods() + def __new__(cls, *args, **kwargs): + cls._validate_subscriber_methods() return super().__new__(cls) - - @classmethod - def _validate_subscriber_methods(cls): - for subscriber_type in cls.VALID_SUBSCRIBER_TYPES: - subscriber_method = getattr(cls, 'on_' + subscriber_type) + + @classmethod + def _validate_subscriber_methods(cls): + for subscriber_type in cls.VALID_SUBSCRIBER_TYPES: + subscriber_method = getattr(cls, 'on_' + subscriber_type) if not callable(subscriber_method): - raise InvalidSubscriberMethodError( + raise InvalidSubscriberMethodError( 'Subscriber method %s must be callable.' % subscriber_method ) - - if not accepts_kwargs(subscriber_method): - raise InvalidSubscriberMethodError( - 'Subscriber method %s must accept keyword ' + + if not accepts_kwargs(subscriber_method): + raise InvalidSubscriberMethodError( + 'Subscriber method %s must accept keyword ' 'arguments (**kwargs)' % subscriber_method ) - - def on_queued(self, future, **kwargs): - """Callback to be invoked when transfer request gets queued - - This callback can be useful for: - - * Keeping track of how many transfers have been requested - * Providing the expected transfer size through - future.meta.provide_transfer_size() so a HeadObject would not - need to be made for copies and downloads. - - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. - """ - pass - - def on_progress(self, future, bytes_transferred, **kwargs): - """Callback to be invoked when progress is made on transfer - - This callback can be useful for: - - * Recording and displaying progress - - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. - - :type bytes_transferred: int - :param bytes_transferred: The number of bytes transferred for that - invocation of the callback. Note that a negative amount can be - provided, which usually indicates that an in-progress request - needed to be retried and thus progress was rewound. - """ - pass - - def on_done(self, future, **kwargs): - """Callback to be invoked once a transfer is done - - This callback can be useful for: - - * Recording and displaying whether the transfer succeeded or - failed using future.result() - * Running some task after the transfer completed like changing - the last modified time of a downloaded file. 
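For example, a toy subscriber along these lines could report lifecycle events; it is a sketch, attached through the ``subscribers`` argument of the TransferManager methods:

.. code-block:: python

    from s3transfer.subscribers import BaseSubscriber

    class PrintingSubscriber(BaseSubscriber):
        # The on_* method names and the **kwargs in each signature are
        # required; BaseSubscriber validates both at instantiation time.
        def on_queued(self, future, **kwargs):
            print('transfer queued')

        def on_progress(self, future, bytes_transferred, **kwargs):
            print('%d more bytes transferred' % bytes_transferred)

        def on_done(self, future, **kwargs):
            print('transfer done')

    # e.g. manager.upload('/tmp/myfile', 'bucket', 'key',
    #                     subscribers=[PrintingSubscriber()])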
- - :type future: s3transfer.futures.TransferFuture - :param future: The TransferFuture representing the requested transfer. - """ - pass + + def on_queued(self, future, **kwargs): + """Callback to be invoked when transfer request gets queued + + This callback can be useful for: + + * Keeping track of how many transfers have been requested + * Providing the expected transfer size through + future.meta.provide_transfer_size() so a HeadObject would not + need to be made for copies and downloads. + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. + """ + pass + + def on_progress(self, future, bytes_transferred, **kwargs): + """Callback to be invoked when progress is made on transfer + + This callback can be useful for: + + * Recording and displaying progress + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. + + :type bytes_transferred: int + :param bytes_transferred: The number of bytes transferred for that + invocation of the callback. Note that a negative amount can be + provided, which usually indicates that an in-progress request + needed to be retried and thus progress was rewound. + """ + pass + + def on_done(self, future, **kwargs): + """Callback to be invoked once a transfer is done + + This callback can be useful for: + + * Recording and displaying whether the transfer succeeded or + failed using future.result() + * Running some task after the transfer completed like changing + the last modified time of a downloaded file. + + :type future: s3transfer.futures.TransferFuture + :param future: The TransferFuture representing the requested transfer. + """ + pass diff --git a/contrib/python/s3transfer/py3/s3transfer/tasks.py b/contrib/python/s3transfer/py3/s3transfer/tasks.py index 2d2402862c..1bad981264 100644 --- a/contrib/python/s3transfer/py3/s3transfer/tasks.py +++ b/contrib/python/s3transfer/py3/s3transfer/tasks.py @@ -1,29 +1,29 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import copy -import logging - -from s3transfer.utils import get_callbacks - -logger = logging.getLogger(__name__) - - +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import copy +import logging + +from s3transfer.utils import get_callbacks + +logger = logging.getLogger(__name__) + + class Task: - """A task associated to a TransferFuture request - - This is a base class for other classes to subclass from. 
All subclassed - classes must implement the main() method. - """ + """A task associated to a TransferFuture request + + This is a base class for other classes to subclass from. All subclassed + classes must implement the main() method. + """ def __init__( self, @@ -33,58 +33,58 @@ class Task: done_callbacks=None, is_final=False, ): - """ - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The context associated to the - TransferFuture for which this Task is associated with. - - :type main_kwargs: dict - :param main_kwargs: The keyword args that can be immediately supplied - to the _main() method of the task - - :type pending_main_kwargs: dict - :param pending_main_kwargs: The keyword args that are depended upon - by the result from a dependent future(s). The result returned by - the future(s) will be used as the value for the keyword argument - when _main() is called. The values for each key can be: - * a single future - Once completed, its value will be the - result of that single future - * a list of futures - Once all of the futures complete, the - value used will be a list of each completed future result - value in order of when they were originally supplied. - - :type done_callbacks: list of callbacks - :param done_callbacks: A list of callbacks to call once the task is - done completing. Each callback will be called with no arguments - and will be called no matter if the task succeeds or an exception - is raised. - - :type is_final: boolean - :param is_final: True, to indicate that this task is the final task - for the TransferFuture request. By setting this value to True, it - will set the result of the entire TransferFuture to the result - returned by this task's main() method. - """ - self._transfer_coordinator = transfer_coordinator - - self._main_kwargs = main_kwargs - if self._main_kwargs is None: - self._main_kwargs = {} - - self._pending_main_kwargs = pending_main_kwargs - if pending_main_kwargs is None: - self._pending_main_kwargs = {} - - self._done_callbacks = done_callbacks - if self._done_callbacks is None: - self._done_callbacks = [] - - self._is_final = is_final - - def __repr__(self): - # These are the general main_kwarg parameters that we want to - # display in the repr. - params_to_display = [ + """ + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The context associated to the + TransferFuture for which this Task is associated with. + + :type main_kwargs: dict + :param main_kwargs: The keyword args that can be immediately supplied + to the _main() method of the task + + :type pending_main_kwargs: dict + :param pending_main_kwargs: The keyword args that are depended upon + by the result from a dependent future(s). The result returned by + the future(s) will be used as the value for the keyword argument + when _main() is called. The values for each key can be: + * a single future - Once completed, its value will be the + result of that single future + * a list of futures - Once all of the futures complete, the + value used will be a list of each completed future result + value in order of when they were originally supplied. + + :type done_callbacks: list of callbacks + :param done_callbacks: A list of callbacks to call once the task is + done completing. Each callback will be called with no arguments + and will be called no matter if the task succeeds or an exception + is raised. 
+ + :type is_final: boolean + :param is_final: True, to indicate that this task is the final task + for the TransferFuture request. By setting this value to True, it + will set the result of the entire TransferFuture to the result + returned by this task's main() method. + """ + self._transfer_coordinator = transfer_coordinator + + self._main_kwargs = main_kwargs + if self._main_kwargs is None: + self._main_kwargs = {} + + self._pending_main_kwargs = pending_main_kwargs + if pending_main_kwargs is None: + self._pending_main_kwargs = {} + + self._done_callbacks = done_callbacks + if self._done_callbacks is None: + self._done_callbacks = [] + + self._is_final = is_final + + def __repr__(self): + # These are the general main_kwarg parameters that we want to + # display in the repr. + params_to_display = [ 'bucket', 'key', 'part_number', @@ -92,8 +92,8 @@ class Task: 'transfer_future', 'offset', 'extra_args', - ] - main_kwargs_to_display = self._get_kwargs_with_params_to_include( + ] + main_kwargs_to_display = self._get_kwargs_with_params_to_include( self._main_kwargs, params_to_display ) return '{}(transfer_id={}, {})'.format( @@ -101,287 +101,287 @@ class Task: self._transfer_coordinator.transfer_id, main_kwargs_to_display, ) - - @property - def transfer_id(self): - """The id for the transfer request that the task belongs to""" - return self._transfer_coordinator.transfer_id - - def _get_kwargs_with_params_to_include(self, kwargs, include): - filtered_kwargs = {} - for param in include: - if param in kwargs: - filtered_kwargs[param] = kwargs[param] - return filtered_kwargs - - def _get_kwargs_with_params_to_exclude(self, kwargs, exclude): - filtered_kwargs = {} - for param, value in kwargs.items(): - if param in exclude: - continue - filtered_kwargs[param] = value - return filtered_kwargs - - def __call__(self): - """The callable to use when submitting a Task to an executor""" - try: - # Wait for all of futures this task depends on. - self._wait_on_dependent_futures() - # Gather up all of the main keyword arguments for main(). - # This includes the immediately provided main_kwargs and - # the values for pending_main_kwargs that source from the return + + @property + def transfer_id(self): + """The id for the transfer request that the task belongs to""" + return self._transfer_coordinator.transfer_id + + def _get_kwargs_with_params_to_include(self, kwargs, include): + filtered_kwargs = {} + for param in include: + if param in kwargs: + filtered_kwargs[param] = kwargs[param] + return filtered_kwargs + + def _get_kwargs_with_params_to_exclude(self, kwargs, exclude): + filtered_kwargs = {} + for param, value in kwargs.items(): + if param in exclude: + continue + filtered_kwargs[param] = value + return filtered_kwargs + + def __call__(self): + """The callable to use when submitting a Task to an executor""" + try: + # Wait for all of futures this task depends on. + self._wait_on_dependent_futures() + # Gather up all of the main keyword arguments for main(). + # This includes the immediately provided main_kwargs and + # the values for pending_main_kwargs that source from the return # values from the task's dependent futures. - kwargs = self._get_all_main_kwargs() - # If the task is not done (really only if some other related - # task to the TransferFuture had failed) then execute the task's - # main() method. 
- if not self._transfer_coordinator.done(): - return self._execute_main(kwargs) - except Exception as e: - self._log_and_set_exception(e) - finally: - # Run any done callbacks associated to the task no matter what. - for done_callback in self._done_callbacks: - done_callback() - - if self._is_final: - # If this is the final task announce that it is done if results - # are waiting on its completion. - self._transfer_coordinator.announce_done() - - def _execute_main(self, kwargs): - # Do not display keyword args that should not be printed, especially - # if they are going to make the logs hard to follow. - params_to_exclude = ['data'] - kwargs_to_display = self._get_kwargs_with_params_to_exclude( + kwargs = self._get_all_main_kwargs() + # If the task is not done (really only if some other related + # task to the TransferFuture had failed) then execute the task's + # main() method. + if not self._transfer_coordinator.done(): + return self._execute_main(kwargs) + except Exception as e: + self._log_and_set_exception(e) + finally: + # Run any done callbacks associated to the task no matter what. + for done_callback in self._done_callbacks: + done_callback() + + if self._is_final: + # If this is the final task announce that it is done if results + # are waiting on its completion. + self._transfer_coordinator.announce_done() + + def _execute_main(self, kwargs): + # Do not display keyword args that should not be printed, especially + # if they are going to make the logs hard to follow. + params_to_exclude = ['data'] + kwargs_to_display = self._get_kwargs_with_params_to_exclude( kwargs, params_to_exclude ) - # Log what is about to be executed. + # Log what is about to be executed. logger.debug(f"Executing task {self} with kwargs {kwargs_to_display}") - - return_value = self._main(**kwargs) - # If the task is the final task, then set the TransferFuture's - # value to the return value from main(). - if self._is_final: - self._transfer_coordinator.set_result(return_value) - return return_value - - def _log_and_set_exception(self, exception): - # If an exception is ever thrown than set the exception for the - # entire TransferFuture. - logger.debug("Exception raised.", exc_info=True) - self._transfer_coordinator.set_exception(exception) - - def _main(self, **kwargs): - """The method that will be ran in the executor - - This method must be implemented by subclasses from Task. main() can - be implemented with any arguments decided upon by the subclass. - """ - raise NotImplementedError('_main() must be implemented') - - def _wait_on_dependent_futures(self): - # Gather all of the futures into that main() depends on. - futures_to_wait_on = [] - for _, future in self._pending_main_kwargs.items(): - # If the pending main keyword arg is a list then extend the list. - if isinstance(future, list): - futures_to_wait_on.extend(future) + + return_value = self._main(**kwargs) + # If the task is the final task, then set the TransferFuture's + # value to the return value from main(). + if self._is_final: + self._transfer_coordinator.set_result(return_value) + return return_value + + def _log_and_set_exception(self, exception): + # If an exception is ever thrown than set the exception for the + # entire TransferFuture. + logger.debug("Exception raised.", exc_info=True) + self._transfer_coordinator.set_exception(exception) + + def _main(self, **kwargs): + """The method that will be ran in the executor + + This method must be implemented by subclasses from Task. 
main() can + be implemented with any arguments decided upon by the subclass. + """ + raise NotImplementedError('_main() must be implemented') + + def _wait_on_dependent_futures(self): + # Gather all of the futures into that main() depends on. + futures_to_wait_on = [] + for _, future in self._pending_main_kwargs.items(): + # If the pending main keyword arg is a list then extend the list. + if isinstance(future, list): + futures_to_wait_on.extend(future) # If the pending main keyword arg is a future append it to the list. - else: - futures_to_wait_on.append(future) - # Now wait for all of the futures to complete. - self._wait_until_all_complete(futures_to_wait_on) - - def _wait_until_all_complete(self, futures): - # This is a basic implementation of the concurrent.futures.wait() - # - # concurrent.futures.wait() is not used instead because of this - # reported issue: https://bugs.python.org/issue20319. + else: + futures_to_wait_on.append(future) + # Now wait for all of the futures to complete. + self._wait_until_all_complete(futures_to_wait_on) + + def _wait_until_all_complete(self, futures): + # This is a basic implementation of the concurrent.futures.wait() + # + # concurrent.futures.wait() is not used instead because of this + # reported issue: https://bugs.python.org/issue20319. # The issue would occasionally cause multipart uploads to hang - # when wait() was called. With this approach, it avoids the - # concurrency bug by removing any association with concurrent.futures - # implementation of waiters. - logger.debug( + # when wait() was called. With this approach, it avoids the + # concurrency bug by removing any association with concurrent.futures + # implementation of waiters. + logger.debug( '%s about to wait for the following futures %s', self, futures ) - for future in futures: - try: - logger.debug('%s about to wait for %s', self, future) - future.result() - except Exception: - # result() can also produce exceptions. We want to ignore + for future in futures: + try: + logger.debug('%s about to wait for %s', self, future) + future.result() + except Exception: + # result() can also produce exceptions. We want to ignore # these to be deferred to error handling down the road. - pass - logger.debug('%s done waiting for dependent futures', self) - - def _get_all_main_kwargs(self): - # Copy over all of the kwargs that we know is available. - kwargs = copy.copy(self._main_kwargs) - - # Iterate through the kwargs whose values are pending on the result - # of a future. - for key, pending_value in self._pending_main_kwargs.items(): - # If the value is a list of futures, iterate though the list - # appending on the result from each future. - if isinstance(pending_value, list): - result = [] - for future in pending_value: - result.append(future.result()) - # Otherwise if the pending_value is a future, just wait for it. - else: - result = pending_value.result() - # Add the retrieved value to the kwargs to be sent to the - # main() call. - kwargs[key] = result - return kwargs - - -class SubmissionTask(Task): - """A base class for any submission task - - Submission tasks are the top-level task used to submit a series of tasks - to execute a particular transfer. 
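``_get_all_main_kwargs`` above is what turns ``pending_main_kwargs`` (a single future, or a list of futures, per keyword) into the concrete arguments handed to ``_main()``. A standalone sketch of the same resolution pattern using ``concurrent.futures``; the function name, bucket/key values, and the submitted work are illustrative, not s3transfer APIs:

.. code-block:: python

    import concurrent.futures


    def resolve_pending_kwargs(main_kwargs, pending_main_kwargs):
        # Illustrative stand-in for Task._get_all_main_kwargs(): each pending
        # value is a future or a list of futures whose results get merged
        # into the immediately-known kwargs.
        kwargs = dict(main_kwargs)
        for key, pending in pending_main_kwargs.items():
            if isinstance(pending, list):
                kwargs[key] = [future.result() for future in pending]
            else:
                kwargs[key] = pending.result()
        return kwargs


    with concurrent.futures.ThreadPoolExecutor() as executor:
        upload_id_future = executor.submit(lambda: 'hypothetical-upload-id')
        part_futures = [
            executor.submit(lambda n=n: {'PartNumber': n}) for n in (1, 2)
        ]
        resolved = resolve_pending_kwargs(
            {'bucket': 'my-bucket', 'key': 'my-key'},
            {'upload_id': upload_id_future, 'parts': part_futures},
        )
        print(resolved)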
- """ - - def _main(self, transfer_future, **kwargs): - """ - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :param kwargs: Any additional kwargs that you may want to pass - to the _submit() method - """ - try: - self._transfer_coordinator.set_status_to_queued() - - # Before submitting any tasks, run all of the on_queued callbacks - on_queued_callbacks = get_callbacks(transfer_future, 'queued') - for on_queued_callback in on_queued_callbacks: - on_queued_callback() - - # Once callbacks have been ran set the status to running. - self._transfer_coordinator.set_status_to_running() - - # Call the submit method to start submitting tasks to execute the - # transfer. - self._submit(transfer_future=transfer_future, **kwargs) - except BaseException as e: - # If there was an exception raised during the submission of task - # there is a chance that the final task that signals if a transfer - # is done and too run the cleanup may never have been submitted in - # the first place so we need to account accordingly. - # - # Note that BaseException is caught, instead of Exception, because + pass + logger.debug('%s done waiting for dependent futures', self) + + def _get_all_main_kwargs(self): + # Copy over all of the kwargs that we know is available. + kwargs = copy.copy(self._main_kwargs) + + # Iterate through the kwargs whose values are pending on the result + # of a future. + for key, pending_value in self._pending_main_kwargs.items(): + # If the value is a list of futures, iterate though the list + # appending on the result from each future. + if isinstance(pending_value, list): + result = [] + for future in pending_value: + result.append(future.result()) + # Otherwise if the pending_value is a future, just wait for it. + else: + result = pending_value.result() + # Add the retrieved value to the kwargs to be sent to the + # main() call. + kwargs[key] = result + return kwargs + + +class SubmissionTask(Task): + """A base class for any submission task + + Submission tasks are the top-level task used to submit a series of tasks + to execute a particular transfer. + """ + + def _main(self, transfer_future, **kwargs): + """ + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :param kwargs: Any additional kwargs that you may want to pass + to the _submit() method + """ + try: + self._transfer_coordinator.set_status_to_queued() + + # Before submitting any tasks, run all of the on_queued callbacks + on_queued_callbacks = get_callbacks(transfer_future, 'queued') + for on_queued_callback in on_queued_callbacks: + on_queued_callback() + + # Once callbacks have been ran set the status to running. + self._transfer_coordinator.set_status_to_running() + + # Call the submit method to start submitting tasks to execute the + # transfer. + self._submit(transfer_future=transfer_future, **kwargs) + except BaseException as e: + # If there was an exception raised during the submission of task + # there is a chance that the final task that signals if a transfer + # is done and too run the cleanup may never have been submitted in + # the first place so we need to account accordingly. 
+ # + # Note that BaseException is caught, instead of Exception, because # for some implementations of executors, specifically the serial - # implementation, the SubmissionTask is directly exposed to - # KeyboardInterupts and so needs to cleanup and signal done - # for those as well. - - # Set the exception, that caused the process to fail. - self._log_and_set_exception(e) - - # Wait for all possibly associated futures that may have spawned + # implementation, the SubmissionTask is directly exposed to + # KeyboardInterupts and so needs to cleanup and signal done + # for those as well. + + # Set the exception, that caused the process to fail. + self._log_and_set_exception(e) + + # Wait for all possibly associated futures that may have spawned # from this submission task have finished before we announce the - # transfer done. - self._wait_for_all_submitted_futures_to_complete() - - # Announce the transfer as done, which will run any cleanups - # and done callbacks as well. - self._transfer_coordinator.announce_done() - - def _submit(self, transfer_future, **kwargs): + # transfer done. + self._wait_for_all_submitted_futures_to_complete() + + # Announce the transfer as done, which will run any cleanups + # and done callbacks as well. + self._transfer_coordinator.announce_done() + + def _submit(self, transfer_future, **kwargs): """The submission method to be implemented - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - - :param kwargs: Any additional keyword arguments you want to be passed - in - """ - raise NotImplementedError('_submit() must be implemented') - - def _wait_for_all_submitted_futures_to_complete(self): - # We want to wait for all futures that were submitted to - # complete as we do not want the cleanup callbacks or done callbacks - # to be called to early. The main problem is any task that was - # submitted may have submitted even more during its process and so - # we need to account accordingly. - - # First get all of the futures that were submitted up to this point. - submitted_futures = self._transfer_coordinator.associated_futures - while submitted_futures: - # Wait for those futures to complete. - self._wait_until_all_complete(submitted_futures) - # However, more futures may have been submitted as we waited so - # we need to check again for any more associated futures. + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + + :param kwargs: Any additional keyword arguments you want to be passed + in + """ + raise NotImplementedError('_submit() must be implemented') + + def _wait_for_all_submitted_futures_to_complete(self): + # We want to wait for all futures that were submitted to + # complete as we do not want the cleanup callbacks or done callbacks + # to be called to early. The main problem is any task that was + # submitted may have submitted even more during its process and so + # we need to account accordingly. + + # First get all of the futures that were submitted up to this point. + submitted_futures = self._transfer_coordinator.associated_futures + while submitted_futures: + # Wait for those futures to complete. + self._wait_until_all_complete(submitted_futures) + # However, more futures may have been submitted as we waited so + # we need to check again for any more associated futures. 
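The cleanup path above leans on two waiting patterns from this file: ``_wait_until_all_complete`` (call ``result()`` on each future and defer any exception to later error handling) and the re-check loop in ``_wait_for_all_submitted_futures_to_complete`` (futures can submit more futures while we wait, so keep polling until the associated set stops changing). A hedged standalone sketch of that combination; the helper names and the thread-pool workload are illustrative:

.. code-block:: python

    import concurrent.futures


    def wait_until_all_complete(futures):
        # Block on each future; exceptions are swallowed here and surface
        # later through the normal error-handling path.
        for future in futures:
            try:
                future.result()
            except Exception:
                pass


    def wait_for_all_submitted(get_associated_futures):
        # More futures may be spawned while waiting, so re-check until a
        # wait completes without the set of futures changing.
        submitted = get_associated_futures()
        while submitted:
            wait_until_all_complete(submitted)
            possibly_more = get_associated_futures()
            if submitted == possibly_more:
                break
            submitted = possibly_more


    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(pow, 2, 10), executor.submit(pow, 3, 3)]
        wait_for_all_submitted(lambda: list(futures))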
possibly_more_submitted_futures = ( - self._transfer_coordinator.associated_futures + self._transfer_coordinator.associated_futures ) - # If the current list of submitted futures is equal to the - # the list of associated futures for when after the wait completes, - # we can ensure no more futures were submitted in waiting on - # the current list of futures to complete ultimately meaning all - # futures that may have spawned from the original submission task - # have completed. - if submitted_futures == possibly_more_submitted_futures: - break - submitted_futures = possibly_more_submitted_futures - - -class CreateMultipartUploadTask(Task): - """Task to initiate a multipart upload""" - - def _main(self, client, bucket, key, extra_args): - """ - :param client: The client to use when calling CreateMultipartUpload - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param extra_args: A dictionary of any extra arguments that may be + # If the current list of submitted futures is equal to the + # the list of associated futures for when after the wait completes, + # we can ensure no more futures were submitted in waiting on + # the current list of futures to complete ultimately meaning all + # futures that may have spawned from the original submission task + # have completed. + if submitted_futures == possibly_more_submitted_futures: + break + submitted_futures = possibly_more_submitted_futures + + +class CreateMultipartUploadTask(Task): + """Task to initiate a multipart upload""" + + def _main(self, client, bucket, key, extra_args): + """ + :param client: The client to use when calling CreateMultipartUpload + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param extra_args: A dictionary of any extra arguments that may be used in the initialization. - - :returns: The upload id of the multipart upload - """ - # Create the multipart upload. - response = client.create_multipart_upload( + + :returns: The upload id of the multipart upload + """ + # Create the multipart upload. + response = client.create_multipart_upload( Bucket=bucket, Key=key, **extra_args ) - upload_id = response['UploadId'] - - # Add a cleanup if the multipart upload fails at any point. - self._transfer_coordinator.add_failure_cleanup( + upload_id = response['UploadId'] + + # Add a cleanup if the multipart upload fails at any point. + self._transfer_coordinator.add_failure_cleanup( client.abort_multipart_upload, Bucket=bucket, Key=key, UploadId=upload_id, - ) - return upload_id - - -class CompleteMultipartUploadTask(Task): - """Task to complete a multipart upload""" - - def _main(self, client, bucket, key, upload_id, parts, extra_args): - """ - :param client: The client to use when calling CompleteMultipartUpload - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param parts: A list of parts to use to complete the multipart upload:: - - [{'Etag': etag_value, 'PartNumber': part_number}, ...] - - Each element in the list consists of a return value from - ``UploadPartTask.main()``. - :param extra_args: A dictionary of any extra arguments that may be - used in completing the multipart transfer. 
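``CreateMultipartUploadTask`` above is a thin wrapper over the S3 client call plus an ``abort_multipart_upload`` cleanup registered on failure, and ``CompleteMultipartUploadTask`` stitches together the parts recorded from ``UploadPartTask`` results. The same lifecycle driven by hand with boto3, as a sketch only: the bucket and key names are hypothetical, credentials are assumed, and a real transfer would upload many parts rather than one:

.. code-block:: python

    import boto3

    client = boto3.client('s3', region_name='us-west-2')
    bucket, key = 'my-bucket', 'my-key'   # hypothetical names

    # CreateMultipartUploadTask: start the upload and keep the UploadId.
    upload_id = client.create_multipart_upload(Bucket=bucket, Key=key)['UploadId']

    try:
        # UploadPartTask equivalent: upload one part and record its ETag.
        # A lone part may be under 5 MiB because it is also the last part.
        part = client.upload_part(
            Bucket=bucket, Key=key, UploadId=upload_id,
            PartNumber=1, Body=b'hello multipart',
        )
        parts = [{'ETag': part['ETag'], 'PartNumber': 1}]

        # CompleteMultipartUploadTask: stitch the recorded parts together.
        client.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts},
        )
    except Exception:
        # Mirrors the failure cleanup registered via add_failure_cleanup().
        client.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
        raise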
- """ - client.complete_multipart_upload( + ) + return upload_id + + +class CompleteMultipartUploadTask(Task): + """Task to complete a multipart upload""" + + def _main(self, client, bucket, key, upload_id, parts, extra_args): + """ + :param client: The client to use when calling CompleteMultipartUpload + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param parts: A list of parts to use to complete the multipart upload:: + + [{'Etag': etag_value, 'PartNumber': part_number}, ...] + + Each element in the list consists of a return value from + ``UploadPartTask.main()``. + :param extra_args: A dictionary of any extra arguments that may be + used in completing the multipart transfer. + """ + client.complete_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id, - MultipartUpload={'Parts': parts}, + MultipartUpload={'Parts': parts}, **extra_args, ) diff --git a/contrib/python/s3transfer/py3/s3transfer/upload.py b/contrib/python/s3transfer/py3/s3transfer/upload.py index 17a950fa46..31ade051d7 100644 --- a/contrib/python/s3transfer/py3/s3transfer/upload.py +++ b/contrib/python/s3transfer/py3/s3transfer/upload.py @@ -1,20 +1,20 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import math +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+import math from io import BytesIO - + from s3transfer.compat import readable, seekable -from s3transfer.futures import IN_MEMORY_UPLOAD_TAG +from s3transfer.futures import IN_MEMORY_UPLOAD_TAG from s3transfer.tasks import ( CompleteMultipartUploadTask, CreateMultipartUploadTask, @@ -27,527 +27,527 @@ from s3transfer.utils import ( get_callbacks, get_filtered_dict, ) - - + + class AggregatedProgressCallback: - def __init__(self, callbacks, threshold=1024 * 256): - """Aggregates progress updates for every provided progress callback - - :type callbacks: A list of functions that accepts bytes_transferred - as a single argument - :param callbacks: The callbacks to invoke when threshold is reached - - :type threshold: int - :param threshold: The progress threshold in which to take the - aggregated progress and invoke the progress callback with that - aggregated progress total - """ - self._callbacks = callbacks - self._threshold = threshold - self._bytes_seen = 0 - - def __call__(self, bytes_transferred): - self._bytes_seen += bytes_transferred - if self._bytes_seen >= self._threshold: - self._trigger_callbacks() - - def flush(self): - """Flushes out any progress that has not been sent to its callbacks""" - if self._bytes_seen > 0: - self._trigger_callbacks() - - def _trigger_callbacks(self): - for callback in self._callbacks: - callback(bytes_transferred=self._bytes_seen) - self._bytes_seen = 0 - - + def __init__(self, callbacks, threshold=1024 * 256): + """Aggregates progress updates for every provided progress callback + + :type callbacks: A list of functions that accepts bytes_transferred + as a single argument + :param callbacks: The callbacks to invoke when threshold is reached + + :type threshold: int + :param threshold: The progress threshold in which to take the + aggregated progress and invoke the progress callback with that + aggregated progress total + """ + self._callbacks = callbacks + self._threshold = threshold + self._bytes_seen = 0 + + def __call__(self, bytes_transferred): + self._bytes_seen += bytes_transferred + if self._bytes_seen >= self._threshold: + self._trigger_callbacks() + + def flush(self): + """Flushes out any progress that has not been sent to its callbacks""" + if self._bytes_seen > 0: + self._trigger_callbacks() + + def _trigger_callbacks(self): + for callback in self._callbacks: + callback(bytes_transferred=self._bytes_seen) + self._bytes_seen = 0 + + class InterruptReader: - """Wrapper that can interrupt reading using an error - + """Wrapper that can interrupt reading using an error + It uses a transfer coordinator to propagate an error if it notices - that a read is being made while the file is being read from. - - :type fileobj: file-like obj - :param fileobj: The file-like object to read from - - :type transfer_coordinator: s3transfer.futures.TransferCoordinator - :param transfer_coordinator: The transfer coordinator to use if the - reader needs to be interrupted. - """ - - def __init__(self, fileobj, transfer_coordinator): - self._fileobj = fileobj - self._transfer_coordinator = transfer_coordinator - - def read(self, amount=None): - # If there is an exception, then raise the exception. - # We raise an error instead of returning no bytes because for - # requests where the content length and md5 was sent, it will - # cause md5 mismatches and retries as there was no indication that - # the stream being read from encountered any issues. 
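``AggregatedProgressCallback`` above batches progress updates and only invokes the wrapped callbacks once the running total crosses ``threshold`` (256 KiB by default), with ``flush()`` pushing out any remainder. A small sketch exercising it directly, assuming the package from this diff is importable; the tiny threshold is chosen purely to make the batching visible:

.. code-block:: python

    from s3transfer.upload import AggregatedProgressCallback


    def show(bytes_transferred):
        print(f'flushed {bytes_transferred} bytes to the real callback')


    # Use a tiny threshold so the aggregation is easy to observe.
    aggregated = AggregatedProgressCallback([show], threshold=1024)

    aggregated(400)     # below the threshold: nothing is printed yet
    aggregated(700)     # 1100 >= 1024, so show() fires with the aggregated total
    aggregated(100)
    aggregated.flush()  # pushes out the remaining 100 bytes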
- if self._transfer_coordinator.exception: - raise self._transfer_coordinator.exception - return self._fileobj.read(amount) - + that a read is being made while the file is being read from. + + :type fileobj: file-like obj + :param fileobj: The file-like object to read from + + :type transfer_coordinator: s3transfer.futures.TransferCoordinator + :param transfer_coordinator: The transfer coordinator to use if the + reader needs to be interrupted. + """ + + def __init__(self, fileobj, transfer_coordinator): + self._fileobj = fileobj + self._transfer_coordinator = transfer_coordinator + + def read(self, amount=None): + # If there is an exception, then raise the exception. + # We raise an error instead of returning no bytes because for + # requests where the content length and md5 was sent, it will + # cause md5 mismatches and retries as there was no indication that + # the stream being read from encountered any issues. + if self._transfer_coordinator.exception: + raise self._transfer_coordinator.exception + return self._fileobj.read(amount) + def seek(self, where, whence=0): self._fileobj.seek(where, whence) - - def tell(self): - return self._fileobj.tell() - - def close(self): - self._fileobj.close() - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - + + def tell(self): + return self._fileobj.tell() + + def close(self): + self._fileobj.close() + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + class UploadInputManager: - """Base manager class for handling various types of files for uploads - - This class is typically used for the UploadSubmissionTask class to help - determine the following: - - * How to determine the size of the file - * How to determine if a multipart upload is required - * How to retrieve the body for a PutObject - * How to retrieve the bodies for a set of UploadParts - - The answers/implementations differ for the various types of file inputs - that may be accepted. All implementations must subclass and override - public methods from this class. - """ - - def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): - self._osutil = osutil - self._transfer_coordinator = transfer_coordinator - self._bandwidth_limiter = bandwidth_limiter - - @classmethod - def is_compatible(cls, upload_source): - """Determines if the source for the upload is compatible with manager - - :param upload_source: The source for which the upload will pull data - from. - - :returns: True if the manager can handle the type of source specified - otherwise returns False. - """ - raise NotImplementedError('must implement _is_compatible()') - - def stores_body_in_memory(self, operation_name): - """Whether the body it provides are stored in-memory - - :type operation_name: str - :param operation_name: The name of the client operation that the body - is being used for. Valid operation_names are ``put_object`` and - ``upload_part``. - - :rtype: boolean - :returns: True if the body returned by the manager will be stored in - memory. False if the manager will not directly store the body in - memory. 
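The ``InterruptReader`` wrapper shown earlier in this hunk raises the coordinator's exception on the next ``read()`` so a cancelled transfer does not keep streaming bytes. A standalone re-creation of that pattern under stated assumptions: ``_StubCoordinator`` and ``InterruptingReader`` are illustrative stand-ins, not the library classes:

.. code-block:: python

    import io


    class _StubCoordinator:
        # Stand-in for TransferCoordinator; only the attribute read() checks.
        def __init__(self):
            self.exception = None


    class InterruptingReader:
        # Illustrative re-creation of the InterruptReader pattern above.
        def __init__(self, fileobj, coordinator):
            self._fileobj = fileobj
            self._coordinator = coordinator

        def read(self, amount=None):
            # Raising (rather than returning b'') avoids silent short bodies
            # when Content-Length/MD5 were already sent with the request.
            if self._coordinator.exception:
                raise self._coordinator.exception
            return self._fileobj.read(amount)


    coordinator = _StubCoordinator()
    reader = InterruptingReader(io.BytesIO(b'payload'), coordinator)
    print(reader.read(3))                      # b'pay'
    coordinator.exception = RuntimeError('transfer cancelled')
    # The next reader.read() call now raises instead of returning more bytes.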
- """ + """Base manager class for handling various types of files for uploads + + This class is typically used for the UploadSubmissionTask class to help + determine the following: + + * How to determine the size of the file + * How to determine if a multipart upload is required + * How to retrieve the body for a PutObject + * How to retrieve the bodies for a set of UploadParts + + The answers/implementations differ for the various types of file inputs + that may be accepted. All implementations must subclass and override + public methods from this class. + """ + + def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): + self._osutil = osutil + self._transfer_coordinator = transfer_coordinator + self._bandwidth_limiter = bandwidth_limiter + + @classmethod + def is_compatible(cls, upload_source): + """Determines if the source for the upload is compatible with manager + + :param upload_source: The source for which the upload will pull data + from. + + :returns: True if the manager can handle the type of source specified + otherwise returns False. + """ + raise NotImplementedError('must implement _is_compatible()') + + def stores_body_in_memory(self, operation_name): + """Whether the body it provides are stored in-memory + + :type operation_name: str + :param operation_name: The name of the client operation that the body + is being used for. Valid operation_names are ``put_object`` and + ``upload_part``. + + :rtype: boolean + :returns: True if the body returned by the manager will be stored in + memory. False if the manager will not directly store the body in + memory. + """ raise NotImplementedError('must implement store_body_in_memory()') - - def provide_transfer_size(self, transfer_future): - """Provides the transfer size of an upload - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - """ - raise NotImplementedError('must implement provide_transfer_size()') - - def requires_multipart_upload(self, transfer_future, config): - """Determines where a multipart upload is required - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type config: s3transfer.manager.TransferConfig - :param config: The config associated to the transfer manager - - :rtype: boolean - :returns: True, if the upload should be multipart based on + + def provide_transfer_size(self, transfer_future): + """Provides the transfer size of an upload + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + """ + raise NotImplementedError('must implement provide_transfer_size()') + + def requires_multipart_upload(self, transfer_future, config): + """Determines where a multipart upload is required + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type config: s3transfer.manager.TransferConfig + :param config: The config associated to the transfer manager + + :rtype: boolean + :returns: True, if the upload should be multipart based on configuration and size. False, otherwise. 
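``requires_multipart_upload`` reduces to a size comparison against the configured threshold, and the filename-based manager further below turns the size into a part count with a ceiling division. A quick arithmetic sketch with hypothetical numbers; the 8 MiB threshold and chunk size are assumptions for illustration, not values read from this diff:

.. code-block:: python

    import math

    multipart_threshold = 8 * 1024 * 1024   # hypothetical TransferConfig values
    multipart_chunksize = 8 * 1024 * 1024
    file_size = 80 * 1024 * 1024            # an 80 MiB upload

    use_multipart = file_size >= multipart_threshold
    num_parts = int(math.ceil(file_size / float(multipart_chunksize)))

    print(use_multipart)   # True
    print(num_parts)       # 10 parts of 8 MiB each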
- """ - raise NotImplementedError('must implement requires_multipart_upload()') - - def get_put_object_body(self, transfer_future): - """Returns the body to use for PutObject - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type config: s3transfer.manager.TransferConfig - :param config: The config associated to the transfer manager - - :rtype: s3transfer.utils.ReadFileChunk - :returns: A ReadFileChunk including all progress callbacks - associated with the transfer future. - """ - raise NotImplementedError('must implement get_put_object_body()') - - def yield_upload_part_bodies(self, transfer_future, chunksize): - """Yields the part number and body to use for each UploadPart - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The future associated with upload request - - :type chunksize: int - :param chunksize: The chunksize to use for this upload. - - :rtype: int, s3transfer.utils.ReadFileChunk - :returns: Yields the part number and the ReadFileChunk including all - progress callbacks associated with the transfer future for that - specific yielded part. - """ - raise NotImplementedError('must implement yield_upload_part_bodies()') - - def _wrap_fileobj(self, fileobj): - fileobj = InterruptReader(fileobj, self._transfer_coordinator) - if self._bandwidth_limiter: - fileobj = self._bandwidth_limiter.get_bandwith_limited_stream( + """ + raise NotImplementedError('must implement requires_multipart_upload()') + + def get_put_object_body(self, transfer_future): + """Returns the body to use for PutObject + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type config: s3transfer.manager.TransferConfig + :param config: The config associated to the transfer manager + + :rtype: s3transfer.utils.ReadFileChunk + :returns: A ReadFileChunk including all progress callbacks + associated with the transfer future. + """ + raise NotImplementedError('must implement get_put_object_body()') + + def yield_upload_part_bodies(self, transfer_future, chunksize): + """Yields the part number and body to use for each UploadPart + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The future associated with upload request + + :type chunksize: int + :param chunksize: The chunksize to use for this upload. + + :rtype: int, s3transfer.utils.ReadFileChunk + :returns: Yields the part number and the ReadFileChunk including all + progress callbacks associated with the transfer future for that + specific yielded part. + """ + raise NotImplementedError('must implement yield_upload_part_bodies()') + + def _wrap_fileobj(self, fileobj): + fileobj = InterruptReader(fileobj, self._transfer_coordinator) + if self._bandwidth_limiter: + fileobj = self._bandwidth_limiter.get_bandwith_limited_stream( fileobj, self._transfer_coordinator, enabled=False ) - return fileobj - - def _get_progress_callbacks(self, transfer_future): - callbacks = get_callbacks(transfer_future, 'progress') - # We only want to be wrapping the callbacks if there are callbacks to - # invoke because we do not want to be doing any unnecessary work if - # there are no callbacks to invoke. 
- if callbacks: - return [AggregatedProgressCallback(callbacks)] - return [] - - def _get_close_callbacks(self, aggregated_progress_callbacks): - return [callback.flush for callback in aggregated_progress_callbacks] - - -class UploadFilenameInputManager(UploadInputManager): - """Upload utility for filenames""" - - @classmethod - def is_compatible(cls, upload_source): + return fileobj + + def _get_progress_callbacks(self, transfer_future): + callbacks = get_callbacks(transfer_future, 'progress') + # We only want to be wrapping the callbacks if there are callbacks to + # invoke because we do not want to be doing any unnecessary work if + # there are no callbacks to invoke. + if callbacks: + return [AggregatedProgressCallback(callbacks)] + return [] + + def _get_close_callbacks(self, aggregated_progress_callbacks): + return [callback.flush for callback in aggregated_progress_callbacks] + + +class UploadFilenameInputManager(UploadInputManager): + """Upload utility for filenames""" + + @classmethod + def is_compatible(cls, upload_source): return isinstance(upload_source, str) - - def stores_body_in_memory(self, operation_name): - return False - - def provide_transfer_size(self, transfer_future): - transfer_future.meta.provide_transfer_size( + + def stores_body_in_memory(self, operation_name): + return False + + def provide_transfer_size(self, transfer_future): + transfer_future.meta.provide_transfer_size( self._osutil.get_file_size(transfer_future.meta.call_args.fileobj) ) - - def requires_multipart_upload(self, transfer_future, config): - return transfer_future.meta.size >= config.multipart_threshold - - def get_put_object_body(self, transfer_future): - # Get a file-like object for the given input - fileobj, full_size = self._get_put_object_fileobj_with_full_size( + + def requires_multipart_upload(self, transfer_future, config): + return transfer_future.meta.size >= config.multipart_threshold + + def get_put_object_body(self, transfer_future): + # Get a file-like object for the given input + fileobj, full_size = self._get_put_object_fileobj_with_full_size( transfer_future ) - - # Wrap fileobj with interrupt reader that will quickly cancel - # uploads if needed instead of having to wait for the socket - # to completely read all of the data. - fileobj = self._wrap_fileobj(fileobj) - - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - size = transfer_future.meta.size - # Return the file-like object wrapped into a ReadFileChunk to get - # progress. - return self._osutil.open_file_chunk_reader_from_fileobj( + + # Wrap fileobj with interrupt reader that will quickly cancel + # uploads if needed instead of having to wait for the socket + # to completely read all of the data. + fileobj = self._wrap_fileobj(fileobj) + + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + size = transfer_future.meta.size + # Return the file-like object wrapped into a ReadFileChunk to get + # progress. 
+ return self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=size, full_file_size=full_size, callbacks=callbacks, close_callbacks=close_callbacks, ) - - def yield_upload_part_bodies(self, transfer_future, chunksize): - full_file_size = transfer_future.meta.size - num_parts = self._get_num_parts(transfer_future, chunksize) - for part_number in range(1, num_parts + 1): - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - start_byte = chunksize * (part_number - 1) - # Get a file-like object for that part and the size of the full - # file size for the associated file-like object for that part. - fileobj, full_size = self._get_upload_part_fileobj_with_full_size( + + def yield_upload_part_bodies(self, transfer_future, chunksize): + full_file_size = transfer_future.meta.size + num_parts = self._get_num_parts(transfer_future, chunksize) + for part_number in range(1, num_parts + 1): + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + start_byte = chunksize * (part_number - 1) + # Get a file-like object for that part and the size of the full + # file size for the associated file-like object for that part. + fileobj, full_size = self._get_upload_part_fileobj_with_full_size( transfer_future.meta.call_args.fileobj, start_byte=start_byte, part_size=chunksize, full_file_size=full_file_size, ) - - # Wrap fileobj with interrupt reader that will quickly cancel - # uploads if needed instead of having to wait for the socket - # to completely read all of the data. - fileobj = self._wrap_fileobj(fileobj) - - # Wrap the file-like object into a ReadFileChunk to get progress. - read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj( + + # Wrap fileobj with interrupt reader that will quickly cancel + # uploads if needed instead of having to wait for the socket + # to completely read all of the data. + fileobj = self._wrap_fileobj(fileobj) + + # Wrap the file-like object into a ReadFileChunk to get progress. 
+ read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=chunksize, full_file_size=full_size, callbacks=callbacks, close_callbacks=close_callbacks, ) - yield part_number, read_file_chunk - - def _get_deferred_open_file(self, fileobj, start_byte): - fileobj = DeferredOpenFile( + yield part_number, read_file_chunk + + def _get_deferred_open_file(self, fileobj, start_byte): + fileobj = DeferredOpenFile( fileobj, start_byte, open_function=self._osutil.open ) - return fileobj - - def _get_put_object_fileobj_with_full_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - size = transfer_future.meta.size - return self._get_deferred_open_file(fileobj, 0), size - - def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): - start_byte = kwargs['start_byte'] - full_size = kwargs['full_file_size'] - return self._get_deferred_open_file(fileobj, start_byte), full_size - - def _get_num_parts(self, transfer_future, part_size): + return fileobj + + def _get_put_object_fileobj_with_full_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + size = transfer_future.meta.size + return self._get_deferred_open_file(fileobj, 0), size + + def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): + start_byte = kwargs['start_byte'] + full_size = kwargs['full_file_size'] + return self._get_deferred_open_file(fileobj, start_byte), full_size + + def _get_num_parts(self, transfer_future, part_size): return int(math.ceil(transfer_future.meta.size / float(part_size))) - - -class UploadSeekableInputManager(UploadFilenameInputManager): - """Upload utility for an open file object""" - - @classmethod - def is_compatible(cls, upload_source): - return readable(upload_source) and seekable(upload_source) - - def stores_body_in_memory(self, operation_name): - if operation_name == 'put_object': - return False - else: - return True - - def provide_transfer_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - # To determine size, first determine the starting position - # Seek to the end and then find the difference in the length - # between the end and start positions. - start_position = fileobj.tell() - fileobj.seek(0, 2) - end_position = fileobj.tell() - fileobj.seek(start_position) - transfer_future.meta.provide_transfer_size( + + +class UploadSeekableInputManager(UploadFilenameInputManager): + """Upload utility for an open file object""" + + @classmethod + def is_compatible(cls, upload_source): + return readable(upload_source) and seekable(upload_source) + + def stores_body_in_memory(self, operation_name): + if operation_name == 'put_object': + return False + else: + return True + + def provide_transfer_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + # To determine size, first determine the starting position + # Seek to the end and then find the difference in the length + # between the end and start positions. + start_position = fileobj.tell() + fileobj.seek(0, 2) + end_position = fileobj.tell() + fileobj.seek(start_position) + transfer_future.meta.provide_transfer_size( end_position - start_position ) - - def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): - # Note: It is unfortunate that in order to do a multithreaded - # multipart upload we cannot simply copy the filelike object - # since there is not really a mechanism in python (i.e. os.dup - # points to the same OS filehandle which causes concurrency - # issues). 
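``UploadSeekableInputManager.provide_transfer_size`` above measures an open file object by remembering the current position, seeking to the end, and seeking back. A standalone sketch of that probe; the helper name is illustrative:

.. code-block:: python

    import io


    def remaining_size(fileobj):
        # Measure how many bytes remain from the current position without
        # consuming the stream: remember where we are, seek to the end,
        # then restore the original position.
        start_position = fileobj.tell()
        fileobj.seek(0, 2)                  # 2 == os.SEEK_END
        end_position = fileobj.tell()
        fileobj.seek(start_position)
        return end_position - start_position


    stream = io.BytesIO(b'0123456789')
    stream.read(3)                 # pretend the caller already consumed 3 bytes
    print(remaining_size(stream))  # 7
    print(stream.tell())           # 3: the position is restored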
So instead we need to read from the fileobj and + + def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): + # Note: It is unfortunate that in order to do a multithreaded + # multipart upload we cannot simply copy the filelike object + # since there is not really a mechanism in python (i.e. os.dup + # points to the same OS filehandle which causes concurrency + # issues). So instead we need to read from the fileobj and # chunk the data out to separate file-like objects in memory. - data = fileobj.read(kwargs['part_size']) - # We return the length of the data instead of the full_file_size + data = fileobj.read(kwargs['part_size']) + # We return the length of the data instead of the full_file_size # because we partitioned the data into separate BytesIO objects - # meaning the BytesIO object has no knowledge of its start position - # relative the input source nor access to the rest of the input - # source. So we must treat it as its own standalone file. + # meaning the BytesIO object has no knowledge of its start position + # relative the input source nor access to the rest of the input + # source. So we must treat it as its own standalone file. return BytesIO(data), len(data) - - def _get_put_object_fileobj_with_full_size(self, transfer_future): - fileobj = transfer_future.meta.call_args.fileobj - # The current position needs to be taken into account when retrieving - # the full size of the file. - size = fileobj.tell() + transfer_future.meta.size - return fileobj, size - - -class UploadNonSeekableInputManager(UploadInputManager): - """Upload utility for a file-like object that cannot seek.""" - - def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): + + def _get_put_object_fileobj_with_full_size(self, transfer_future): + fileobj = transfer_future.meta.call_args.fileobj + # The current position needs to be taken into account when retrieving + # the full size of the file. + size = fileobj.tell() + transfer_future.meta.size + return fileobj, size + + +class UploadNonSeekableInputManager(UploadInputManager): + """Upload utility for a file-like object that cannot seek.""" + + def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): super().__init__(osutil, transfer_coordinator, bandwidth_limiter) - self._initial_data = b'' - - @classmethod - def is_compatible(cls, upload_source): - return readable(upload_source) - - def stores_body_in_memory(self, operation_name): - return True - - def provide_transfer_size(self, transfer_future): - # No-op because there is no way to do this short of reading the entire - # body into memory. - return - - def requires_multipart_upload(self, transfer_future, config): - # If the user has set the size, we can use that. - if transfer_future.meta.size is not None: - return transfer_future.meta.size >= config.multipart_threshold - - # This is tricky to determine in this case because we can't know how - # large the input is. So to figure it out, we read data into memory - # up until the threshold and compare how much data was actually read - # against the threshold. 
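For a non-seekable stream, the comment above explains that the only way to decide on multipart is to read up to the threshold and check whether the stream could satisfy the whole read, holding on to whatever was read so it is not lost. A hedged sketch of that probe; the helper name and the in-memory stream are illustrative:

.. code-block:: python

    import io


    def needs_multipart(stream, threshold):
        # Read up to `threshold` bytes; if the stream satisfied the full
        # read, it is at least threshold-sized and multipart is required.
        # The buffered bytes are returned so no data is discarded.
        initial_data = stream.read(threshold)
        return len(initial_data) >= threshold, initial_data


    decision, buffered = needs_multipart(io.BytesIO(b'a' * 10), threshold=1024)
    print(decision)        # False: fewer than 1024 bytes could be read
    print(len(buffered))   # 10 bytes retained for the eventual PutObject body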
- fileobj = transfer_future.meta.call_args.fileobj - threshold = config.multipart_threshold - self._initial_data = self._read(fileobj, threshold, False) - if len(self._initial_data) < threshold: - return False - else: - return True - - def get_put_object_body(self, transfer_future): - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - fileobj = transfer_future.meta.call_args.fileobj - - body = self._wrap_data( + self._initial_data = b'' + + @classmethod + def is_compatible(cls, upload_source): + return readable(upload_source) + + def stores_body_in_memory(self, operation_name): + return True + + def provide_transfer_size(self, transfer_future): + # No-op because there is no way to do this short of reading the entire + # body into memory. + return + + def requires_multipart_upload(self, transfer_future, config): + # If the user has set the size, we can use that. + if transfer_future.meta.size is not None: + return transfer_future.meta.size >= config.multipart_threshold + + # This is tricky to determine in this case because we can't know how + # large the input is. So to figure it out, we read data into memory + # up until the threshold and compare how much data was actually read + # against the threshold. + fileobj = transfer_future.meta.call_args.fileobj + threshold = config.multipart_threshold + self._initial_data = self._read(fileobj, threshold, False) + if len(self._initial_data) < threshold: + return False + else: + return True + + def get_put_object_body(self, transfer_future): + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + fileobj = transfer_future.meta.call_args.fileobj + + body = self._wrap_data( self._initial_data + fileobj.read(), callbacks, close_callbacks ) - - # Zero out the stored data so we don't have additional copies - # hanging around in memory. - self._initial_data = None - return body - - def yield_upload_part_bodies(self, transfer_future, chunksize): - file_object = transfer_future.meta.call_args.fileobj - part_number = 0 - - # Continue reading parts from the file-like object until it is empty. - while True: - callbacks = self._get_progress_callbacks(transfer_future) - close_callbacks = self._get_close_callbacks(callbacks) - part_number += 1 - part_content = self._read(file_object, chunksize) - if not part_content: - break - part_object = self._wrap_data( + + # Zero out the stored data so we don't have additional copies + # hanging around in memory. + self._initial_data = None + return body + + def yield_upload_part_bodies(self, transfer_future, chunksize): + file_object = transfer_future.meta.call_args.fileobj + part_number = 0 + + # Continue reading parts from the file-like object until it is empty. + while True: + callbacks = self._get_progress_callbacks(transfer_future) + close_callbacks = self._get_close_callbacks(callbacks) + part_number += 1 + part_content = self._read(file_object, chunksize) + if not part_content: + break + part_object = self._wrap_data( part_content, callbacks, close_callbacks ) - - # Zero out part_content to avoid hanging on to additional data. - part_content = None - yield part_number, part_object - - def _read(self, fileobj, amount, truncate=True): - """ - Reads a specific amount of data from a stream and returns it. If there - is any data in initial_data, that will be popped out first. - - :type fileobj: A file-like object that implements read - :param fileobj: The stream to read from. 
- - :type amount: int - :param amount: The number of bytes to read from the stream. - - :type truncate: bool - :param truncate: Whether or not to truncate initial_data after - reading from it. - - :return: Generator which generates part bodies from the initial data. - """ - # If the the initial data is empty, we simply read from the fileobj - if len(self._initial_data) == 0: - return fileobj.read(amount) - + + # Zero out part_content to avoid hanging on to additional data. + part_content = None + yield part_number, part_object + + def _read(self, fileobj, amount, truncate=True): + """ + Reads a specific amount of data from a stream and returns it. If there + is any data in initial_data, that will be popped out first. + + :type fileobj: A file-like object that implements read + :param fileobj: The stream to read from. + + :type amount: int + :param amount: The number of bytes to read from the stream. + + :type truncate: bool + :param truncate: Whether or not to truncate initial_data after + reading from it. + + :return: Generator which generates part bodies from the initial data. + """ + # If the the initial data is empty, we simply read from the fileobj + if len(self._initial_data) == 0: + return fileobj.read(amount) + # If the requested number of bytes is less than the amount of - # initial data, pull entirely from initial data. - if amount <= len(self._initial_data): - data = self._initial_data[:amount] - # Truncate initial data so we don't hang onto the data longer - # than we need. - if truncate: - self._initial_data = self._initial_data[amount:] - return data - - # At this point there is some initial data left, but not enough to - # satisfy the number of bytes requested. Pull out the remaining - # initial data and read the rest from the fileobj. - amount_to_read = amount - len(self._initial_data) - data = self._initial_data + fileobj.read(amount_to_read) - - # Zero out initial data so we don't hang onto the data any more. - if truncate: - self._initial_data = b'' - return data - - def _wrap_data(self, data, callbacks, close_callbacks): - """ - Wraps data with the interrupt reader and the file chunk reader. - - :type data: bytes - :param data: The data to wrap. - - :type callbacks: list - :param callbacks: The callbacks associated with the transfer future. - - :type close_callbacks: list - :param close_callbacks: The callbacks to be called when closing the - wrapper for the data. - - :return: Fully wrapped data. - """ + # initial data, pull entirely from initial data. + if amount <= len(self._initial_data): + data = self._initial_data[:amount] + # Truncate initial data so we don't hang onto the data longer + # than we need. + if truncate: + self._initial_data = self._initial_data[amount:] + return data + + # At this point there is some initial data left, but not enough to + # satisfy the number of bytes requested. Pull out the remaining + # initial data and read the rest from the fileobj. + amount_to_read = amount - len(self._initial_data) + data = self._initial_data + fileobj.read(amount_to_read) + + # Zero out initial data so we don't hang onto the data any more. + if truncate: + self._initial_data = b'' + return data + + def _wrap_data(self, data, callbacks, close_callbacks): + """ + Wraps data with the interrupt reader and the file chunk reader. + + :type data: bytes + :param data: The data to wrap. + + :type callbacks: list + :param callbacks: The callbacks associated with the transfer future. 
+ + :type close_callbacks: list + :param close_callbacks: The callbacks to be called when closing the + wrapper for the data. + + :return: Fully wrapped data. + """ fileobj = self._wrap_fileobj(BytesIO(data)) - return self._osutil.open_file_chunk_reader_from_fileobj( + return self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=len(data), full_file_size=len(data), callbacks=callbacks, close_callbacks=close_callbacks, ) - - -class UploadSubmissionTask(SubmissionTask): - """Task for submitting tasks to execute an upload""" - - UPLOAD_PART_ARGS = [ - 'SSECustomerKey', - 'SSECustomerAlgorithm', - 'SSECustomerKeyMD5', - 'RequestPayer', + + +class UploadSubmissionTask(SubmissionTask): + """Task for submitting tasks to execute an upload""" + + UPLOAD_PART_ARGS = [ + 'SSECustomerKey', + 'SSECustomerAlgorithm', + 'SSECustomerKeyMD5', + 'RequestPayer', 'ExpectedBucketOwner', - ] - + ] + COMPLETE_MULTIPART_ARGS = ['RequestPayer', 'ExpectedBucketOwner'] - - def _get_upload_input_manager_cls(self, transfer_future): + + def _get_upload_input_manager_cls(self, transfer_future): """Retrieves a class for managing input for an upload based on file type - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future for the request - - :rtype: class of UploadInputManager - :returns: The appropriate class to use for managing a specific type of - input for uploads. - """ - upload_manager_resolver_chain = [ - UploadFilenameInputManager, - UploadSeekableInputManager, + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future for the request + + :rtype: class of UploadInputManager + :returns: The appropriate class to use for managing a specific type of + input for uploads. 
+ """ + upload_manager_resolver_chain = [ + UploadFilenameInputManager, + UploadSeekableInputManager, UploadNonSeekableInputManager, - ] - - fileobj = transfer_future.meta.call_args.fileobj - for upload_manager_cls in upload_manager_resolver_chain: - if upload_manager_cls.is_compatible(fileobj): - return upload_manager_cls - raise RuntimeError( + ] + + fileobj = transfer_future.meta.call_args.fileobj + for upload_manager_cls in upload_manager_resolver_chain: + if upload_manager_cls.is_compatible(fileobj): + return upload_manager_cls + raise RuntimeError( 'Input {} of type: {} is not supported.'.format( fileobj, type(fileobj) ) ) - + def _submit( self, client, @@ -557,37 +557,37 @@ class UploadSubmissionTask(SubmissionTask): transfer_future, bandwidth_limiter=None, ): - """ - :param client: The client associated with the transfer manager - - :type config: s3transfer.manager.TransferConfig - :param config: The transfer config associated with the transfer - manager - - :type osutil: s3transfer.utils.OSUtil - :param osutil: The os utility associated to the transfer manager - - :type request_executor: s3transfer.futures.BoundedExecutor - :param request_executor: The request executor associated with the - transfer manager - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future associated with the - transfer request that tasks are being submitted for - """ - upload_input_manager = self._get_upload_input_manager_cls( + """ + :param client: The client associated with the transfer manager + + :type config: s3transfer.manager.TransferConfig + :param config: The transfer config associated with the transfer + manager + + :type osutil: s3transfer.utils.OSUtil + :param osutil: The os utility associated to the transfer manager + + :type request_executor: s3transfer.futures.BoundedExecutor + :param request_executor: The request executor associated with the + transfer manager + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future associated with the + transfer request that tasks are being submitted for + """ + upload_input_manager = self._get_upload_input_manager_cls( transfer_future )(osutil, self._transfer_coordinator, bandwidth_limiter) - - # Determine the size if it was not provided - if transfer_future.meta.size is None: - upload_input_manager.provide_transfer_size(transfer_future) - - # Do a multipart upload if needed, otherwise do a regular put object. - if not upload_input_manager.requires_multipart_upload( + + # Determine the size if it was not provided + if transfer_future.meta.size is None: + upload_input_manager.provide_transfer_size(transfer_future) + + # Do a multipart upload if needed, otherwise do a regular put object. 
+ if not upload_input_manager.requires_multipart_upload( transfer_future, config ): - self._submit_upload_request( + self._submit_upload_request( client, config, osutil, @@ -595,8 +595,8 @@ class UploadSubmissionTask(SubmissionTask): transfer_future, upload_input_manager, ) - else: - self._submit_multipart_request( + else: + self._submit_multipart_request( client, config, osutil, @@ -604,7 +604,7 @@ class UploadSubmissionTask(SubmissionTask): transfer_future, upload_input_manager, ) - + def _submit_upload_request( self, client, @@ -614,32 +614,32 @@ class UploadSubmissionTask(SubmissionTask): transfer_future, upload_input_manager, ): - call_args = transfer_future.meta.call_args - - # Get any tags that need to be associated to the put object task - put_object_tag = self._get_upload_task_tag( + call_args = transfer_future.meta.call_args + + # Get any tags that need to be associated to the put object task + put_object_tag = self._get_upload_task_tag( upload_input_manager, 'put_object' ) - - # Submit the request of a single upload. - self._transfer_coordinator.submit( - request_executor, - PutObjectTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'fileobj': upload_input_manager.get_put_object_body( + + # Submit the request of a single upload. + self._transfer_coordinator.submit( + request_executor, + PutObjectTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'fileobj': upload_input_manager.get_put_object_body( transfer_future ), - 'bucket': call_args.bucket, - 'key': call_args.key, + 'bucket': call_args.bucket, + 'key': call_args.key, 'extra_args': call_args.extra_args, - }, + }, is_final=True, - ), + ), tag=put_object_tag, - ) - + ) + def _submit_multipart_request( self, client, @@ -649,141 +649,141 @@ class UploadSubmissionTask(SubmissionTask): transfer_future, upload_input_manager, ): - call_args = transfer_future.meta.call_args - - # Submit the request to create a multipart upload. - create_multipart_future = self._transfer_coordinator.submit( - request_executor, - CreateMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': call_args.extra_args, + call_args = transfer_future.meta.call_args + + # Submit the request to create a multipart upload. + create_multipart_future = self._transfer_coordinator.submit( + request_executor, + CreateMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': call_args.extra_args, }, ), - ) - - # Submit requests to upload the parts of the file. - part_futures = [] - extra_part_args = self._extra_upload_part_args(call_args.extra_args) - - # Get any tags that need to be associated to the submitted task - # for upload the data - upload_part_tag = self._get_upload_task_tag( + ) + + # Submit requests to upload the parts of the file. 
+ part_futures = [] + extra_part_args = self._extra_upload_part_args(call_args.extra_args) + + # Get any tags that need to be associated to the submitted task + # for upload the data + upload_part_tag = self._get_upload_task_tag( upload_input_manager, 'upload_part' ) - - size = transfer_future.meta.size - adjuster = ChunksizeAdjuster() - chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size) - part_iterator = upload_input_manager.yield_upload_part_bodies( + + size = transfer_future.meta.size + adjuster = ChunksizeAdjuster() + chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size) + part_iterator = upload_input_manager.yield_upload_part_bodies( transfer_future, chunksize ) - - for part_number, fileobj in part_iterator: - part_futures.append( - self._transfer_coordinator.submit( - request_executor, - UploadPartTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'fileobj': fileobj, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'part_number': part_number, + + for part_number, fileobj in part_iterator: + part_futures.append( + self._transfer_coordinator.submit( + request_executor, + UploadPartTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'fileobj': fileobj, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'part_number': part_number, 'extra_args': extra_part_args, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future }, - ), + pending_main_kwargs={ + 'upload_id': create_multipart_future + }, + ), tag=upload_part_tag, - ) - ) - - complete_multipart_extra_args = self._extra_complete_multipart_args( + ) + ) + + complete_multipart_extra_args = self._extra_complete_multipart_args( call_args.extra_args ) - # Submit the request to complete the multipart upload. - self._transfer_coordinator.submit( - request_executor, - CompleteMultipartUploadTask( - transfer_coordinator=self._transfer_coordinator, - main_kwargs={ - 'client': client, - 'bucket': call_args.bucket, - 'key': call_args.key, - 'extra_args': complete_multipart_extra_args, - }, - pending_main_kwargs={ - 'upload_id': create_multipart_future, + # Submit the request to complete the multipart upload. + self._transfer_coordinator.submit( + request_executor, + CompleteMultipartUploadTask( + transfer_coordinator=self._transfer_coordinator, + main_kwargs={ + 'client': client, + 'bucket': call_args.bucket, + 'key': call_args.key, + 'extra_args': complete_multipart_extra_args, + }, + pending_main_kwargs={ + 'upload_id': create_multipart_future, 'parts': part_futures, - }, + }, is_final=True, ), - ) - - def _extra_upload_part_args(self, extra_args): - # Only the args in UPLOAD_PART_ARGS actually need to be passed - # onto the upload_part calls. - return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS) - - def _extra_complete_multipart_args(self, extra_args): - return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) - - def _get_upload_task_tag(self, upload_input_manager, operation_name): - tag = None - if upload_input_manager.stores_body_in_memory(operation_name): - tag = IN_MEMORY_UPLOAD_TAG - return tag - - -class PutObjectTask(Task): - """Task to do a nonmultipart upload""" - - def _main(self, client, fileobj, bucket, key, extra_args): - """ - :param client: The client to use when calling PutObject - :param fileobj: The file to upload. 
- :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - """ - with fileobj as body: - client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) - - -class UploadPartTask(Task): - """Task to upload a part in a multipart upload""" + ) + + def _extra_upload_part_args(self, extra_args): + # Only the args in UPLOAD_PART_ARGS actually need to be passed + # onto the upload_part calls. + return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS) + + def _extra_complete_multipart_args(self, extra_args): + return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) + + def _get_upload_task_tag(self, upload_input_manager, operation_name): + tag = None + if upload_input_manager.stores_body_in_memory(operation_name): + tag = IN_MEMORY_UPLOAD_TAG + return tag + + +class PutObjectTask(Task): + """Task to do a nonmultipart upload""" + + def _main(self, client, fileobj, bucket, key, extra_args): + """ + :param client: The client to use when calling PutObject + :param fileobj: The file to upload. + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + """ + with fileobj as body: + client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) + + +class UploadPartTask(Task): + """Task to upload a part in a multipart upload""" def _main( self, client, fileobj, bucket, key, upload_id, part_number, extra_args ): - """ - :param client: The client to use when calling PutObject - :param fileobj: The file to upload. - :param bucket: The name of the bucket to upload to - :param key: The name of the key to upload to - :param upload_id: The id of the upload - :param part_number: The number representing the part of the multipart - upload - :param extra_args: A dictionary of any extra arguments that may be - used in the upload. - - :rtype: dict - :returns: A dictionary representing a part:: - - {'Etag': etag_value, 'PartNumber': part_number} - - This value can be appended to a list to be used to complete - the multipart upload. - """ - with fileobj as body: - response = client.upload_part( + """ + :param client: The client to use when calling PutObject + :param fileobj: The file to upload. + :param bucket: The name of the bucket to upload to + :param key: The name of the key to upload to + :param upload_id: The id of the upload + :param part_number: The number representing the part of the multipart + upload + :param extra_args: A dictionary of any extra arguments that may be + used in the upload. + + :rtype: dict + :returns: A dictionary representing a part:: + + {'Etag': etag_value, 'PartNumber': part_number} + + This value can be appended to a list to be used to complete + the multipart upload. 
+ """ + with fileobj as body: + response = client.upload_part( Bucket=bucket, Key=key, UploadId=upload_id, @@ -791,5 +791,5 @@ class UploadPartTask(Task): Body=body, **extra_args ) - etag = response['ETag'] - return {'ETag': etag, 'PartNumber': part_number} + etag = response['ETag'] + return {'ETag': etag, 'PartNumber': part_number} diff --git a/contrib/python/s3transfer/py3/s3transfer/utils.py b/contrib/python/s3transfer/py3/s3transfer/utils.py index 6901f9fcbd..ba881c67dd 100644 --- a/contrib/python/s3transfer/py3/s3transfer/utils.py +++ b/contrib/python/s3transfer/py3/s3transfer/utils.py @@ -1,39 +1,39 @@ -# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). You -# may not use this file except in compliance with the License. A copy of -# the License is located at -# -# http://aws.amazon.com/apache2.0/ -# -# or in the "license" file accompanying this file. This file is -# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific -# language governing permissions and limitations under the License. -import functools +# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +import functools import logging -import math -import os +import math +import os import random import socket -import stat -import string -import threading -from collections import defaultdict - +import stat +import string +import threading +from collections import defaultdict + from botocore.exceptions import IncompleteReadError, ReadTimeoutError from s3transfer.compat import SOCKET_ERROR, fallocate, rename_file - -MAX_PARTS = 10000 -# The maximum file size you can upload via S3 per request. -# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html -# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html -MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3) -MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2) -logger = logging.getLogger(__name__) - - + +MAX_PARTS = 10000 +# The maximum file size you can upload via S3 per request. 
+# See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html +# and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html +MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3) +MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2) +logger = logging.getLogger(__name__) + + S3_RETRYABLE_DOWNLOAD_ERRORS = ( socket.timeout, SOCKET_ERROR, @@ -42,24 +42,24 @@ S3_RETRYABLE_DOWNLOAD_ERRORS = ( ) -def random_file_extension(num_digits=8): - return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) - - -def signal_not_transferring(request, operation_name, **kwargs): +def random_file_extension(num_digits=8): + return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) + + +def signal_not_transferring(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and hasattr( request.body, 'signal_not_transferring' ): - request.body.signal_not_transferring() - - -def signal_transferring(request, operation_name, **kwargs): + request.body.signal_not_transferring() + + +def signal_transferring(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and hasattr( request.body, 'signal_transferring' ): - request.body.signal_transferring() - - + request.body.signal_transferring() + + def calculate_num_parts(size, part_size): return int(math.ceil(size / float(part_size))) @@ -67,190 +67,190 @@ def calculate_num_parts(size, part_size): def calculate_range_parameter( part_size, part_index, num_parts, total_size=None ): - """Calculate the range parameter for multipart downloads/copies - - :type part_size: int - :param part_size: The size of the part - - :type part_index: int - :param part_index: The index for which this parts starts. This index starts - at zero - - :type num_parts: int - :param num_parts: The total number of parts in the transfer - - :returns: The value to use for Range parameter on downloads or - the CopySourceRange parameter for copies - """ - # Used to calculate the Range parameter - start_range = part_index * part_size - if part_index == num_parts - 1: - end_range = '' - if total_size is not None: - end_range = str(total_size - 1) - else: - end_range = start_range + part_size - 1 + """Calculate the range parameter for multipart downloads/copies + + :type part_size: int + :param part_size: The size of the part + + :type part_index: int + :param part_index: The index for which this parts starts. This index starts + at zero + + :type num_parts: int + :param num_parts: The total number of parts in the transfer + + :returns: The value to use for Range parameter on downloads or + the CopySourceRange parameter for copies + """ + # Used to calculate the Range parameter + start_range = part_index * part_size + if part_index == num_parts - 1: + end_range = '' + if total_size is not None: + end_range = str(total_size - 1) + else: + end_range = start_range + part_size - 1 range_param = f'bytes={start_range}-{end_range}' - return range_param - - -def get_callbacks(transfer_future, callback_type): - """Retrieves callbacks from a subscriber - - :type transfer_future: s3transfer.futures.TransferFuture - :param transfer_future: The transfer future the subscriber is associated - to. - - :type callback_type: str - :param callback_type: The type of callback to retrieve from the subscriber. - Valid types include: - * 'queued' - * 'progress' - * 'done' - - :returns: A list of callbacks for the type specified. All callbacks are - preinjected with the transfer future. 
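# Worked example (illustrative only; the byte counts are assumptions, not values
# from the diff): how calculate_range_parameter above builds the Range /
# CopySourceRange value for a middle part versus the final, possibly short, part.
from s3transfer.utils import calculate_num_parts, calculate_range_parameter

part_size = 5 * 1024 ** 2          # 5 MiB parts
total_size = 12 * 1024 ** 2        # 12 MiB object
num_parts = calculate_num_parts(total_size, part_size)       # -> 3
first = calculate_range_parameter(part_size, 0, num_parts)
last = calculate_range_parameter(part_size, 2, num_parts, total_size)
assert first == 'bytes=0-5242879'
assert last == 'bytes=10485760-12582911'   # last part ends at total_size - 1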
- """ - callbacks = [] - for subscriber in transfer_future.meta.call_args.subscribers: - callback_name = 'on_' + callback_type - if hasattr(subscriber, callback_name): - callbacks.append( - functools.partial( + return range_param + + +def get_callbacks(transfer_future, callback_type): + """Retrieves callbacks from a subscriber + + :type transfer_future: s3transfer.futures.TransferFuture + :param transfer_future: The transfer future the subscriber is associated + to. + + :type callback_type: str + :param callback_type: The type of callback to retrieve from the subscriber. + Valid types include: + * 'queued' + * 'progress' + * 'done' + + :returns: A list of callbacks for the type specified. All callbacks are + preinjected with the transfer future. + """ + callbacks = [] + for subscriber in transfer_future.meta.call_args.subscribers: + callback_name = 'on_' + callback_type + if hasattr(subscriber, callback_name): + callbacks.append( + functools.partial( getattr(subscriber, callback_name), future=transfer_future - ) - ) - return callbacks - - -def invoke_progress_callbacks(callbacks, bytes_transferred): - """Calls all progress callbacks - - :param callbacks: A list of progress callbacks to invoke - :param bytes_transferred: The number of bytes transferred. This is passed - to the callbacks. If no bytes were transferred the callbacks will not - be invoked because no progress was achieved. It is also possible - to receive a negative amount which comes from retrying a transfer - request. - """ - # Only invoke the callbacks if bytes were actually transferred. - if bytes_transferred: - for callback in callbacks: - callback(bytes_transferred=bytes_transferred) - - -def get_filtered_dict(original_dict, whitelisted_keys): - """Gets a dictionary filtered by whitelisted keys - - :param original_dict: The original dictionary of arguments to source keys - and values. - :param whitelisted_key: A list of keys to include in the filtered - dictionary. - - :returns: A dictionary containing key/values from the original dictionary - whose key was included in the whitelist - """ - filtered_dict = {} - for key, value in original_dict.items(): - if key in whitelisted_keys: - filtered_dict[key] = value - return filtered_dict - - + ) + ) + return callbacks + + +def invoke_progress_callbacks(callbacks, bytes_transferred): + """Calls all progress callbacks + + :param callbacks: A list of progress callbacks to invoke + :param bytes_transferred: The number of bytes transferred. This is passed + to the callbacks. If no bytes were transferred the callbacks will not + be invoked because no progress was achieved. It is also possible + to receive a negative amount which comes from retrying a transfer + request. + """ + # Only invoke the callbacks if bytes were actually transferred. + if bytes_transferred: + for callback in callbacks: + callback(bytes_transferred=bytes_transferred) + + +def get_filtered_dict(original_dict, whitelisted_keys): + """Gets a dictionary filtered by whitelisted keys + + :param original_dict: The original dictionary of arguments to source keys + and values. + :param whitelisted_key: A list of keys to include in the filtered + dictionary. 
+ + :returns: A dictionary containing key/values from the original dictionary + whose key was included in the whitelist + """ + filtered_dict = {} + for key, value in original_dict.items(): + if key in whitelisted_keys: + filtered_dict[key] = value + return filtered_dict + + class CallArgs: - def __init__(self, **kwargs): - """A class that records call arguments - - The call arguments must be passed as keyword arguments. It will set - each keyword argument as an attribute of the object along with its - associated value. - """ - for arg, value in kwargs.items(): - setattr(self, arg, value) - - + def __init__(self, **kwargs): + """A class that records call arguments + + The call arguments must be passed as keyword arguments. It will set + each keyword argument as an attribute of the object along with its + associated value. + """ + for arg, value in kwargs.items(): + setattr(self, arg, value) + + class FunctionContainer: - """An object that contains a function and any args or kwargs to call it - - When called the provided function will be called with provided args - and kwargs. - """ - - def __init__(self, func, *args, **kwargs): - self._func = func - self._args = args - self._kwargs = kwargs - - def __repr__(self): + """An object that contains a function and any args or kwargs to call it + + When called the provided function will be called with provided args + and kwargs. + """ + + def __init__(self, func, *args, **kwargs): + self._func = func + self._args = args + self._kwargs = kwargs + + def __repr__(self): return 'Function: {} with args {} and kwargs {}'.format( self._func, self._args, self._kwargs ) - - def __call__(self): - return self._func(*self._args, **self._kwargs) - - + + def __call__(self): + return self._func(*self._args, **self._kwargs) + + class CountCallbackInvoker: - """An abstraction to invoke a callback when a shared count reaches zero - - :param callback: Callback invoke when finalized count reaches zero - """ - - def __init__(self, callback): - self._lock = threading.Lock() - self._callback = callback - self._count = 0 - self._is_finalized = False - - @property - def current_count(self): - with self._lock: - return self._count - - def increment(self): - """Increment the count by one""" - with self._lock: - if self._is_finalized: - raise RuntimeError( - 'Counter has been finalized it can no longer be ' - 'incremented.' - ) - self._count += 1 - - def decrement(self): - """Decrement the count by one""" - with self._lock: - if self._count == 0: - raise RuntimeError( + """An abstraction to invoke a callback when a shared count reaches zero + + :param callback: Callback invoke when finalized count reaches zero + """ + + def __init__(self, callback): + self._lock = threading.Lock() + self._callback = callback + self._count = 0 + self._is_finalized = False + + @property + def current_count(self): + with self._lock: + return self._count + + def increment(self): + """Increment the count by one""" + with self._lock: + if self._is_finalized: + raise RuntimeError( + 'Counter has been finalized it can no longer be ' + 'incremented.' + ) + self._count += 1 + + def decrement(self): + """Decrement the count by one""" + with self._lock: + if self._count == 0: + raise RuntimeError( 'Counter is at zero. 
It cannot dip below zero' ) - self._count -= 1 - if self._is_finalized and self._count == 0: - self._callback() - - def finalize(self): - """Finalize the counter - - Once finalized, the counter never be incremented and the callback - can be invoked once the count reaches zero - """ - with self._lock: - self._is_finalized = True - if self._count == 0: - self._callback() - - + self._count -= 1 + if self._is_finalized and self._count == 0: + self._callback() + + def finalize(self): + """Finalize the counter + + Once finalized, the counter never be incremented and the callback + can be invoked once the count reaches zero + """ + with self._lock: + self._is_finalized = True + if self._count == 0: + self._callback() + + class OSUtils: _MAX_FILENAME_LEN = 255 - def get_file_size(self, filename): - return os.path.getsize(filename) - - def open_file_chunk_reader(self, filename, start_byte, size, callbacks): + def get_file_size(self, filename): + return os.path.getsize(filename) + + def open_file_chunk_reader(self, filename, start_byte, size, callbacks): return ReadFileChunk.from_filename( filename, start_byte, size, callbacks, enable_callbacks=False ) - + def open_file_chunk_reader_from_fileobj( self, fileobj, @@ -259,7 +259,7 @@ class OSUtils: callbacks, close_callbacks=None, ): - return ReadFileChunk( + return ReadFileChunk( fileobj, chunk_size, full_file_size, @@ -267,58 +267,58 @@ class OSUtils: enable_callbacks=False, close_callbacks=close_callbacks, ) - - def open(self, filename, mode): - return open(filename, mode) - - def remove_file(self, filename): - """Remove a file, noop if file does not exist.""" - # Unlike os.remove, if the file does not exist, - # then this method does nothing. - try: - os.remove(filename) - except OSError: - pass - - def rename_file(self, current_filename, new_filename): - rename_file(current_filename, new_filename) - - def is_special_file(cls, filename): - """Checks to see if a file is a special UNIX file. - - It checks if the file is a character special device, block special - device, FIFO, or socket. - - :param filename: Name of the file - - :returns: True if the file is a special file. False, if is not. - """ - # If it does not exist, it must be a new file so it cannot be - # a special file. - if not os.path.exists(filename): - return False - mode = os.stat(filename).st_mode - # Character special device. - if stat.S_ISCHR(mode): - return True - # Block special device - if stat.S_ISBLK(mode): - return True - # Named pipe / FIFO - if stat.S_ISFIFO(mode): - return True - # Socket. - if stat.S_ISSOCK(mode): - return True - return False - + + def open(self, filename, mode): + return open(filename, mode) + + def remove_file(self, filename): + """Remove a file, noop if file does not exist.""" + # Unlike os.remove, if the file does not exist, + # then this method does nothing. + try: + os.remove(filename) + except OSError: + pass + + def rename_file(self, current_filename, new_filename): + rename_file(current_filename, new_filename) + + def is_special_file(cls, filename): + """Checks to see if a file is a special UNIX file. + + It checks if the file is a character special device, block special + device, FIFO, or socket. + + :param filename: Name of the file + + :returns: True if the file is a special file. False, if is not. + """ + # If it does not exist, it must be a new file so it cannot be + # a special file. + if not os.path.exists(filename): + return False + mode = os.stat(filename).st_mode + # Character special device. 
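# Usage sketch for the CountCallbackInvoker shown above (illustrative only, not
# part of the diff): the callback fires once the count has been finalized and
# every increment has been matched by a decrement.
from s3transfer.utils import CountCallbackInvoker

done = []
invoker = CountCallbackInvoker(lambda: done.append(True))
invoker.increment()      # first outstanding unit of work
invoker.increment()      # second outstanding unit of work
invoker.finalize()       # no further increments are allowed after this
invoker.decrement()      # count 2 -> 1, callback not invoked yet
invoker.decrement()      # count 1 -> 0, callback invoked now
assert done == [True]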
+ if stat.S_ISCHR(mode): + return True + # Block special device + if stat.S_ISBLK(mode): + return True + # Named pipe / FIFO + if stat.S_ISFIFO(mode): + return True + # Socket. + if stat.S_ISSOCK(mode): + return True + return False + def get_temp_filename(self, filename): suffix = os.extsep + random_file_extension() path = os.path.dirname(filename) name = os.path.basename(filename) temp_filename = name[: self._MAX_FILENAME_LEN - len(suffix)] + suffix return os.path.join(path, temp_filename) - + def allocate(self, filename, size): try: with self.open(filename, 'wb') as f: @@ -329,72 +329,72 @@ class OSUtils: class DeferredOpenFile: - def __init__(self, filename, start_byte=0, mode='rb', open_function=open): - """A class that defers the opening of a file till needed - - This is useful for deferring opening of a file till it is needed - in a separate thread, as there is a limit of how many open files - there can be in a single thread for most operating systems. The - file gets opened in the following methods: ``read()``, ``seek()``, - and ``__enter__()`` - - :type filename: str - :param filename: The name of the file to open - - :type start_byte: int - :param start_byte: The byte to seek to when the file is opened. - - :type mode: str - :param mode: The mode to use to open the file - - :type open_function: function - :param open_function: The function to use to open the file - """ - self._filename = filename - self._fileobj = None - self._start_byte = start_byte - self._mode = mode - self._open_function = open_function - - def _open_if_needed(self): - if self._fileobj is None: - self._fileobj = self._open_function(self._filename, self._mode) - if self._start_byte != 0: - self._fileobj.seek(self._start_byte) - - @property - def name(self): - return self._filename - - def read(self, amount=None): - self._open_if_needed() - return self._fileobj.read(amount) - - def write(self, data): - self._open_if_needed() - self._fileobj.write(data) - + def __init__(self, filename, start_byte=0, mode='rb', open_function=open): + """A class that defers the opening of a file till needed + + This is useful for deferring opening of a file till it is needed + in a separate thread, as there is a limit of how many open files + there can be in a single thread for most operating systems. The + file gets opened in the following methods: ``read()``, ``seek()``, + and ``__enter__()`` + + :type filename: str + :param filename: The name of the file to open + + :type start_byte: int + :param start_byte: The byte to seek to when the file is opened. 
+ + :type mode: str + :param mode: The mode to use to open the file + + :type open_function: function + :param open_function: The function to use to open the file + """ + self._filename = filename + self._fileobj = None + self._start_byte = start_byte + self._mode = mode + self._open_function = open_function + + def _open_if_needed(self): + if self._fileobj is None: + self._fileobj = self._open_function(self._filename, self._mode) + if self._start_byte != 0: + self._fileobj.seek(self._start_byte) + + @property + def name(self): + return self._filename + + def read(self, amount=None): + self._open_if_needed() + return self._fileobj.read(amount) + + def write(self, data): + self._open_if_needed() + self._fileobj.write(data) + def seek(self, where, whence=0): - self._open_if_needed() + self._open_if_needed() self._fileobj.seek(where, whence) - - def tell(self): - if self._fileobj is None: - return self._start_byte - return self._fileobj.tell() - - def close(self): - if self._fileobj: - self._fileobj.close() - - def __enter__(self): - self._open_if_needed() - return self - - def __exit__(self, *args, **kwargs): - self.close() - - + + def tell(self): + if self._fileobj is None: + return self._start_byte + return self._fileobj.tell() + + def close(self): + if self._fileobj: + self._fileobj.close() + + def __enter__(self): + self._open_if_needed() + return self + + def __exit__(self, *args, **kwargs): + self.close() + + class ReadFileChunk: def __init__( self, @@ -405,42 +405,42 @@ class ReadFileChunk: enable_callbacks=True, close_callbacks=None, ): - """ - - Given a file object shown below:: - - |___________________________________________________| - 0 | | full_file_size - |----chunk_size---| - f.tell() - - :type fileobj: file - :param fileobj: File like object - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callbacks: A list of function(amount_read) - :param callbacks: Called whenever data is read from this object in the - order provided. - - :type enable_callbacks: boolean - :param enable_callbacks: True if to run callbacks. Otherwise, do not - run callbacks - - :type close_callbacks: A list of function() - :param close_callbacks: Called when close is called. The function - should take no arguments. - """ - self._fileobj = fileobj - self._start_byte = self._fileobj.tell() - self._size = self._calculate_file_size( + """ + + Given a file object shown below:: + + |___________________________________________________| + 0 | | full_file_size + |----chunk_size---| + f.tell() + + :type fileobj: file + :param fileobj: File like object + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callbacks: A list of function(amount_read) + :param callbacks: Called whenever data is read from this object in the + order provided. + + :type enable_callbacks: boolean + :param enable_callbacks: True if to run callbacks. Otherwise, do not + run callbacks + + :type close_callbacks: A list of function() + :param close_callbacks: Called when close is called. The function + should take no arguments. 
+ """ + self._fileobj = fileobj + self._start_byte = self._fileobj.tell() + self._size = self._calculate_file_size( self._fileobj, requested_size=chunk_size, start_byte=self._start_byte, @@ -448,16 +448,16 @@ class ReadFileChunk: ) # _amount_read represents the position in the chunk and may exceed # the chunk size, but won't allow reads out of bounds. - self._amount_read = 0 - self._callbacks = callbacks - if callbacks is None: - self._callbacks = [] - self._callbacks_enabled = enable_callbacks - self._close_callbacks = close_callbacks - if close_callbacks is None: - self._close_callbacks = close_callbacks - - @classmethod + self._amount_read = 0 + self._callbacks = callbacks + if callbacks is None: + self._callbacks = [] + self._callbacks_enabled = enable_callbacks + self._close_callbacks = close_callbacks + if close_callbacks is None: + self._close_callbacks = close_callbacks + + @classmethod def from_filename( cls, filename, @@ -466,70 +466,70 @@ class ReadFileChunk: callbacks=None, enable_callbacks=True, ): - """Convenience factory function to create from a filename. - - :type start_byte: int - :param start_byte: The first byte from which to start reading. - - :type chunk_size: int - :param chunk_size: The max chunk size to read. Trying to read - pass the end of the chunk size will behave like you've - reached the end of the file. - - :type full_file_size: int - :param full_file_size: The entire content length associated - with ``fileobj``. - - :type callbacks: function(amount_read) - :param callbacks: Called whenever data is read from this object. - - :type enable_callbacks: bool - :param enable_callbacks: Indicate whether to invoke callback - during read() calls. - - :rtype: ``ReadFileChunk`` - :return: A new instance of ``ReadFileChunk`` - - """ - f = open(filename, 'rb') - f.seek(start_byte) - file_size = os.fstat(f.fileno()).st_size - return cls(f, chunk_size, file_size, callbacks, enable_callbacks) - + """Convenience factory function to create from a filename. + + :type start_byte: int + :param start_byte: The first byte from which to start reading. + + :type chunk_size: int + :param chunk_size: The max chunk size to read. Trying to read + pass the end of the chunk size will behave like you've + reached the end of the file. + + :type full_file_size: int + :param full_file_size: The entire content length associated + with ``fileobj``. + + :type callbacks: function(amount_read) + :param callbacks: Called whenever data is read from this object. + + :type enable_callbacks: bool + :param enable_callbacks: Indicate whether to invoke callback + during read() calls. 
+ + :rtype: ``ReadFileChunk`` + :return: A new instance of ``ReadFileChunk`` + + """ + f = open(filename, 'rb') + f.seek(start_byte) + file_size = os.fstat(f.fileno()).st_size + return cls(f, chunk_size, file_size, callbacks, enable_callbacks) + def _calculate_file_size( self, fileobj, requested_size, start_byte, actual_file_size ): - max_chunk_size = actual_file_size - start_byte - return min(max_chunk_size, requested_size) - - def read(self, amount=None): + max_chunk_size = actual_file_size - start_byte + return min(max_chunk_size, requested_size) + + def read(self, amount=None): amount_left = max(self._size - self._amount_read, 0) - if amount is None: + if amount is None: amount_to_read = amount_left - else: + else: amount_to_read = min(amount_left, amount) - data = self._fileobj.read(amount_to_read) - self._amount_read += len(data) - if self._callbacks is not None and self._callbacks_enabled: - invoke_progress_callbacks(self._callbacks, len(data)) - return data - - def signal_transferring(self): - self.enable_callback() - if hasattr(self._fileobj, 'signal_transferring'): - self._fileobj.signal_transferring() - - def signal_not_transferring(self): - self.disable_callback() - if hasattr(self._fileobj, 'signal_not_transferring'): - self._fileobj.signal_not_transferring() - - def enable_callback(self): - self._callbacks_enabled = True - - def disable_callback(self): - self._callbacks_enabled = False - + data = self._fileobj.read(amount_to_read) + self._amount_read += len(data) + if self._callbacks is not None and self._callbacks_enabled: + invoke_progress_callbacks(self._callbacks, len(data)) + return data + + def signal_transferring(self): + self.enable_callback() + if hasattr(self._fileobj, 'signal_transferring'): + self._fileobj.signal_transferring() + + def signal_not_transferring(self): + self.disable_callback() + if hasattr(self._fileobj, 'signal_not_transferring'): + self._fileobj.signal_not_transferring() + + def enable_callback(self): + self._callbacks_enabled = True + + def disable_callback(self): + self._callbacks_enabled = False + def seek(self, where, whence=0): if whence not in (0, 1, 2): # Mimic io's error for invalid whence values @@ -544,202 +544,202 @@ class ReadFileChunk: where += self._size self._fileobj.seek(max(where, self._start_byte)) - if self._callbacks is not None and self._callbacks_enabled: - # To also rewind the callback() for an accurate progress report + if self._callbacks is not None and self._callbacks_enabled: + # To also rewind the callback() for an accurate progress report bounded_where = max(min(where - self._start_byte, self._size), 0) bounded_amount_read = min(self._amount_read, self._size) amount = bounded_where - bounded_amount_read - invoke_progress_callbacks( + invoke_progress_callbacks( self._callbacks, bytes_transferred=amount ) self._amount_read = max(where - self._start_byte, 0) - - def close(self): - if self._close_callbacks is not None and self._callbacks_enabled: - for callback in self._close_callbacks: - callback() - self._fileobj.close() - - def tell(self): - return self._amount_read - - def __len__(self): - # __len__ is defined because requests will try to determine the length - # of the stream to set a content length. In the normal case - # of the file it will just stat the file, but we need to change that - # behavior. By providing a __len__, requests will use that instead - # of stat'ing the file. 
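# Illustrative sketch (not part of the diffed file): ReadFileChunk, defined
# above, exposes only a window of the underlying file-like object, so reads and
# len() stop at the chunk boundary rather than at the end of the stream.
from io import BytesIO
from s3transfer.utils import ReadFileChunk

fileobj = BytesIO(b'0123456789')
fileobj.seek(4)                                    # chunk starts at byte 4
chunk = ReadFileChunk(fileobj, chunk_size=3, full_file_size=10)
assert len(chunk) == 3                             # bounded by chunk_size
assert chunk.read() == b'456'                      # stops at the chunk boundary
assert chunk.read() == b''                         # behaves like end of file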
- return self._size - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - self.close() - - def __iter__(self): - # This is a workaround for http://bugs.python.org/issue17575 - # Basically httplib will try to iterate over the contents, even - # if its a file like object. This wasn't noticed because we've - # already exhausted the stream so iterating over the file immediately - # stops, which is what we're simulating here. - return iter([]) - - + + def close(self): + if self._close_callbacks is not None and self._callbacks_enabled: + for callback in self._close_callbacks: + callback() + self._fileobj.close() + + def tell(self): + return self._amount_read + + def __len__(self): + # __len__ is defined because requests will try to determine the length + # of the stream to set a content length. In the normal case + # of the file it will just stat the file, but we need to change that + # behavior. By providing a __len__, requests will use that instead + # of stat'ing the file. + return self._size + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + self.close() + + def __iter__(self): + # This is a workaround for http://bugs.python.org/issue17575 + # Basically httplib will try to iterate over the contents, even + # if its a file like object. This wasn't noticed because we've + # already exhausted the stream so iterating over the file immediately + # stops, which is what we're simulating here. + return iter([]) + + class StreamReaderProgress: - """Wrapper for a read only stream that adds progress callbacks.""" - - def __init__(self, stream, callbacks=None): - self._stream = stream - self._callbacks = callbacks - if callbacks is None: - self._callbacks = [] - - def read(self, *args, **kwargs): - value = self._stream.read(*args, **kwargs) - invoke_progress_callbacks(self._callbacks, len(value)) - return value - - -class NoResourcesAvailable(Exception): - pass - - + """Wrapper for a read only stream that adds progress callbacks.""" + + def __init__(self, stream, callbacks=None): + self._stream = stream + self._callbacks = callbacks + if callbacks is None: + self._callbacks = [] + + def read(self, *args, **kwargs): + value = self._stream.read(*args, **kwargs) + invoke_progress_callbacks(self._callbacks, len(value)) + return value + + +class NoResourcesAvailable(Exception): + pass + + class TaskSemaphore: - def __init__(self, count): - """A semaphore for the purpose of limiting the number of tasks - - :param count: The size of semaphore - """ - self._semaphore = threading.Semaphore(count) - - def acquire(self, tag, blocking=True): - """Acquire the semaphore - - :param tag: A tag identifying what is acquiring the semaphore. Note - that this is not really needed to directly use this class but is - needed for API compatibility with the SlidingWindowSemaphore - implementation. - :param block: If True, block until it can be acquired. If False, + def __init__(self, count): + """A semaphore for the purpose of limiting the number of tasks + + :param count: The size of semaphore + """ + self._semaphore = threading.Semaphore(count) + + def acquire(self, tag, blocking=True): + """Acquire the semaphore + + :param tag: A tag identifying what is acquiring the semaphore. Note + that this is not really needed to directly use this class but is + needed for API compatibility with the SlidingWindowSemaphore + implementation. + :param block: If True, block until it can be acquired. If False, do not block and raise an exception if cannot be acquired. 
- - :returns: A token (can be None) to use when releasing the semaphore - """ - logger.debug("Acquiring %s", tag) - if not self._semaphore.acquire(blocking): - raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) - - def release(self, tag, acquire_token): - """Release the semaphore - - :param tag: A tag identifying what is releasing the semaphore - :param acquire_token: The token returned from when the semaphore was - acquired. Note that this is not really needed to directly use this - class but is needed for API compatibility with the - SlidingWindowSemaphore implementation. - """ + + :returns: A token (can be None) to use when releasing the semaphore + """ + logger.debug("Acquiring %s", tag) + if not self._semaphore.acquire(blocking): + raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) + + def release(self, tag, acquire_token): + """Release the semaphore + + :param tag: A tag identifying what is releasing the semaphore + :param acquire_token: The token returned from when the semaphore was + acquired. Note that this is not really needed to directly use this + class but is needed for API compatibility with the + SlidingWindowSemaphore implementation. + """ logger.debug(f"Releasing acquire {tag}/{acquire_token}") - self._semaphore.release() - - -class SlidingWindowSemaphore(TaskSemaphore): - """A semaphore used to coordinate sequential resource access. - - This class is similar to the stdlib BoundedSemaphore: - - * It's initialized with a count. - * Each call to ``acquire()`` decrements the counter. - * If the count is at zero, then ``acquire()`` will either block until the - count increases, or if ``blocking=False``, then it will raise - a NoResourcesAvailable exception indicating that it failed to acquire the - semaphore. - - The main difference is that this semaphore is used to limit - access to a resource that requires sequential access. For example, - if I want to access resource R that has 20 subresources R_0 - R_19, - this semaphore can also enforce that you only have a max range of - 10 at any given point in time. You must also specify a tag name - when you acquire the semaphore. The sliding window semantics apply - on a per tag basis. The internal count will only be incremented - when the minimum sequence number for a tag is released. - - """ - - def __init__(self, count): - self._count = count - # Dict[tag, next_sequence_number]. - self._tag_sequences = defaultdict(int) - self._lowest_sequence = {} - self._lock = threading.Lock() - self._condition = threading.Condition(self._lock) - # Dict[tag, List[sequence_number]] - self._pending_release = {} - - def current_count(self): - with self._lock: - return self._count - - def acquire(self, tag, blocking=True): - logger.debug("Acquiring %s", tag) - self._condition.acquire() - try: - if self._count == 0: - if not blocking: - raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) - else: - while self._count == 0: - self._condition.wait() - # self._count is no longer zero. - # First, check if this is the first time we're seeing this tag. - sequence_number = self._tag_sequences[tag] - if sequence_number == 0: - # First time seeing the tag, so record we're at 0. 
- self._lowest_sequence[tag] = sequence_number - self._tag_sequences[tag] += 1 - self._count -= 1 - return sequence_number - finally: - self._condition.release() - - def release(self, tag, acquire_token): - sequence_number = acquire_token - logger.debug("Releasing acquire %s/%s", tag, sequence_number) - self._condition.acquire() - try: - if tag not in self._tag_sequences: - raise ValueError("Attempted to release unknown tag: %s" % tag) - max_sequence = self._tag_sequences[tag] - if self._lowest_sequence[tag] == sequence_number: - # We can immediately process this request and free up - # resources. - self._lowest_sequence[tag] += 1 - self._count += 1 - self._condition.notify() - queued = self._pending_release.get(tag, []) - while queued: - if self._lowest_sequence[tag] == queued[-1]: - queued.pop() - self._lowest_sequence[tag] += 1 - self._count += 1 - else: - break - elif self._lowest_sequence[tag] < sequence_number < max_sequence: - # We can't do anything right now because we're still waiting - # for the min sequence for the tag to be released. We have - # to queue this for pending release. + self._semaphore.release() + + +class SlidingWindowSemaphore(TaskSemaphore): + """A semaphore used to coordinate sequential resource access. + + This class is similar to the stdlib BoundedSemaphore: + + * It's initialized with a count. + * Each call to ``acquire()`` decrements the counter. + * If the count is at zero, then ``acquire()`` will either block until the + count increases, or if ``blocking=False``, then it will raise + a NoResourcesAvailable exception indicating that it failed to acquire the + semaphore. + + The main difference is that this semaphore is used to limit + access to a resource that requires sequential access. For example, + if I want to access resource R that has 20 subresources R_0 - R_19, + this semaphore can also enforce that you only have a max range of + 10 at any given point in time. You must also specify a tag name + when you acquire the semaphore. The sliding window semantics apply + on a per tag basis. The internal count will only be incremented + when the minimum sequence number for a tag is released. + + """ + + def __init__(self, count): + self._count = count + # Dict[tag, next_sequence_number]. + self._tag_sequences = defaultdict(int) + self._lowest_sequence = {} + self._lock = threading.Lock() + self._condition = threading.Condition(self._lock) + # Dict[tag, List[sequence_number]] + self._pending_release = {} + + def current_count(self): + with self._lock: + return self._count + + def acquire(self, tag, blocking=True): + logger.debug("Acquiring %s", tag) + self._condition.acquire() + try: + if self._count == 0: + if not blocking: + raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) + else: + while self._count == 0: + self._condition.wait() + # self._count is no longer zero. + # First, check if this is the first time we're seeing this tag. + sequence_number = self._tag_sequences[tag] + if sequence_number == 0: + # First time seeing the tag, so record we're at 0. 
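# Illustrative sketch (not part of the diff): the sliding-window behaviour the
# SlidingWindowSemaphore docstring above describes. Releasing a later sequence
# number is queued until the lowest outstanding sequence number for that tag is
# released, at which point capacity is handed back.
from s3transfer.utils import NoResourcesAvailable, SlidingWindowSemaphore

sem = SlidingWindowSemaphore(2)
first = sem.acquire('part')        # sequence number 0
second = sem.acquire('part')       # sequence number 1
sem.release('part', second)        # out of order: queued, count stays at 0
try:
    sem.acquire('part', blocking=False)
except NoResourcesAvailable:
    pass                           # still exhausted until sequence 0 is freed
sem.release('part', first)         # frees 0, then the queued 1 as well
assert sem.current_count() == 2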
+ self._lowest_sequence[tag] = sequence_number + self._tag_sequences[tag] += 1 + self._count -= 1 + return sequence_number + finally: + self._condition.release() + + def release(self, tag, acquire_token): + sequence_number = acquire_token + logger.debug("Releasing acquire %s/%s", tag, sequence_number) + self._condition.acquire() + try: + if tag not in self._tag_sequences: + raise ValueError("Attempted to release unknown tag: %s" % tag) + max_sequence = self._tag_sequences[tag] + if self._lowest_sequence[tag] == sequence_number: + # We can immediately process this request and free up + # resources. + self._lowest_sequence[tag] += 1 + self._count += 1 + self._condition.notify() + queued = self._pending_release.get(tag, []) + while queued: + if self._lowest_sequence[tag] == queued[-1]: + queued.pop() + self._lowest_sequence[tag] += 1 + self._count += 1 + else: + break + elif self._lowest_sequence[tag] < sequence_number < max_sequence: + # We can't do anything right now because we're still waiting + # for the min sequence for the tag to be released. We have + # to queue this for pending release. self._pending_release.setdefault(tag, []).append( sequence_number ) - self._pending_release[tag].sort(reverse=True) - else: - raise ValueError( - "Attempted to release unknown sequence number " + self._pending_release[tag].sort(reverse=True) + else: + raise ValueError( + "Attempted to release unknown sequence number " "%s for tag: %s" % (sequence_number, tag) ) - finally: - self._condition.release() - - + finally: + self._condition.release() + + class ChunksizeAdjuster: def __init__( self, @@ -747,56 +747,56 @@ class ChunksizeAdjuster: min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS, ): - self.max_size = max_size - self.min_size = min_size - self.max_parts = max_parts - - def adjust_chunksize(self, current_chunksize, file_size=None): - """Get a chunksize close to current that fits within all S3 limits. - - :type current_chunksize: int - :param current_chunksize: The currently configured chunksize. - - :type file_size: int or None - :param file_size: The size of the file to upload. This might be None - if the object being transferred has an unknown size. - - :returns: A valid chunksize that fits within configured limits. - """ - chunksize = current_chunksize - if file_size is not None: - chunksize = self._adjust_for_max_parts(chunksize, file_size) - return self._adjust_for_chunksize_limits(chunksize) - - def _adjust_for_chunksize_limits(self, current_chunksize): - if current_chunksize > self.max_size: - logger.debug( - "Chunksize greater than maximum chunksize. " + self.max_size = max_size + self.min_size = min_size + self.max_parts = max_parts + + def adjust_chunksize(self, current_chunksize, file_size=None): + """Get a chunksize close to current that fits within all S3 limits. + + :type current_chunksize: int + :param current_chunksize: The currently configured chunksize. + + :type file_size: int or None + :param file_size: The size of the file to upload. This might be None + if the object being transferred has an unknown size. + + :returns: A valid chunksize that fits within configured limits. + """ + chunksize = current_chunksize + if file_size is not None: + chunksize = self._adjust_for_max_parts(chunksize, file_size) + return self._adjust_for_chunksize_limits(chunksize) + + def _adjust_for_chunksize_limits(self, current_chunksize): + if current_chunksize > self.max_size: + logger.debug( + "Chunksize greater than maximum chunksize. " "Setting to %s from %s." 
% (self.max_size, current_chunksize) ) - return self.max_size - elif current_chunksize < self.min_size: - logger.debug( - "Chunksize less than minimum chunksize. " + return self.max_size + elif current_chunksize < self.min_size: + logger.debug( + "Chunksize less than minimum chunksize. " "Setting to %s from %s." % (self.min_size, current_chunksize) ) - return self.min_size - else: - return current_chunksize - - def _adjust_for_max_parts(self, current_chunksize, file_size): - chunksize = current_chunksize - num_parts = int(math.ceil(file_size / float(chunksize))) - - while num_parts > self.max_parts: - chunksize *= 2 - num_parts = int(math.ceil(file_size / float(chunksize))) - - if chunksize != current_chunksize: - logger.debug( - "Chunksize would result in the number of parts exceeding the " + return self.min_size + else: + return current_chunksize + + def _adjust_for_max_parts(self, current_chunksize, file_size): + chunksize = current_chunksize + num_parts = int(math.ceil(file_size / float(chunksize))) + + while num_parts > self.max_parts: + chunksize *= 2 + num_parts = int(math.ceil(file_size / float(chunksize))) + + if chunksize != current_chunksize: + logger.debug( + "Chunksize would result in the number of parts exceeding the " "maximum. Setting to %s from %s." % (chunksize, current_chunksize) ) - - return chunksize + + return chunksize diff --git a/contrib/python/s3transfer/py3/ya.make b/contrib/python/s3transfer/py3/ya.make index 8759ec0761..964a630639 100644 --- a/contrib/python/s3transfer/py3/ya.make +++ b/contrib/python/s3transfer/py3/ya.make @@ -1,50 +1,50 @@ # Generated by devtools/yamaker (pypi). PY3_LIBRARY() - + OWNER(gebetix g:python-contrib) VERSION(0.5.1) - + LICENSE(Apache-2.0) - -PEERDIR( - contrib/python/botocore -) - + +PEERDIR( + contrib/python/botocore +) + NO_LINT() NO_CHECK_IMPORTS( s3transfer.crt ) -PY_SRCS( - TOP_LEVEL - s3transfer/__init__.py - s3transfer/bandwidth.py - s3transfer/compat.py +PY_SRCS( + TOP_LEVEL + s3transfer/__init__.py + s3transfer/bandwidth.py + s3transfer/compat.py s3transfer/constants.py s3transfer/copies.py s3transfer/crt.py - s3transfer/delete.py - s3transfer/download.py - s3transfer/exceptions.py - s3transfer/futures.py - s3transfer/manager.py + s3transfer/delete.py + s3transfer/download.py + s3transfer/exceptions.py + s3transfer/futures.py + s3transfer/manager.py s3transfer/processpool.py - s3transfer/subscribers.py - s3transfer/tasks.py - s3transfer/upload.py - s3transfer/utils.py -) - + s3transfer/subscribers.py + s3transfer/tasks.py + s3transfer/upload.py + s3transfer/utils.py +) + RESOURCE_FILES( PREFIX contrib/python/s3transfer/py3/ .dist-info/METADATA .dist-info/top_level.txt ) -END() +END() RECURSE_FOR_TESTS( tests diff --git a/contrib/python/s3transfer/ya.make b/contrib/python/s3transfer/ya.make index 657af5f9f8..bf4a234edb 100644 --- a/contrib/python/s3transfer/ya.make +++ b/contrib/python/s3transfer/ya.make @@ -1,18 +1,18 @@ PY23_LIBRARY() - + LICENSE(Service-Py23-Proxy) OWNER(g:python-contrib) - + IF (PYTHON2) PEERDIR(contrib/python/s3transfer/py2) ELSE() PEERDIR(contrib/python/s3transfer/py3) ENDIF() - -NO_LINT() - -END() + +NO_LINT() + +END() RECURSE( py2 diff --git a/contrib/python/ya.make b/contrib/python/ya.make index b44a2928e8..d01ced9f3a 100644 --- a/contrib/python/ya.make +++ b/contrib/python/ya.make @@ -960,7 +960,7 @@ RECURSE( ruamel.yaml Rx s3-tests - s3transfer + s3transfer sacrebleu salt-pepper saneyaml @@ -1022,7 +1022,7 @@ RECURSE( spintop-openhtf splunk-sdk-python sqlalchemy - SQLAlchemy-Continuum + 
SQLAlchemy-Continuum
 sqlalchemy-stubs
 SQLAlchemy-Utils
 sqlparse
@@ -1086,7 +1086,7 @@ RECURSE(
 transitions
 transliterate
 trollius
- trollsift
+ trollsift
 Twiggy
 twiggy-goodies
 Twisted
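
Reviewer's note on the restored hunk: the ``SlidingWindowSemaphore`` and ``ChunksizeAdjuster`` code above is re-added verbatim, and a small driver makes the per-tag window semantics easier to check. The sketch below is illustrative only and is not part of the patch; it assumes the hunk belongs to ``s3transfer/utils.py`` (where upstream s3transfer keeps these helpers) and that the stock constants apply (``MIN_UPLOAD_CHUNKSIZE`` of 5 MiB, ``MAX_PARTS`` of 10,000).

.. code-block:: python

    from s3transfer.utils import ChunksizeAdjuster, SlidingWindowSemaphore

    # Allow at most 3 in-flight sequence numbers for this tag.
    sem = SlidingWindowSemaphore(3)
    tokens = [sem.acquire('my-upload') for _ in range(3)]  # returns 0, 1, 2
    # A fourth non-blocking acquire would fail here:
    # sem.acquire('my-upload', blocking=False)  # raises NoResourcesAvailable

    sem.release('my-upload', tokens[2])  # out of order: parked, capacity stays 0
    sem.release('my-upload', tokens[0])  # lowest sequence freed: capacity -> 1
    sem.release('my-upload', tokens[1])  # frees 1, then drains the parked 2: capacity -> 3
    assert sem.current_count() == 3

    # Pick a part size that keeps a large upload under the part-count ceiling.
    adjuster = ChunksizeAdjuster()
    part_size = adjuster.adjust_chunksize(
        current_chunksize=8 * 1024 * 1024,      # 8 MiB requested
        file_size=200 * 1024 * 1024 * 1024,     # 200 GiB object
    )
    # With a 10,000-part ceiling the chunksize doubles until the part count
    # fits: 8 MiB (25,600 parts) -> 16 MiB -> 32 MiB (6,400 parts).
    print(part_size)  # 33554432

The per-tag bookkeeping is the point of the sliding window: capacity is handed back only when the lowest outstanding sequence number for a tag is released, so out-of-order releases are parked in ``_pending_release`` until the window can slide forward past them.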